# ===========================================================================
# robots.txt — Bible Empire Kampala
# Domain : https://bibleempirekampala.ug/
# Updated : 2026-03
# Purpose : Maximise crawl budget for SEO-critical content, protect
#           private/utility paths, and control AI training scrapers.
#
# DEPLOYMENT: Place this file at the root of your web server so it is
# accessible at https://bibleempirekampala.ug/robots.txt
# ===========================================================================

# ---------------------------------------------------------------------------
# SECTION 1 — PRIMARY SEARCH ENGINES
# Full access to all public content. Crawl-delay intentionally omitted for
# Google — Googlebot ignores the directive and self-regulates its crawl
# rate, which produces faster indexing. (Bingbot receives a modest
# Crawl-delay in its group below.)
# ---------------------------------------------------------------------------

User-agent: Googlebot
Allow: /
Allow: /images/
Allow: /css/
Allow: /site.webmanifest
Disallow: /cdn-cgi/
Disallow: /icons/

User-agent: Googlebot-Image
# Explicitly allow all product & gallery images for Google Image Search.
# This is important — product images rank in image search and drive local traffic.
Allow: /images/

User-agent: Googlebot-Video
Allow: /

User-agent: Googlebot-News
Allow: /

User-agent: AdsBot-Google
Allow: /

User-agent: AdsBot-Google-Mobile
Allow: /

User-agent: Bingbot
Allow: /
Allow: /images/
Allow: /css/
Disallow: /cdn-cgi/
Disallow: /icons/
Crawl-delay: 5

User-agent: Slurp
# Yahoo Search (powered by Bing)
Allow: /
Disallow: /cdn-cgi/
Disallow: /icons/
Crawl-delay: 10

User-agent: DuckDuckBot
Allow: /
Disallow: /cdn-cgi/
Disallow: /icons/
Crawl-delay: 5

User-agent: Baiduspider
# Baidu — Chinese search engine, relevant for diaspora traffic
Allow: /
Disallow: /cdn-cgi/
Disallow: /icons/
Crawl-delay: 10

User-agent: YandexBot
# Yandex — Russian search engine
Allow: /
Disallow: /cdn-cgi/
Disallow: /icons/
Crawl-delay: 10

# ---------------------------------------------------------------------------
# SECTION 2 — SOCIAL MEDIA PREVIEW CRAWLERS
# These bots read Open Graph and Twitter Card meta tags to generate
# link previews when someone shares bibleempirekampala.ug on social media.
# Blocking them breaks WhatsApp, Facebook, and Instagram link previews.
# ---------------------------------------------------------------------------

User-agent: facebookexternalhit
# Facebook & Instagram link preview bot — reads og:image, og:title
Allow: /
Allow: /images/

User-agent: Twitterbot
# Twitter/X card preview bot
Allow: /
Allow: /images/

User-agent: LinkedInBot
Allow: /
Allow: /images/

User-agent: WhatsApp
# WhatsApp link preview — critical since the site uses wa.me CTAs
Allow: /
Allow: /images/

User-agent: TelegramBot
Allow: /
Allow: /images/

User-agent: Slackbot
Allow: /
Allow: /images/

# ---------------------------------------------------------------------------
# SECTION 3 — SEO & ANALYTICS TOOLS
# Allowing these lets Bible Empire track its own rankings, backlinks,
# and competitor gaps in tools like Semrush and Ahrefs.
# Remove or Disallow these if the client prefers not to be tracked
# by third-party SEO tools.
# ---------------------------------------------------------------------------

User-agent: SemrushBot
Allow: /
Disallow: /cdn-cgi/
Disallow: /icons/
Crawl-delay: 10

User-agent: AhrefsBot
Allow: /
Disallow: /cdn-cgi/
Disallow: /icons/
Crawl-delay: 10

User-agent: MJ12bot
# Majestic SEO
# Blocked — Majestic crawls very aggressively; no client benefit.
Disallow: /

User-agent: DotBot
# Moz SEO crawler
Allow: /
Disallow: /cdn-cgi/
Disallow: /icons/
Crawl-delay: 10

# ---------------------------------------------------------------------------
# SECTION 4 — AI TRAINING & LLM SCRAPERS
# These bots harvest web content to train large language models.
# Blocking them does NOT affect Google search rankings.
# Recommended: Block to protect the client's product descriptions,
# pricing, and copywriting from being used in AI training datasets.
# ---------------------------------------------------------------------------

User-agent: GPTBot
# OpenAI training scraper
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: Google-Extended
# Google Gemini / Bard training scraper (separate from Googlebot)
Disallow: /

User-agent: anthropic-ai
# Anthropic Claude training scraper
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: YouBot
Disallow: /

User-agent: CCBot
# Common Crawl — large open dataset used by many AI companies
Disallow: /

User-agent: omgili
Disallow: /

User-agent: omgilibot
Disallow: /

# ---------------------------------------------------------------------------
# SECTION 5 — MALICIOUS, SPAM & AGGRESSIVE BOTS
# These crawlers scrape contact details, product prices, and content
# for spam, price monitoring, or content theft. Block them all.
# ---------------------------------------------------------------------------

User-agent: SeznamBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: ia_archiver
# Alexa/Amazon archiver — no SEO benefit, high crawl cost
Disallow: /

User-agent: Exabot
Disallow: /

User-agent: sogou
Disallow: /

User-agent: Sogou web spider
Disallow: /

User-agent: proximic
Disallow: /

User-agent: GrapeshotCrawler
Disallow: /

User-agent: magpie-crawler
Disallow: /

User-agent: Screaming Frog SEO Spider
# Prevent competitors from scraping prices and content via SEO auditing tools
Disallow: /

User-agent: DataForSeoBot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: SurveyBot
Disallow: /

User-agent: VoilaBot
Disallow: /

User-agent: EmailCollector
Disallow: /

User-agent: EmailSiphon
Disallow: /

User-agent: ExtractorPro
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Offline Explorer
Disallow: /

# ---------------------------------------------------------------------------
# SECTION 6 — PATHS: ALWAYS BLOCK (applies to ALL bots not listed above)
# These paths exist on the server but have zero SEO value and should
# never be indexed. Blocking them preserves crawl budget for the pages
# and content that actually rank and convert.
# ---------------------------------------------------------------------------

User-agent: *
# Block Cloudflare internal utilities (email obfuscation, challenge pages, etc.)
Disallow: /cdn-cgi/
# Block self-hosted FontAwesome icon library — pure CSS/font assets, no SEO value
Disallow: /icons/
# Block any accidental server/editor temp files
Disallow: /*.bak$
Disallow: /*.log$
Disallow: /*.sql$
Disallow: /*.env$
Disallow: /*.config$
# Block common CMS admin paths (defensive — in case the site ever migrates to WordPress, etc.)
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-json/
Disallow: /wp-content/uploads/
Disallow: /admin/
Disallow: /administrator/
Disallow: /login/
Disallow: /dashboard/
Disallow: /cgi-bin/
# Allow all remaining public content
Allow: /

# ---------------------------------------------------------------------------
# SECTION 7 — SITEMAP DECLARATION
# Declare the sitemap location so every search engine crawler can find
# and index all pages without waiting for a manual GSC submission.
# Create this sitemap at: https://bibleempirekampala.ug/sitemap.xml
# ---------------------------------------------------------------------------

Sitemap: https://bibleempirekampala.ug/sitemap.xml