# robots.txt for GDK Digital
# https://www.gdkdigital.com
# Updated: 2026-05-28 (W-O1 from docs/WEBSITE_OVERHAUL_2026-05-28.md)
# Refreshed: added ClaudeBot, OAI-SearchBot, Applebot-Extended, Meta-ExternalAgent,
# DuckAssistBot, Amazonbot (2026-emerging LLM crawlers); confirmed Bytespider remains blocked.
#
# How to read and maintain this file:
#   * One directive per line. A directive that shares a line with another
#     directive or with a comment is not parsed as intended.
#   * A crawler obeys ONLY the most specific User-agent group that matches
#     it; once a named group matches, "User-agent: *" is ignored. Every
#     named group below therefore repeats the protected-path rules rather
#     than relying on the default group.
#   * Consecutive User-agent lines share the rule set that follows them.
#     Do not put blank lines inside a group.
#   * Specific Disallow rules are listed before the catch-all "Allow: /"
#     so that parsers applying the first matching rule reach the same
#     result as parsers applying the longest matching rule.
#   * Crawl-delay is a non-standard extension; crawlers that do not
#     support it simply ignore the line.
#   * robots.txt is advisory and publicly readable. It is NOT access
#     control: /.env, /.git/, /gdk-admin/ and /api/ must also be protected
#     at the web-server / application level.

# ============================================================
# DEFAULT RULES (ALL CRAWLERS WITHOUT A NAMED GROUP BELOW)
# ============================================================
User-agent: *
# Block admin and application areas
Disallow: /gdk-admin/
Disallow: /fortilis/app/
Disallow: /tdc/app/
Disallow: /crm/app/
Disallow: /feathers/app/
# Block API endpoints
Disallow: /api/
# Block utility files
Disallow: /*.json$
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /.env
Disallow: /.git/
Disallow: /node_modules/
# Allow CSS and JS for rendering
Allow: /css/
Allow: /js/
Allow: /assets/
# Everything else is crawlable
Allow: /

# NOTE(review): the named groups below block *.sql, *.log, /.env, /.git/
# and /node_modules/ but intentionally do NOT add the default group's
# "/*.json$" rule, because these crawlers could previously fetch JSON and
# page rendering may depend on it. Confirm, then add it if nothing needs it.

# ============================================================
# AI SEARCH CRAWLERS (WELCOME)
# ============================================================

# OpenAI GPTBot (rate limited)
User-agent: GPTBot
Disallow: /gdk-admin/
Disallow: /api/
Disallow: /*/app/
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /.env
Disallow: /.git/
Disallow: /node_modules/
Allow: /
Crawl-delay: 1

# AI crawlers with no rate limit
# OpenAI ChatGPT-User (real-time search)
User-agent: ChatGPT-User
# OpenAI SearchBot (real-time web search for ChatGPT)
User-agent: OAI-SearchBot
# Anthropic Claude (legacy user-agent name)
User-agent: anthropic-ai
# Anthropic Claude (legacy web fetch user-agent)
User-agent: Claude-Web
# Anthropic ClaudeBot (current canonical, 2025+)
User-agent: ClaudeBot
# Perplexity AI
User-agent: PerplexityBot
# Google Bard / Gemini
User-agent: Google-Extended
# Cohere AI
User-agent: cohere-ai
# Apple Intelligence / Siri (training opt-in)
User-agent: Applebot-Extended
# Meta AI (Llama training + Meta AI surface)
User-agent: Meta-ExternalAgent
# DuckDuckGo Assist (DuckAssist answer engine)
User-agent: DuckAssistBot
Disallow: /gdk-admin/
Disallow: /api/
Disallow: /*/app/
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /.env
Disallow: /.git/
Disallow: /node_modules/
Allow: /

# AI crawlers limited to one request every 2 seconds
# Common Crawl (used for AI training)
User-agent: CCBot
# Amazon (Alexa / AI assistant content)
User-agent: Amazonbot
Disallow: /gdk-admin/
Disallow: /api/
Disallow: /*/app/
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /.env
Disallow: /.git/
Disallow: /node_modules/
Allow: /
Crawl-delay: 2

# ============================================================
# SEO CRAWLERS
# ============================================================

# Google
User-agent: Googlebot
# Bing (also feeds Microsoft Bing Copilot). User-agent matching is
# case-insensitive, so this single entry replaces the former separate
# "BingBot" and "Bingbot" groups, which carried identical rules.
User-agent: Bingbot
Disallow: /gdk-admin/
Disallow: /api/
Disallow: /*/app/
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /.env
Disallow: /.git/
Disallow: /node_modules/
Allow: /

# Google Images. A named group replaces the Googlebot group for this
# crawler, so the protected paths have to be repeated here as well.
User-agent: Googlebot-Image
Disallow: /gdk-admin/
Disallow: /api/
Disallow: /*/app/
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /.env
Disallow: /.git/
Disallow: /node_modules/
Allow: /assets/
Allow: /images/

# Yandex
User-agent: Yandex
Disallow: /gdk-admin/
Disallow: /api/
Disallow: /*/app/
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /.env
Disallow: /.git/
Disallow: /node_modules/
Allow: /
Crawl-delay: 2

# ============================================================
# SEO TOOL CRAWLERS (RATE LIMITED)
# ============================================================
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
Disallow: /gdk-admin/
Disallow: /api/
Disallow: /*/app/
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /.env
Disallow: /.git/
Disallow: /node_modules/
Allow: /
Crawl-delay: 10

# ============================================================
# SOCIAL MEDIA CRAWLERS
# ============================================================
# Link-preview fetchers: public pages stay fully open, private areas are
# excluded like everywhere else.
User-agent: Twitterbot
User-agent: facebookexternalhit
User-agent: LinkedInBot
User-agent: Slackbot
Disallow: /gdk-admin/
Disallow: /api/
Disallow: /*/app/
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /.env
Disallow: /.git/
Disallow: /node_modules/
Allow: /

# ============================================================
# BLOCK AGGRESSIVE/MALICIOUS BOTS
# ============================================================
User-agent: MauiBot
User-agent: SeznamBot
User-agent: Sogou
User-agent: Bytespider
User-agent: PetalBot
Disallow: /

# ============================================================
# SITEMAP & LLMs.txt REFERENCES
# ============================================================
Sitemap: https://www.gdkdigital.com/sitemap.xml

# AI Content Guidelines (llms.txt standard)
# See: https://www.gdkdigital.com/llms.txt
# Spec: https://llmstxt.org/