# ==============================================================
# Robots.txt for ASKSonnie.INFO
# Optimized for SEO, AIEO, and Responsible AI-Crawler Management
# ==============================================================
#
# How crawlers read this file (Robots Exclusion Protocol, RFC 9309):
# - One directive per line; everything after "#" on a line is a comment.
# - A crawler obeys ONLY the most specific group that names it. A bot
#   given its own group with "Allow: /" ignores every rule under "*",
#   so bots that must share the crawl-waste rules are listed together
#   with "*" in a single group below.
# - User-agent values are literal product tokens; only a bare "*" is a
#   wildcard. Patterns such as "*AI" or "*-AI" match nothing.
# - No blank lines are used inside a group, because some older parsers
#   treat a blank line as the end of the group.

# ==============================================================
# ✅ Shared crawl rules
# Applies to search engines, user-triggered AI answer/retrieval
# agents, and any crawler not named elsewhere in this file.
# ==============================================================
User-agent: *
# --- Legitimate search & discovery ---
User-agent: Googlebot
User-agent: Bingbot
User-agent: Applebot
User-agent: DuckDuckBot
User-agent: Yandex
# --- AI agents fetching pages for query responses / shared links ---
# OpenAI (ChatGPT browsing and search)
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
# Perplexity AI
User-agent: PerplexityBot
# Anthropic Claude
User-agent: Claude-Web
# NOTE(review): "Gemini" and "Copilot" do not appear to be documented
# crawler tokens; kept from the previous policy, confirm or remove.
User-agent: Gemini
User-agent: Copilot
# Allow main assets for proper rendering
Allow: /wp-content/uploads/
# Front-end features call admin-ajax.php; keep it reachable even though
# the rest of /wp-admin/ is blocked (the longer, more specific rule wins).
Allow: /wp-admin/admin-ajax.php
# Block crawl waste and sensitive directories
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /xmlrpc.php
Disallow: /readme.html
Disallow: /license.txt
# Block feeds and thin content (avoid duplicate or low-value indexation)
Disallow: /*/feed/
Disallow: /feed/
Disallow: /comments/feed/
# Block internal search results
Disallow: /?s=
Disallow: /search/
# Block REST API endpoints and system routes
Disallow: /wp-json/
Disallow: /?rest_route=/
# Block legacy or junk URLs
Disallow: /trackback/
Disallow: /*?replytocom

# ==============================================================
# 🚫 Full Block for AI Model-Training / Data Harvesting Crawlers
# ==============================================================
# --- Training tokens of otherwise-allowed vendors ---
# These tokens govern model-training use, so they must be disallowed to
# honor the "no training rights" policy. Blocking them does not affect
# Googlebot/Applebot search crawling or ChatGPT-User/OAI-SearchBot.
# OpenAI training crawler
User-agent: GPTBot
# Google: use of crawled content for Gemini training and grounding
User-agent: Google-Extended
# Apple: use of crawled content for foundation-model training
User-agent: Applebot-Extended
# --- Dataset and model-training crawlers ---
# DeepSeek AI
User-agent: DeepSeek
User-agent: DeepSeekBot
# Common Crawl (foundation for many AI datasets)
User-agent: CCBot
# Cohere AI
User-agent: cohere-ai
# Meta / Facebook AI
User-agent: Meta-ExternalAgent
# Amazon (LLM data indexer)
User-agent: Amazonbot
# You.com / YouBot
User-agent: YouBot
# xAI (Grok)
User-agent: xai-bot
# Mistral AI
User-agent: MistralBot
# HuggingFace
User-agent: HuggingFaceBot
# Kagi / Orion
User-agent: KagiBot
# Neeva / Snowflake (enterprise dataset bots)
User-agent: NeevaBot
Disallow: /

# Unknown or future AI crawlers cannot be blocked by pattern: robots.txt
# has no partial wildcard for User-agent values, so the former "*AI",
# "*bot-LLM" and "*-AI" groups never matched any crawler. Add each new
# crawler's exact product token to the group above, or enforce the block
# at the server/firewall level.

# ==============================================================
# ✅ Sitemaps for SEO Visibility
# (Sitemap lines are independent of User-agent groups.)
# ==============================================================
Sitemap: https://asksonnie.info/sitemap_index.xml
Sitemap: https://asksonnie.info/news-sitemap.xml
Sitemap: https://asksonnie.info/post-sitemap.xml
Sitemap: https://asksonnie.info/page-sitemap.xml
Sitemap: https://asksonnie.info/category-sitemap.xml

# ==============================================================
# 🧭 Policy Summary
# - SEO bots (Google, Bing, etc.) → ✅ ACCESS, minus crawl-waste paths
# - ChatGPT-User, OAI-SearchBot, PerplexityBot, Claude-Web
#   → ✅ READ-ONLY ACCESS for query responses, minus crawl-waste paths
# - GPTBot, Google-Extended, Applebot-Extended (training tokens)
#   → 🚫 BLOCKED (no training rights)
# - DeepSeek, Common Crawl, and other listed data harvesters → 🚫 BLOCKED
# - Crawlers not named here follow the shared rules; robots.txt cannot
#   block unknown bots by name pattern
# - Keeps compliance with Responsible AI and digital content rights
# ==============================================================