# ROBOTS.TXT FOR MASSAGETHESS.GR
# Massage & Physiotherapy Center Thessaloniki
# Optimized for SEO and content protection
# Generated: 2025-01-10
#
# How this file is read by crawlers (RFC 9309):
#   - One directive per line. Everything after "#" on a line is a comment.
#   - A crawler obeys ONLY the single most specific User-agent group that
#     matches it; groups are never combined.
#   - Within a group the longest matching path wins; on a tie, Allow wins.
#   - robots.txt is an advisory CRAWL control. It is not access control and
#     it does not by itself remove URLs from a search index (use a
#     "noindex" meta tag or X-Robots-Tag header for that).

# ============================================================================
# GLOBAL DIRECTIVES
# ============================================================================
# Content signals per EU Directive 2019/790 (Digital Single Market):
# search indexing is permitted, use of content for AI training is not.
# This comment is an informational statement only; the enforceable part is
# the set of per-bot Disallow groups below.

# ============================================================================
# ALLOWED SEARCH ENGINES
# ============================================================================
# Googlebot, Bingbot, Slurp, DuckDuckBot, Baiduspider and YandexBot are
# welcome. They are intentionally NOT given their own "Allow: /" groups:
# a dedicated group would make them ignore every rule in the "User-agent: *"
# group (WordPress internals, duplicate-content filters, Crawl-delay).
# They are governed by the "User-agent: *" group further down.

# ============================================================================
# BLOCKED AI BOTS (PROTECT CONTENT FROM AI TRAINING)
# ============================================================================
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: perplexitybot
Disallow: /

User-agent: youbot
Disallow: /

# ============================================================================
# BLOCKED UNWANTED BOTS
# ============================================================================
User-agent: MJ12bot
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: SEMrushBot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: BacklinkCrawler
Disallow: /

# ============================================================================
# DEFAULT GROUP: ALL OTHER CRAWLERS (INCLUDING SEARCH ENGINES)
# ============================================================================
# NOTE: no blank lines inside this group. Some legacy parsers treat a blank
# line as the end of a group; "#" lines are used as separators instead.
User-agent: *
#
# --- Exceptions (listed first so first-match parsers agree with
# --- longest-match parsers) ---------------------------------------------
# AJAX endpoint needed by front-end features
Allow: /wp-admin/admin-ajax.php
# WP Performance Optimizer internal file; more specific than the uploads
# Allow below, so it stays blocked
Disallow: /wp-content/uploads/wpo/wpo-plugins-tables-list.json
# Media files (anything with an extension) inside uploads
Allow: /wp-content/uploads/*.*
# Theme assets are required to render pages
Allow: /wp-content/themes/
# CSS/JS required for rendering. These must be path-specific (longer than
# the Disallow they override) and must not end in "$", otherwise versioned
# URLs such as style.css?ver=6.4 would not match.
Allow: /wp-includes/*.css
Allow: /wp-includes/*.js
Allow: /wp-content/plugins/*.css
Allow: /wp-content/plugins/*.js
# Minified bundles written by cache/optimizer plugins (presumably served
# from here on this site - verify against the page source)
Allow: /wp-content/cache/*.css
Allow: /wp-content/cache/*.js
# Static asset types elsewhere on the site
Allow: /*.css$
Allow: /*.js$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.png$
Allow: /*.gif$
Allow: /*.webp$
Allow: /*.svg$
#
# --- WordPress security & performance -----------------------------------
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-content/plugins/
Disallow: /wp-content/cache/
Disallow: /wp-content/languages/
Disallow: /wp-content/mu-plugins/
Disallow: /wp-content/upgrade/
Disallow: /wp-content/uploads/
Disallow: /wp-content/backup/
Disallow: /readme.html
Disallow: /license.txt
Disallow: /xmlrpc.php
Disallow: /wp-login.php
Disallow: /wp-register.php
Disallow: /trackback
Disallow: /feed/
Disallow: /cgi-bin
Disallow: /admin/
#
# --- Specific content restrictions --------------------------------------
# Keep AI data files out of search crawls (avoids duplicate content)
Disallow: /llms.txt
Disallow: /llms-full.txt
# Internal and test pages
Disallow: /test/
Disallow: /dev/
Disallow: /staging/
Disallow: /temp/
Disallow: /backup/
# Search, filter and archive pages (prevent duplicate content)
Disallow: /?s=
Disallow: /search/
Disallow: /filter/
Disallow: /tag/
Disallow: /category/
Disallow: /author/
# Any URL with a query string (a trailing "*" is implicit)
Disallow: /*?
# File types that should never be crawled
Disallow: /*.php$
Disallow: /*.inc$
Disallow: /*.log$
Disallow: /*.txt$
Disallow: /*attachment$
# API endpoints
Disallow: /wp-json/
#
# --- Language-specific restrictions -------------------------------------
# Duplicate language versions served via query parameter. Already covered
# by "Disallow: /*?" above; kept explicit to document the intent. Matches
# ?lang=el, ?lang=en and any other value.
Disallow: /*?lang=
#
# --- Crawl delay and request rate ---------------------------------------
# Respect server resources. Honored by some crawlers (e.g. Bing); ignored
# by Googlebot.
Crawl-delay: 1

# ============================================================================
# HOST AND SITEMAP
# ============================================================================
# Preferred domain (if multiple)
# Host: https://massagethess.gr

# XML Sitemaps
Sitemap: https://massagethess.gr/sitemap_index.xml
Sitemap: https://massagethess.gr/sitemap.xml

# ============================================================================
# ADDITIONAL INSTRUCTIONS
# ============================================================================
# NOTE: AI data files (llms.txt, llms-full.txt) are intentionally excluded
# from search-engine crawling to prevent duplicate content issues.
# AI bots are explicitly blocked from content scraping/training.
# robots.txt is advisory only: the LLMS files are still served to any client
# that requests their direct URLs.

# HEALTH & MEDICAL CONTENT PROTECTION
# Medical and health content requires careful handling.
# Ensure medical claims comply with local regulations.