diff --git a/.htaccess b/.htaccess index 512c274..c42f99e 100644 --- a/.htaccess +++ b/.htaccess @@ -1,3 +1,3 @@ RewriteEngine On RewriteCond %{HTTP_USER_AGENT} ^.*(AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot\ 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\ Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade\ indexer\ bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot).*$ [NC] -RewriteRule .* - [F,L] \ No newline at end of file +RewriteRule !^/?robots\.txt$ - [F,L] diff --git a/code/robots.py b/code/robots.py index 087b00b..bb18e70 100644 --- a/code/robots.py +++ b/code/robots.py @@ -142,7 +142,7 @@ def json_to_htaccess(robot_json): robots = map(lambda el: el.replace(" ", "\\ "), robot_json.keys()) htaccess += "|".join(robots) htaccess += ").*$ [NC]\n" - htaccess += "RewriteRule .* - [F,L]" + htaccess += "RewriteRule !^/?robots\\.txt$ - [F,L]\n" return htaccess diff --git a/code/test_files/.htaccess b/code/test_files/.htaccess index a34bf55..2e78674 100644 --- a/code/test_files/.htaccess +++ b/code/test_files/.htaccess @@ -1,3 +1,3 @@ RewriteEngine On RewriteCond %{HTTP_USER_AGENT} ^.*(AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|Diffbot|FacebookBot|facebookexternalhit|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\ Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PerplexityBot|PetalBot|Scrapy|Sidetrade\ indexer\ bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot).*$ [NC] -RewriteRule .* - [F,L] \ No newline at end of file +RewriteRule !^/?robots\.txt$ - [F,L]