From 7c3b5a2cb21f5404cf4e2af1acf8689ba77d7b06 Mon Sep 17 00:00:00 2001 From: Thomas Leister Date: Thu, 27 Mar 2025 16:12:18 +0100 Subject: [PATCH] Add tests for Nginx config generator --- code/test_files/nginx-block-ai-bots.conf | 3 +++ code/tests.py | 12 +++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 code/test_files/nginx-block-ai-bots.conf diff --git a/code/test_files/nginx-block-ai-bots.conf b/code/test_files/nginx-block-ai-bots.conf new file mode 100644 index 0000000..d1b559e --- /dev/null +++ b/code/test_files/nginx-block-ai-bots.conf @@ -0,0 +1,3 @@ +if ($http_user_agent ~* "(AI2Bot|Ai2Bot\-Dolma|Amazonbot|anthropic\-ai|Applebot|Applebot\-Extended|Bytespider|CCBot|ChatGPT\-User|Claude\-Web|ClaudeBot|cohere\-ai|Diffbot|FacebookBot|facebookexternalhit|FriendlyCrawler|Google\-Extended|GoogleOther|GoogleOther\-Image|GoogleOther\-Video|GPTBot|iaskspider/2\.0|ICC\-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\ Bot|Meta\-ExternalAgent|Meta\-ExternalFetcher|OAI\-SearchBot|omgili|omgilibot|PerplexityBot|PetalBot|Scrapy|Sidetrade\ indexer\ bot|Timpibot|VelenPublicWebCrawler|Webzio\-Extended|YouBot|crawler\.with\.dots|star\*\*\*crawler|Is\ this\ a\ crawler\?|a\[mazing\]\{42\}\(robot\)|2\^32\$|curl\|sudo\ bash)") { + return 403; +} \ No newline at end of file diff --git a/code/tests.py b/code/tests.py index 94cbb47..61d69b4 100755 --- a/code/tests.py +++ b/code/tests.py @@ -4,7 +4,7 @@ import json import unittest -from robots import json_to_txt, json_to_table, json_to_htaccess +from robots import json_to_txt, json_to_table, json_to_htaccess, json_to_nginx class RobotsUnittestExtensions: def loadJson(self, pathname): @@ -50,6 +50,16 @@ class TestHtaccessGeneration(unittest.TestCase, RobotsUnittestExtensions): robots_htaccess = json_to_htaccess(self.robots_dict) self.assertEqualsFile("test_files/.htaccess", robots_htaccess) +class TestNginxConfigGeneration(unittest.TestCase, RobotsUnittestExtensions): + maxDiff = 8192 + + def setUp(self): + self.robots_dict = self.loadJson("test_files/robots.json") + + def test_nginx_generation(self): + robots_nginx = json_to_nginx(self.robots_dict) + self.assertEqualsFile("test_files/nginx-block-ai-bots.conf", robots_nginx) + if __name__ == "__main__": import os