Mirror of https://github.com/ai-robots-txt/ai.robots.txt.git, last synced 2025-05-19 16:53:11 +00:00.
Merge branch 'main' into main
This commit is contained in:
commit
6baa7725b3
10 changed files with 160 additions and 7 deletions
|
@ -178,12 +178,19 @@ def json_to_nginx(robot_json):
|
|||
config = f"if ($http_user_agent ~* \"{list_to_pcre(robot_json.keys())}\") {{\n return 403;\n}}"
|
||||
return config
|
||||
|
||||
|
||||
def json_to_caddy(robot_json):
    """Render a Caddy named matcher (`@aibots`) that matches any listed robot.

    The matcher uses `header_regexp` on the User-Agent header with a PCRE
    alternation built from the robot names in *robot_json*.
    """
    parts = [
        "@aibots {\n ",
        f' header_regexp User-Agent "{list_to_pcre(robot_json.keys())}"',
        "\n}",
    ]
    return "".join(parts)
|
||||
|
||||
def json_to_haproxy(robots_json):
    """Render the HAProxy denylist: one robot name per line.

    Follow the instructions in the README to wire the generated file
    into an HAProxy configuration.
    """
    # Iterating the dict yields its keys directly; the original
    # f"{k}" wrapper and explicit .keys() call added nothing.
    return "\n".join(robots_json)
|
||||
|
||||
|
||||
|
||||
def update_file_if_changed(file_name, converter):
|
||||
"""Update files if newer content is available and log the (in)actions."""
|
||||
|
@ -217,6 +224,10 @@ def conversions():
|
|||
update_file_if_changed(
|
||||
file_name="./Caddyfile",
|
||||
converter=json_to_caddy
|
||||
|
||||
update_file_if_changed(
|
||||
file_name="./haproxy-block-ai-bots.txt",
|
||||
converter=json_to_haproxy,
|
||||
)
|
||||
|
||||
|
||||
|
|
47
code/test_files/haproxy-block-ai-bots.txt
Normal file
47
code/test_files/haproxy-block-ai-bots.txt
Normal file
|
@ -0,0 +1,47 @@
|
|||
AI2Bot
|
||||
Ai2Bot-Dolma
|
||||
Amazonbot
|
||||
anthropic-ai
|
||||
Applebot
|
||||
Applebot-Extended
|
||||
Bytespider
|
||||
CCBot
|
||||
ChatGPT-User
|
||||
Claude-Web
|
||||
ClaudeBot
|
||||
cohere-ai
|
||||
Diffbot
|
||||
FacebookBot
|
||||
facebookexternalhit
|
||||
FriendlyCrawler
|
||||
Google-Extended
|
||||
GoogleOther
|
||||
GoogleOther-Image
|
||||
GoogleOther-Video
|
||||
GPTBot
|
||||
iaskspider/2.0
|
||||
ICC-Crawler
|
||||
ImagesiftBot
|
||||
img2dataset
|
||||
ISSCyberRiskCrawler
|
||||
Kangaroo Bot
|
||||
Meta-ExternalAgent
|
||||
Meta-ExternalFetcher
|
||||
OAI-SearchBot
|
||||
omgili
|
||||
omgilibot
|
||||
Perplexity-User
|
||||
PerplexityBot
|
||||
PetalBot
|
||||
Scrapy
|
||||
Sidetrade indexer bot
|
||||
Timpibot
|
||||
VelenPublicWebCrawler
|
||||
Webzio-Extended
|
||||
YouBot
|
||||
crawler.with.dots
|
||||
star***crawler
|
||||
Is this a crawler?
|
||||
a[mazing]{42}(robot)
|
||||
2^32$
|
||||
curl|sudo bash
|
|
@ -4,7 +4,7 @@
|
|||
import json
|
||||
import unittest
|
||||
|
||||
from robots import json_to_txt, json_to_table, json_to_htaccess, json_to_nginx, json_to_caddy
|
||||
from robots import json_to_txt, json_to_table, json_to_htaccess, json_to_nginx, json_to_haproxy, json_to_caddy
|
||||
|
||||
class RobotsUnittestExtensions:
|
||||
def loadJson(self, pathname):
|
||||
|
@ -60,6 +60,16 @@ class TestNginxConfigGeneration(unittest.TestCase, RobotsUnittestExtensions):
|
|||
robots_nginx = json_to_nginx(self.robots_dict)
|
||||
self.assertEqualsFile("test_files/nginx-block-ai-bots.conf", robots_nginx)
|
||||
|
||||
class TestHaproxyConfigGeneration(unittest.TestCase, RobotsUnittestExtensions):
    """Verify json_to_haproxy output against the checked-in fixture file."""

    # Large diff limit so a fixture mismatch prints the full comparison.
    maxDiff = 8192

    def setUp(self):
        """Load the shared robots.json fixture used by the generator."""
        self.robots_dict = self.loadJson("test_files/robots.json")

    def test_haproxy_generation(self):
        """Generated denylist must match test_files/haproxy-block-ai-bots.txt."""
        generated = json_to_haproxy(self.robots_dict)
        self.assertEqualsFile("test_files/haproxy-block-ai-bots.txt", generated)
|
||||
|
||||
class TestRobotsNameCleaning(unittest.TestCase):
|
||||
def test_clean_name(self):
|
||||
from robots import clean_robot_name
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue