mirror of
https://github.com/ai-robots-txt/ai.robots.txt.git
synced 2025-05-19 08:43:11 +00:00
Added a Caddyfile converter
This commit is contained in:
parent
bbec639c14
commit
e519f9f94c
3 changed files with 68 additions and 1 deletions
57
Caddyfile
Normal file
57
Caddyfile
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
@aibots {
|
||||||
|
header User-Agent "*AI2Bot*"
|
||||||
|
header User-Agent "*Ai2Bot-Dolma*"
|
||||||
|
header User-Agent "*aiHitBot*"
|
||||||
|
header User-Agent "*Amazonbot*"
|
||||||
|
header User-Agent "*anthropic-ai*"
|
||||||
|
header User-Agent "*Applebot*"
|
||||||
|
header User-Agent "*Applebot-Extended*"
|
||||||
|
header User-Agent "*Brightbot 1.0*"
|
||||||
|
header User-Agent "*Bytespider*"
|
||||||
|
header User-Agent "*CCBot*"
|
||||||
|
header User-Agent "*ChatGPT-User*"
|
||||||
|
header User-Agent "*Claude-Web*"
|
||||||
|
header User-Agent "*ClaudeBot*"
|
||||||
|
header User-Agent "*cohere-ai*"
|
||||||
|
header User-Agent "*cohere-training-data-crawler*"
|
||||||
|
header User-Agent "*Cotoyogi*"
|
||||||
|
header User-Agent "*Crawlspace*"
|
||||||
|
header User-Agent "*Diffbot*"
|
||||||
|
header User-Agent "*DuckAssistBot*"
|
||||||
|
header User-Agent "*FacebookBot*"
|
||||||
|
header User-Agent "*Factset_spyderbot*"
|
||||||
|
header User-Agent "*FirecrawlAgent*"
|
||||||
|
header User-Agent "*FriendlyCrawler*"
|
||||||
|
header User-Agent "*Google-Extended*"
|
||||||
|
header User-Agent "*GoogleOther*"
|
||||||
|
header User-Agent "*GoogleOther-Image*"
|
||||||
|
header User-Agent "*GoogleOther-Video*"
|
||||||
|
header User-Agent "*GPTBot*"
|
||||||
|
header User-Agent "*iaskspider/2.0*"
|
||||||
|
header User-Agent "*ICC-Crawler*"
|
||||||
|
header User-Agent "*ImagesiftBot*"
|
||||||
|
header User-Agent "*img2dataset*"
|
||||||
|
header User-Agent "*imgproxy*"
|
||||||
|
header User-Agent "*ISSCyberRiskCrawler*"
|
||||||
|
header User-Agent "*Kangaroo Bot*"
|
||||||
|
header User-Agent "*Meta-ExternalAgent*"
|
||||||
|
header User-Agent "*Meta-ExternalFetcher*"
|
||||||
|
header User-Agent "*NovaAct*"
|
||||||
|
header User-Agent "*OAI-SearchBot*"
|
||||||
|
header User-Agent "*omgili*"
|
||||||
|
header User-Agent "*omgilibot*"
|
||||||
|
header User-Agent "*Operator*"
|
||||||
|
header User-Agent "*PanguBot*"
|
||||||
|
header User-Agent "*Perplexity-User*"
|
||||||
|
header User-Agent "*PerplexityBot*"
|
||||||
|
header User-Agent "*PetalBot*"
|
||||||
|
header User-Agent "*Scrapy*"
|
||||||
|
header User-Agent "*SemrushBot-OCOB*"
|
||||||
|
header User-Agent "*SemrushBot-SWA*"
|
||||||
|
header User-Agent "*Sidetrade indexer bot*"
|
||||||
|
header User-Agent "*TikTokSpider*"
|
||||||
|
header User-Agent "*Timpibot*"
|
||||||
|
header User-Agent "*VelenPublicWebCrawler*"
|
||||||
|
header User-Agent "*Webzio-Extended*"
|
||||||
|
header User-Agent "*YouBot*"
|
||||||
|
}
|
|
@ -178,6 +178,12 @@ def json_to_nginx(robot_json):
|
||||||
config = f"if ($http_user_agent ~* \"{list_to_pcre(robot_json.keys())}\") {{\n return 403;\n}}"
|
config = f"if ($http_user_agent ~* \"{list_to_pcre(robot_json.keys())}\") {{\n return 403;\n}}"
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
def json_to_caddy(robot_json):
    """Render the robot names as a Caddyfile ``@aibots`` named matcher.

    One ``header User-Agent "*<name>*"`` line is emitted per key of
    *robot_json*, in the mapping's iteration order, wrapped in an
    ``@aibots { ... }`` block.

    Args:
        robot_json: Mapping whose keys are the user-agent names to match.
            Only the keys are read; the values are ignored.

    Returns:
        The matcher block as a single string with no trailing newline.
    """
    directives = []
    for name in robot_json:
        # The name sits inside a double-quoted Caddyfile token, so an embedded
        # double quote must be backslash-escaped or it would end the token
        # early and corrupt the generated file.
        escaped = str(name).replace('"', '\\"')
        directives.append(f'header User-Agent "*{escaped}*"')
    # NOTE(review): Caddy's matcher docs say multiple values for the same
    # header field are OR'ed, so any one listed name matches — confirm.
    return "@aibots {\n " + "\n ".join(directives) + "\n}"
|
||||||
|
|
||||||
|
|
||||||
def update_file_if_changed(file_name, converter):
|
def update_file_if_changed(file_name, converter):
|
||||||
"""Update files if newer content is available and log the (in)actions."""
|
"""Update files if newer content is available and log the (in)actions."""
|
||||||
|
@ -208,6 +214,10 @@ def conversions():
|
||||||
file_name="./nginx-block-ai-bots.conf",
|
file_name="./nginx-block-ai-bots.conf",
|
||||||
converter=json_to_nginx,
|
converter=json_to_nginx,
|
||||||
)
|
)
|
||||||
|
update_file_if_changed(
|
||||||
|
file_name="./Caddyfile",
|
||||||
|
converter=json_to_caddy
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -384,4 +384,4 @@
|
||||||
"frequency": "No information.",
|
"frequency": "No information.",
|
||||||
"description": "Retrieves data used for You.com web search engine and LLMs."
|
"description": "Retrieves data used for You.com web search engine and LLMs."
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Add table
Add a link
Reference in a new issue