mirror of
https://github.com/ai-robots-txt/ai.robots.txt.git
synced 2025-04-19 00:26:01 +00:00
chore: add Diffbot and scoopit
This commit is contained in:
parent
29e0799332
commit
dea035365f
5 changed files with 8 additions and 2 deletions
|
@ -1,4 +1,4 @@
|
|||
# for apache2.conf or .htaccess; intended to block via user agent string
|
||||
RewriteEngine On
|
||||
RewriteCond %{HTTP_USER_AGENT} (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|Seekr|YouBot) [NC]
|
||||
RewriteCond %{HTTP_USER_AGENT} (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|Diffbot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|scoop.it|Seekr|YouBot) [NC]
|
||||
RewriteRule .* - [F,L]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# for nginx.conf; intended to block via user agent string
|
||||
# note: we recommend following the approach outlined here https://underlap.org/nginx-robot-access
|
||||
# and contributing if you're able: https://github.com/glyn/nginx_robot_access
|
||||
if ($http_user_agent ~* (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|Seekr|YouBot)) {
|
||||
if ($http_user_agent ~* (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|Diffbot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|scoop.it|Seekr|YouBot)) {
|
||||
return 403;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue