mirror of
https://github.com/ai-robots-txt/ai.robots.txt.git
synced 2025-04-05 03:17:46 +00:00
chore: add Diffbot and scoopit
This commit is contained in:
parent
29e0799332
commit
dea035365f
5 changed files with 8 additions and 2 deletions
|
@ -29,6 +29,7 @@ const botUas = [
|
|||
'Claude-Web',
|
||||
'cohere-ai',
|
||||
'DataForSeoBot',
|
||||
'Diffbot',
|
||||
'FacebookBot',
|
||||
'FriendlyCrawler',
|
||||
'Google-Extended',
|
||||
|
@ -44,6 +45,7 @@ const botUas = [
|
|||
'peer39_crawler/1.0',
|
||||
'PerplexityBot',
|
||||
'PiplBot',
|
||||
'scoop.it',
|
||||
'Seekr',
|
||||
'YouBot',
|
||||
]
|
||||
|
|
|
@ -11,6 +11,7 @@ User-agent: ClaudeBot
|
|||
User-agent: Claude-Web
|
||||
User-agent: cohere-ai
|
||||
User-agent: DataForSeoBot
|
||||
User-agent: Diffbot
|
||||
User-agent: FacebookBot
|
||||
User-agent: FriendlyCrawler
|
||||
User-agent: Google-Extended
|
||||
|
@ -26,6 +27,7 @@ User-agent: peer39_crawler
|
|||
User-agent: peer39_crawler/1.0
|
||||
User-agent: PerplexityBot
|
||||
User-agent: PiplBot
|
||||
User-agent: scoop.it
|
||||
User-agent: Seekr
|
||||
User-agent: YouBot
|
||||
Disallow: /
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# for apache2.conf or .htaccess; intended to block via user agent string
|
||||
RewriteEngine On
|
||||
RewriteCond %{HTTP_USER_AGENT} (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|Seekr|YouBot) [NC]
|
||||
RewriteCond %{HTTP_USER_AGENT} (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|Diffbot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|scoop.it|Seekr|YouBot) [NC]
|
||||
RewriteRule .* - [F,L]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# for nginx.conf; intended to block via user agent string
|
||||
# note: we recommend following the approach outlined here https://underlap.org/nginx-robot-access
|
||||
# and contributing if you're able: https://github.com/glyn/nginx_robot_access
|
||||
if ($http_user_agent ~* (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|Seekr|YouBot)) {
|
||||
if ($http_user_agent ~* (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|Diffbot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|scoop.it|Seekr|YouBot)) {
|
||||
return 403;
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|Claude-Web | | | | | |
|
||||
|cohere-ai | | | | | |
|
||||
|DataForSeoBot | | | | | |
|
||||
|Diffbot | | | | | |
|
||||
|FacebookBot | | | | | |
|
||||
|Google-Extended| | | | | |
|
||||
|GoogleOther | | | | | |
|
||||
|
@ -27,5 +28,6 @@
|
|||
|peer39_crawler/1.0| | | | | |
|
||||
|PerplexityBot | | | | | |
|
||||
|PiplBot | | | | | |
|
||||
|scoop.it | | | | | |
|
||||
|Seekr | | | | | |
|
||||
|YouBot | | | | | |
|
||||
|
|
Loading…
Reference in a new issue