chore: add Diffbot and scoop.it

This commit is contained in:
Cory Dransfeldt 2024-05-05 14:50:04 -07:00
parent 29e0799332
commit dea035365f
No known key found for this signature in database
5 changed files with 8 additions and 2 deletions

View file

@ -29,6 +29,7 @@ const botUas = [
'Claude-Web', 'Claude-Web',
'cohere-ai', 'cohere-ai',
'DataForSeoBot', 'DataForSeoBot',
'Diffbot',
'FacebookBot', 'FacebookBot',
'FriendlyCrawler', 'FriendlyCrawler',
'Google-Extended', 'Google-Extended',
@ -44,6 +45,7 @@ const botUas = [
'peer39_crawler/1.0', 'peer39_crawler/1.0',
'PerplexityBot', 'PerplexityBot',
'PiplBot', 'PiplBot',
'scoop.it',
'Seekr', 'Seekr',
'YouBot', 'YouBot',
] ]

View file

@ -11,6 +11,7 @@ User-agent: ClaudeBot
User-agent: Claude-Web User-agent: Claude-Web
User-agent: cohere-ai User-agent: cohere-ai
User-agent: DataForSeoBot User-agent: DataForSeoBot
User-agent: Diffbot
User-agent: FacebookBot User-agent: FacebookBot
User-agent: FriendlyCrawler User-agent: FriendlyCrawler
User-agent: Google-Extended User-agent: Google-Extended
@ -26,6 +27,7 @@ User-agent: peer39_crawler
User-agent: peer39_crawler/1.0 User-agent: peer39_crawler/1.0
User-agent: PerplexityBot User-agent: PerplexityBot
User-agent: PiplBot User-agent: PiplBot
User-agent: scoop.it
User-agent: Seekr User-agent: Seekr
User-agent: YouBot User-agent: YouBot
Disallow: / Disallow: /

View file

@ -1,4 +1,4 @@
# for apache2.conf or .htaccess; intended to block via user agent string # for apache2.conf or .htaccess; intended to block via user agent string
RewriteEngine On RewriteEngine On
RewriteCond %{HTTP_USER_AGENT} (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|Seekr|YouBot) [NC] RewriteCond %{HTTP_USER_AGENT} (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|Diffbot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|scoop.it|Seekr|YouBot) [NC]
RewriteRule .* - [F,L] RewriteRule .* - [F,L]

View file

@ -1,6 +1,6 @@
# for nginx.conf; intended to block via user agent string # for nginx.conf; intended to block via user agent string
# note: we recommend following the approach outlined here https://underlap.org/nginx-robot-access # note: we recommend following the approach outlined here https://underlap.org/nginx-robot-access
# and contributing if you're able: https://github.com/glyn/nginx_robot_access # and contributing if you're able: https://github.com/glyn/nginx_robot_access
if ($http_user_agent ~* (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|Seekr|YouBot)) { if ($http_user_agent ~* (AdsBot-Google|Amazonbot|anthropic-ai|Applebot|AwarioRssBot|AwarioSmartBot|Bytespider|CCBot|ChatGPT-User|ClaudeBot|Claude-Web|cohere-ai|DataForSeoBot|Diffbot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GPTBot|img2dataset|ImagesiftBot|magpie-crawler|Meltwater|omgili|omgilibot|peer39_crawler|peer39_crawler/1.0|PerplexityBot|PiplBot|scoop.it|Seekr|YouBot)) {
return 403; return 403;
} }

View file

@ -13,6 +13,7 @@
|Claude-Web | | | | | | |Claude-Web | | | | | |
|cohere-ai | | | | | | |cohere-ai | | | | | |
|DataForSeoBot | | | | | | |DataForSeoBot | | | | | |
|Diffbot | | | | | |
|FacebookBot | | | | | | |FacebookBot | | | | | |
|Google-Extended| | | | | | |Google-Extended| | | | | |
|GoogleOther | | | | | | |GoogleOther | | | | | |
@ -27,5 +28,6 @@
|peer39_crawler/1.0| | | | | | |peer39_crawler/1.0| | | | | |
|PerplexityBot | | | | | | |PerplexityBot | | | | | |
|PiplBot | | | | | | |PiplBot | | | | | |
|scoop.it | | | | | |
|Seekr | | | | | | |Seekr | | | | | |
|YouBot | | | | | | |YouBot | | | | | |