From b5cc034f1e68fa61b7a294e1b7f78df2032489d1 Mon Sep 17 00:00:00 2001 From: Cory Dransfeldt Date: Sat, 2 Mar 2024 11:23:48 -0800 Subject: [PATCH] chore: update robots + posts --- src/_data/json/robots.js | 1 + src/posts/2024/go-ahead-and-block-ai-web-crawlers.md | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/src/_data/json/robots.js b/src/_data/json/robots.js index bee1a7ef..67d4e9b5 100644 --- a/src/_data/json/robots.js +++ b/src/_data/json/robots.js @@ -14,6 +14,7 @@ export default [ 'FacebookBot', 'Google-Extended', 'GPTBot', + 'ImagesiftBot', 'magpie-crawler', 'omgili', 'omgilibot', diff --git a/src/posts/2024/go-ahead-and-block-ai-web-crawlers.md b/src/posts/2024/go-ahead-and-block-ai-web-crawlers.md index 9d8e2419..728e794f 100644 --- a/src/posts/2024/go-ahead-and-block-ai-web-crawlers.md +++ b/src/posts/2024/go-ahead-and-block-ai-web-crawlers.md @@ -83,6 +83,9 @@ Disallow: / User-agent: GPTBot Disallow: / +User-agent: ImagesiftBot +Disallow: / + User-agent: magpie-crawler Disallow: / @@ -105,4 +108,8 @@ User-agent: YouBot Disallow: / ``` +**Other great posts on the subject:** +- [I’m blocking AI-crawlers](https://roelant.net/en/2023/im-blocking-ai-crawlers/) +- [Block the Bots that Feed “AI” Models by Scraping Your Website](https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/) + [^1]: I've yet to definitively identify Arc Search's user agent but I'd like to, so I can block it and share it — but that assumes they respect `robots.txt` declarations. \ No newline at end of file