From 89d4c6e5ca03f0aedec09b9191e2aece6f2efec3 Mon Sep 17 00:00:00 2001
From: "ai.robots.txt" <ai.robots.txt@users.noreply.github.com>
Date: Sat, 1 Feb 2025 10:51:01 +0000
Subject: [PATCH] Merge pull request #73 from nisbet-hubbard/patch-8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Actually block Semrush’s AI tools
---
 .htaccess               | 2 +-
 robots.txt              | 3 ++-
 table-of-bot-metrics.md | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.htaccess b/.htaccess
index beaddc3..97482e2 100644
--- a/.htaccess
+++ b/.htaccess
@@ -1,3 +1,3 @@
 RewriteEngine On
-RewriteCond %{HTTP_USER_AGENT} ^.*(AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\ Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|PerplexityBot|PetalBot|Scrapy|SemrushBot|Sidetrade\ indexer\ bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot).*$ [NC]
+RewriteCond %{HTTP_USER_AGENT} ^.*(AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\ Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade\ indexer\ bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot).*$ [NC]
 RewriteRule .* - [F,L]
\ No newline at end of file
diff --git a/robots.txt b/robots.txt
index fd388fd..3839e55 100644
--- a/robots.txt
+++ b/robots.txt
@@ -36,7 +36,8 @@ User-agent: PanguBot
 User-agent: PerplexityBot
 User-agent: PetalBot
 User-agent: Scrapy
-User-agent: SemrushBot
+User-agent: SemrushBot-OCOB
+User-agent: SemrushBot-SWA
 User-agent: Sidetrade indexer bot
 User-agent: Timpibot
 User-agent: VelenPublicWebCrawler
diff --git a/table-of-bot-metrics.md b/table-of-bot-metrics.md
index f44c585..b51bbae 100644
--- a/table-of-bot-metrics.md
+++ b/table-of-bot-metrics.md
@@ -38,7 +38,8 @@
 | PerplexityBot | [Perplexity](https://www.perplexity.ai/) | [No](https://www.macstories.net/stories/wired-confirms-perplexity-is-bypassing-efforts-by-websites-to-block-its-web-crawler/) | Used to answer queries at the request of users. | Takes action based on user prompts. | Operated by Perplexity to obtain results in response to user queries. |
 | PetalBot | [Huawei](https://huawei.com/) | Yes | Used to provide recommendations in Hauwei assistant and AI search services. | No explicit frequency provided. | Operated by Huawei to provide search and AI assistant services. |
 | Scrapy | [Zyte](https://www.zyte.com) | Unclear at this time. | Scrapes data for a variety of uses including training AI. | No information. | "AI and machine learning applications often need large amounts of quality data, and web data extraction is a fast, efficient way to build structured data sets." |
-| SemrushBot | [Semrush](https://www.semrush.com/) | [Yes](https://www.semrush.com/bot/) | Scrapes data for use in LLM article-writing tool. | Roughly once every 10 seconds. | SemrushBot is a bot which, among other functions, scrapes data for use in ContentShake AI tool reports. |
+| SemrushBot-OCOB | [Semrush](https://www.semrush.com/) | [Yes](https://www.semrush.com/bot/) | Crawls your site for ContentShake AI tool. | Roughly once every 10 seconds. | You enter one text (on-demand) and we will make suggestions on it (the tool uses AI but we are not actively crawling the web, you need to manually enter one text/URL). |
+| SemrushBot-SWA | [Semrush](https://www.semrush.com/) | [Yes](https://www.semrush.com/bot/) | Checks URLs on your site for SWA tool. | Roughly once every 10 seconds. | You enter one text (on-demand) and we will make suggestions on it (the tool uses AI but we are not actively crawling the web, you need to manually enter one text/URL). |
 | Sidetrade indexer bot | [Sidetrade](https://www.sidetrade.com) | Unclear at this time. | Extracts data for a variety of uses including training AI. | No information. | AI product training. |
 | Timpibot | [Timpi](https://timpi.io) | Unclear at this time. | Scrapes data for use in training LLMs. | No information. | Makes data available for training AI models. |
 | VelenPublicWebCrawler | [Velen Crawler](https://velen.io) | [Yes](https://velen.io) | Scrapes data for business data sets and machine learning models. | No information. | "Our goal with this crawler is to build business datasets and machine learning models to better understand the web." |