From 5aa08bc0022e8e9960e4cf52359ca2d910f795bf Mon Sep 17 00:00:00 2001 From: Joshua Sheard Date: Sun, 19 Jan 2025 22:03:50 +0000 Subject: [PATCH 1/2] Add Crawlspace --- robots.json | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/robots.json b/robots.json index c444cb4..d71c80b 100644 --- a/robots.json +++ b/robots.json @@ -90,6 +90,13 @@ "frequency": "Unclear at this time.", "description": "cohere-training-data-crawler is a web crawler operated by Cohere to download training data for its LLMs (Large Language Models) that power its enterprise AI products. More info can be found at https://darkvisitors.com/agents/agents/cohere-training-data-crawler" }, + "Crawlspace": { + "operator": "[Crawlspace](https://crawlspace.dev)", + "respect": "[Yes](https://news.ycombinator.com/item?id=42756654)", + "function": "Scrapes data", + "frequency": "Unclear at this time.", + "description": "Provides crawling services for any purpose, but most likely to be used for AI model training." + }, "Diffbot": { "operator": "[Diffbot](https://www.diffbot.com/)", "respect": "At the discretion of Diffbot users.", @@ -300,4 +307,4 @@ "frequency": "No information.", "description": "Retrieves data used for You.com web search engine and LLMs." } -} \ No newline at end of file +} From 7427d96bac08d59276292ca7a66d77365f7d26b9 Mon Sep 17 00:00:00 2001 From: Joshua Sheard Date: Mon, 20 Jan 2025 10:59:02 +0000 Subject: [PATCH 2/2] Update robots.json Co-authored-by: Glyn Normington --- robots.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/robots.json b/robots.json index d71c80b..465a61c 100644 --- a/robots.json +++ b/robots.json @@ -95,7 +95,7 @@ "respect": "[Yes](https://news.ycombinator.com/item?id=42756654)", "function": "Scrapes data", "frequency": "Unclear at this time.", - "description": "Provides crawling services for any purpose, but most likely to be used for AI model training." + "description": "Provides crawling services for any purpose, probably including AI model training." }, "Diffbot": { "operator": "[Diffbot](https://www.diffbot.com/)",