mirror of
https://github.com/ai-robots-txt/ai.robots.txt.git
synced 2025-04-04 19:13:57 +00:00
Add Crawlspace
This commit is contained in:
parent
b7f908e305
commit
5aa08bc002
1 changed file with 8 additions and 1 deletion
|
@@ -90,6 +90,13 @@
|
||||||
"frequency": "Unclear at this time.",
|
"frequency": "Unclear at this time.",
|
||||||
"description": "cohere-training-data-crawler is a web crawler operated by Cohere to download training data for its LLMs (Large Language Models) that power its enterprise AI products. More info can be found at https://darkvisitors.com/agents/agents/cohere-training-data-crawler"
|
"description": "cohere-training-data-crawler is a web crawler operated by Cohere to download training data for its LLMs (Large Language Models) that power its enterprise AI products. More info can be found at https://darkvisitors.com/agents/agents/cohere-training-data-crawler"
|
||||||
},
|
},
|
||||||
|
"Crawlspace": {
|
||||||
|
"operator": "[Crawlspace](https://crawlspace.dev)",
|
||||||
|
"respect": "[Yes](https://news.ycombinator.com/item?id=42756654)",
|
||||||
|
"function": "Scrapes data",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "Provides crawling services for any purpose, but most likely to be used for AI model training."
|
||||||
|
},
|
||||||
"Diffbot": {
|
"Diffbot": {
|
||||||
"operator": "[Diffbot](https://www.diffbot.com/)",
|
"operator": "[Diffbot](https://www.diffbot.com/)",
|
||||||
"respect": "At the discretion of Diffbot users.",
|
"respect": "At the discretion of Diffbot users.",
|
||||||
|
@@ -300,4 +307,4 @@
|
||||||
"frequency": "No information.",
|
"frequency": "No information.",
|
||||||
"description": "Retrieves data used for You.com web search engine and LLMs."
|
"description": "Retrieves data used for You.com web search engine and LLMs."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue