mirror of
https://github.com/ai-robots-txt/ai.robots.txt.git
synced 2025-04-04 19:13:57 +00:00
Merge pull request #42 from commoncrawl/main
feat: make CCBot entry more accurate
This commit is contained in:
commit
2f67e77ddb
1 changed file with 4 additions and 4 deletions
|
@@ -42,10 +42,10 @@
|
|||
"respect": "No"
|
||||
},
|
||||
"CCBot": {
|
||||
"description": "Sources data that is made openly available and is used to train AI models.",
|
||||
"frequency": "Unclear at this time.",
|
||||
"function": "Provides crawl data for an open source repository that has been used to train LLMs.",
|
||||
"operator": "[Common Crawl](https://commoncrawl.org)",
|
||||
"description": "Web archive going back to 2008. [Cited in thousands of research papers per year](https://commoncrawl.org/research-papers).",
|
||||
"frequency": "Monthly at present.",
|
||||
"function": "Provides open crawl dataset, used for many purposes, including Machine Learning/AI.",
|
||||
"operator": "[Common Crawl Foundation](https://commoncrawl.org)",
|
||||
"respect": "[Yes](https://commoncrawl.org/ccbot)"
|
||||
},
|
||||
"ChatGPT-User": {
|
||||
|
|
Loading…
Reference in a new issue