chore: restore FriendlyCrawler + ImageSift

This commit is contained in:
Cory Dransfeldt 2024-08-04 12:28:48 -07:00
parent 8de5bc8e01
commit 1ca936ce11
No known key found for this signature in database

View file

@@ -83,6 +83,13 @@
"frequency": "Unclear at this time.",
"description": "Unclear at this time."
},
"FriendlyCrawler": {
"operator": "Unknown",
"respect": "[Yes](https:\/\/imho.alex-kunz.com\/2024\/01\/25\/an-update-on-friendly-crawler)",
"function": "We are using the data from the crawler to build datasets for machine learning experiments.",
"frequency": "Unclear at this time.",
"description": "Unclear who the operator is, but data is used for training/machine learning."
},
"Google-Extended": {
"operator": "Google",
"respect": "[Yes](https:\/\/developers.google.com\/search\/docs\/crawling-indexing\/overview-google-crawlers)",
@@ -125,6 +132,13 @@
"frequency": "No information.",
"description": "Use the collected data for artificial intelligence technologies; provide data to third parties, including commercial companies; those companies can use the data for their own business."
},
"ImageSift": {
"operator": "[ImageSift](https:\/\/imagesift.com)",
"respect": "[Yes](https:\/\/imagesift.com\/about)",
"function": "ImageSiftBot is a web crawler that scrapes the internet for publicly available images to support our suite of web intelligence products.",
"frequency": "No information.",
"description": "Once images and text are downloaded from a webpage, ImageSift analyzes this data from the page and stores the information in an index. Our web intelligence products use this index to enable search and retrieval of similar images."
},
"img2dataset": {
"operator": "[img2dataset](https:\/\/github.com\/rom1504\/img2dataset)",
"respect": "Unclear at this time.",