From 1ca936ce115dad316417c571766106e7cde07ec8 Mon Sep 17 00:00:00 2001 From: Cory Dransfeldt Date: Sun, 4 Aug 2024 12:28:48 -0700 Subject: [PATCH] chore: restore FriendlyCrawler + ImageSift --- robots.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/robots.json b/robots.json index b61946f..5427572 100644 --- a/robots.json +++ b/robots.json @@ -83,6 +83,13 @@ "frequency": "Unclear at this time.", "description": "Unclear at this time." }, + "FriendlyCrawler": { + "operator": "Unknown", + "respect": "[Yes](https:\/\/imho.alex-kunz.com\/2024\/01\/25\/an-update-on-friendly-crawler)", + "function": "We are using the data from the crawler to build datasets for machine learning experiments.", + "frequency": "Unclear at this time.", + "description": "Unclear who the operator is, but data is used for training/machine learning." + }, "Google-Extended": { "operator": "Google", "respect": "[Yes](https:\/\/developers.google.com\/search\/docs\/crawling-indexing\/overview-google-crawlers)", @@ -125,6 +132,13 @@ "frequency": "No information.", "description": "Use the collected data for artificial intelligence technologies; provide data to third parties, including commercial companies; those companies can use the data for their own business." }, + "ImageSift": { + "operator": "[ImageSift](https:\/\/imagesift.com)", + "respect": "[Yes](https:\/\/imagesift.com\/about)", + "function": "ImageSiftBot is a web crawler that scrapes the internet for publicly available images to support our suite of web intelligence products.", + "frequency": "No information.", + "description": "Once images and text are downloaded from a webpage, ImageSift analyzes this data from the page and stores the information in an index. Our web intelligence products use this index to enable search and retrieval of similar images." + }, "img2dataset": { "operator": "[img2dataset](https:\/\/github.com\/rom1504\/img2dataset)", "respect": "Unclear at this time.",