chore: restore FriendlyCrawler + ImageSift

2025-05-20 09:13:11 +00:00 · 2024-08-04 12:28:48 -07:00 · 2024-08-04 12:28:48 -07:00 · 1ca936ce11
commit 1ca936ce11
parent 8de5bc8e01
1 changed file with 14 additions and 0 deletions
--- a/robots.json
+++ b/robots.json
@@ -83,6 +83,13 @@
        "frequency": "Unclear at this time.",
        "description": "Unclear at this time."
    },
+    "FriendlyCrawler": {
+        "operator": "Unknown",
+        "respect": "[Yes](https:\/\/imho.alex-kunz.com\/2024\/01\/25\/an-update-on-friendly-crawler)",
+        "function": "We are using the data from the crawler to build datasets for machine learning experiments.",
+        "frequency": "Unclear at this time.",
+        "description": "Unclear who the operator is, but data is used for training/machine learning."
+    },
    "Google-Extended": {
        "operator": "Google",
        "respect": "[Yes](https:\/\/developers.google.com\/search\/docs\/crawling-indexing\/overview-google-crawlers)",
@@ -125,6 +132,13 @@
        "frequency": "No information.",
        "description": "Use the collected data for artificial intelligence technologies; provide data to third parties, including commercial companies; those companies can use the data for their own business."
    },
+    "ImageSift": {
+        "operator": "[ImageSift](https:\/\/imagesift.com)",
+        "respect": "[Yes](https:\/\/imagesift.com\/about)",
+        "function": "ImageSiftBot is a web crawler that scrapes the internet for publicly available images to support our suite of web intelligence products.",
+        "frequency": "No information.",
+        "description": "Once images and text are downloaded from a webpage, ImageSift analyzes this data from the page and stores the information in an index. Our web intelligence products use this index to enable search and retrieval of similar images."
+    },
    "img2dataset": {
        "operator": "[img2dataset](https:\/\/github.com\/rom1504\/img2dataset)",
        "respect": "Unclear at this time.",