diff --git a/code/dark_visitors.py b/code/dark_visitors.py index 2a84d58..3b9775b 100644 --- a/code/dark_visitors.py +++ b/code/dark_visitors.py @@ -13,14 +13,14 @@ to_include = [ "AI Assistants", "AI Data Scrapers", "AI Search Crawlers", - "Archivers", - "Developer Helpers", - "Fetchers", - "Intelligence Gatherers", - "Scrapers", - "Search Engine Crawlers", - "SEO Crawlers", - "Uncategorized", + # "Archivers", + # "Developer Helpers", + # "Fetchers", + # "Intelligence Gatherers", + # "Scrapers", + # "Search Engine Crawlers", + # "SEO Crawlers", + # "Uncategorized", "Undocumented AI Agents" ] @@ -29,8 +29,7 @@ for section in soup.find_all("div", {"class": "agent-links-section"}): for agent in section.find_all("a", href=True): name = agent.find("div", {"class": "agent-name"}).get_text().strip() desc = agent.find("p").get_text().strip() - - # TODO: there seems to be a typo? + default_values = { "Unclear at this time.", "No information. provided.", @@ -39,6 +38,7 @@ for section in soup.find_all("div", {"class": "agent-links-section"}): } default_value = "Unclear at this time." + # Parse the operator information from the description if possible operator = default_value if "operated by " in desc: try: @@ -46,7 +46,6 @@ for section in soup.find_all("div", {"class": "agent-links-section"}): except Exception as e: print(f"Error: {e}") - def consolidate(field: str, value: str) -> str: # New entry if name not in existing_content: @@ -55,7 +54,7 @@ for section in soup.find_all("div", {"class": "agent-links-section"}): if field not in existing_content[name]: return value # Unclear value - if existing_content[name][field] in default_values: + if existing_content[name][field] in default_values and value not in default_values: return value # Existing value return existing_content[name][field]