restrict scope

This commit is contained in:
Chenghao Mou 2024-08-06 19:33:49 +01:00
parent 8c6482fb45
commit 2a3685385c

View file

@ -13,14 +13,14 @@ to_include = [
"AI Assistants",
"AI Data Scrapers",
"AI Search Crawlers",
"Archivers",
"Developer Helpers",
"Fetchers",
"Intelligence Gatherers",
"Scrapers",
"Search Engine Crawlers",
"SEO Crawlers",
"Uncategorized",
# "Archivers",
# "Developer Helpers",
# "Fetchers",
# "Intelligence Gatherers",
# "Scrapers",
# "Search Engine Crawlers",
# "SEO Crawlers",
# "Uncategorized",
"Undocumented AI Agents"
]
@ -29,8 +29,7 @@ for section in soup.find_all("div", {"class": "agent-links-section"}):
for agent in section.find_all("a", href=True):
name = agent.find("div", {"class": "agent-name"}).get_text().strip()
desc = agent.find("p").get_text().strip()
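# TODO: "No information. provided." below looks like a typo (stray period) - confirm it matches the upstream text before changing it.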
default_values = {
"Unclear at this time.",
"No information. provided.",
@ -39,6 +38,7 @@ for section in soup.find_all("div", {"class": "agent-links-section"}):
}
default_value = "Unclear at this time."
# Parse the operator information from the description if possible
operator = default_value
if "operated by " in desc:
try:
@ -46,7 +46,6 @@ for section in soup.find_all("div", {"class": "agent-links-section"}):
except Exception as e:
print(f"Error: {e}")
def consolidate(field: str, value: str) -> str:
# New entry
if name not in existing_content:
@ -55,7 +54,7 @@ for section in soup.find_all("div", {"class": "agent-links-section"}):
if field not in existing_content[name]:
return value
# Unclear value
if existing_content[name][field] in default_values:
if existing_content[name][field] in default_values and value not in default_values:
return value
# Existing value
return existing_content[name][field]