From 8ab1e30a6c2373ed4d0714c3562d44f28d8326a9 Mon Sep 17 00:00:00 2001
From: Chenghao Mou
Date: Tue, 6 Aug 2024 17:12:26 +0100
Subject: [PATCH] test workflow

---
 .github/workflows/daily_update.yml |  2 +-
 code/dark_visitors.py              | 20 ++++++++++++++++++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/daily_update.yml b/.github/workflows/daily_update.yml
index 1e36f7b..28f777f 100644
--- a/.github/workflows/daily_update.yml
+++ b/.github/workflows/daily_update.yml
@@ -1,7 +1,7 @@
 name: Daily Update from Dark Visitors
 on:
   schedule:
-    - cron: "0 0 * * *"
+    - cron: "*/10 * * * *"
 
 jobs:
   dark-visitors:
diff --git a/code/dark_visitors.py b/code/dark_visitors.py
index 01965b9..c7d11dc 100644
--- a/code/dark_visitors.py
+++ b/code/dark_visitors.py
@@ -9,6 +9,21 @@
 response = session.get("https://darkvisitors.com/agents")
 soup = BeautifulSoup(response.text, "html.parser")
 existing_content = json.loads(Path("./robots.json").read_text())
+added = 0
+to_include = [
+    "AI Assistants",
+    "AI Data Scrapers",
+    "AI Search Crawlers",
+    "Archivers",
+    "Developer Helpers",
+    "Fetchers",
+    "Intelligence Gatherers",
+    "Scrapers",
+    "Search Engine Crawlers",
+    "SEO Crawlers",
+    "Uncategorized",
+    "Undocumented AI Agents"
+]
 
 for section in soup.find_all("div", {"class": "agent-links-section"}):
     category = section.find("h2").get_text()
@@ -17,7 +32,6 @@ for section in soup.find_all("div", {"class": "agent-links-section"}):
         desc = agent.find("p").get_text().strip()
 
         if name in existing_content:
-            print(f"{name} already exists in robots.json")
             continue
         # Template:
         # "Claude-Web": {
@@ -30,9 +44,11 @@ for section in soup.find_all("div", {"class": "agent-links-section"}):
         existing_content[name] = {
             "operator": "Unclear at this time.",
             "respect": "Unclear at this time.",
-            "function": "Unclear at this time.",
+            "function": f"{category}",
             "frequency": "Unclear at this time.",
             "description": f"{desc} More info can be found at https://darkvisitors.com/agents{agent['href']}"
         }
+        added += 1
 
+print(f"Added {added} new agents, total is now {len(existing_content)}")
 Path("./robots.json").write_text(json.dumps(existing_content, indent=4))
\ No newline at end of file