From 3ab22bc49887325dde1ce74d0b5952fcef87e2ea Mon Sep 17 00:00:00 2001
From: fabianegli
Date: Sat, 19 Oct 2024 19:56:41 +0200
Subject: [PATCH] make conversions and updates separately triggerable

---
Review notes (text in this section is not part of the commit message):
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. The Python indentation follows from the syntax; the
  YAML context indentation is assumed -- TODO confirm against the target
  workflow file before applying.
* Revised relative to the original patch: fixed the "darkvisitor.com" typo in
  the workflow echo, dropped the redundant first ArgumentParser()
  construction, and made a flag-less invocation fail with exit status 2 via
  parser.error() instead of printing help and exiting 0. Hunk and diffstat
  counts were updated to match; the post-image blob id on the `index` line
  of code/dark_visitors.py is stale as a result.

 .github/workflows/ai_robots_update.yml | 13 ++++++++---
 code/dark_visitors.py                  | 29 +++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ai_robots_update.yml b/.github/workflows/ai_robots_update.yml
index ea5c760..b346e10 100644
--- a/.github/workflows/ai_robots_update.yml
+++ b/.github/workflows/ai_robots_update.yml
@@ -18,10 +18,17 @@ jobs:
           pip install beautifulsoup4 requests
           git config --global user.name "dark-visitors"
           git config --global user.email "dark-visitors@users.noreply.github.com"
-          echo "Running update script ..."
-          python code/dark_visitors.py
+          echo "Updating robots.json with data from darkvisitors.com ..."
+          python code/dark_visitors.py --update
           echo "... done."
           git --no-pager diff
           git add -A
-          git diff --quiet && git diff --staged --quiet || (git commit -m "Daily update from Dark Visitors" && git push)
+          git diff --quiet && git diff --staged --quiet || (git commit -m "Update from Dark Visitors" && git push)
+
+          echo "Updating robots.txt and table-of-bot-metrics.md if necessary ..."
+          python code/dark_visitors.py --convert
+          echo "... done."
+          git --no-pager diff
+          git add -A
+          git diff --quiet && git diff --staged --quiet || (git commit -m "Updated from new robots.json" && git push)
         shell: bash
diff --git a/code/dark_visitors.py b/code/dark_visitors.py
index 820c9c1..cf44e8e 100644
--- a/code/dark_visitors.py
+++ b/code/dark_visitors.py
@@ -153,5 +153,30 @@ def conversions():
 
 
 if __name__ == "__main__":
-    ingest_darkvisitors()
-    conversions()
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        prog="ai-robots",
+        description="Collects and updates information about web scrapers of AI companies.",
+        epilog="One of the flags must be set.\n",
+    )
+    parser.add_argument(
+        "--update",
+        action="store_true",
+        help="Update the robots.json file with data from darkvisitors.com/agents",
+    )
+    parser.add_argument(
+        "--convert",
+        action="store_true",
+        help="Create the robots.txt and markdown table from robots.json",
+    )
+    args = parser.parse_args()
+
+    if not (args.update or args.convert):
+        # Fail with usage text and exit status 2 instead of silently succeeding.
+        parser.error("please provide one of the possible flags.")
+
+    if args.update:
+        ingest_darkvisitors()
+    if args.convert:
+        conversions()