Mirror of https://github.com/ai-robots-txt/ai.robots.txt.git, synced 2025-04-04 11:03:59 +00:00
add dark visitor workflow
parent e12ddc0f42
commit 192bf67631
2 changed files with 60 additions and 0 deletions
.github/workflows/daily_update.yml (vendored, normal file): 22 additions

@@ -0,0 +1,22 @@
name: Daily Update from Dark Visitors
on:
  schedule:
    - cron: "0 0 * * *"

jobs:
  dark-visitors:
    runs-on: ubuntu-latest
    name: dark-visitors
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 2
      - run: |
          pip install beautifulsoup4 requests
          git config --global user.name "dark-visitors"
          git config --global user.email "dark-visitors@users.noreply.github.com"
          python code/dark_visitors.py
          git add -A
          git commit -m "Daily update from Dark Visitors"
          git push
        shell: bash
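The run step above installs beautifulsoup4 and requests, runs code/dark_visitors.py (added below), and commits whatever the script changed. To preview locally what a scheduled run would add, without touching robots.json or creating a commit, a minimal sketch along the same lines (same URL, CSS class names, and robots.json layout as the script below; not part of this commit):

import json
from pathlib import Path

import requests
from bs4 import BeautifulSoup

# Fetch the Dark Visitors agent directory, exactly as the daily job does.
page = requests.get("https://darkvisitors.com/agents")
soup = BeautifulSoup(page.text, "html.parser")

# Agents already tracked in this repository.
known = json.loads(Path("./robots.json").read_text())

# Report, without modifying anything, which agents the scheduled run would add.
for section in soup.find_all("div", {"class": "agent-links-section"}):
    for agent in section.find_all("a", href=True):
        name = agent.find("div", {"class": "agent-name"}).get_text().strip()
        if name not in known:
            print(f"would add: {name}")

Run it from the repository root so ./robots.json resolves to the same file the workflow updates.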
code/dark_visitors.py (normal file): 38 additions

@@ -0,0 +1,38 @@
import json
from pathlib import Path

import requests
from bs4 import BeautifulSoup

session = requests.Session()
response = session.get("https://darkvisitors.com/agents")
soup = BeautifulSoup(response.text, "html.parser")

existing_content = json.loads(Path("./robots.json").read_text())

for section in soup.find_all("div", {"class": "agent-links-section"}):
    category = section.find("h2").get_text()
    for agent in section.find_all("a", href=True):
        name = agent.find("div", {"class": "agent-name"}).get_text().strip()
        desc = agent.find("p").get_text().strip()

        if name in existing_content:
            print(f"{name} already exists in robots.json")
            continue
        # Template:
        # "Claude-Web": {
        #     "operator": "[Anthropic](https:\/\/www.anthropic.com)",
        #     "respect": "Unclear at this time.",
        #     "function": "Scrapes data to train Anthropic's AI products.",
        #     "frequency": "No information provided.",
        #     "description": "Scrapes data to train LLMs and AI products offered by Anthropic."
        # }
        existing_content[name] = {
            "operator": "Unclear at this time.",
            "respect": "Unclear at this time.",
            "function": "Unclear at this time.",
            "frequency": "Unclear at this time.",
            "description": f"{desc} More info can be found at https://darkvisitors.com/agents{agent['href']}"
        }

Path("./robots.json").write_text(json.dumps(existing_content, indent=4))