From d8dedef9f916cd3a487032b5d846864961d1d0a0 Mon Sep 17 00:00:00 2001 From: Cory Dransfeldt Date: Sun, 14 Apr 2024 09:54:38 -0700 Subject: [PATCH] feat: block bad bots --- api/block-bots.js | 53 ++++ package.json | 2 +- src/_data/json/robots.js | 658 +-------------------------------------- 3 files changed, 61 insertions(+), 652 deletions(-) create mode 100644 api/block-bots.js diff --git a/api/block-bots.js b/api/block-bots.js new file mode 100644 index 00000000..bbaeca79 --- /dev/null +++ b/api/block-bots.js @@ -0,0 +1,53 @@ +import { getStore } from '@netlify/blobs' +import { DateTime } from 'luxon' + +const botUas = [ + 'AdsBot-Google', + 'Amazonbot', + 'anthropic-ai', + 'Applebot', + 'AwarioRssBot', + 'AwarioSmartBot', + 'Bytespider', + 'CCBot', + 'ChatGPT', + 'ChatGPT-User', + 'Claude-Web', + 'ClaudeBot', + 'cohere-ai', + 'DataForSeoBot', + 'Diffbot', + 'FacebookBot', + 'FacebookBot', + 'Google-Extended', + 'GPTBot', + 'ImagesiftBot', + 'magpie-crawler', + 'omgili', + 'Omgilibot', + 'peer39_crawler', + 'PerplexityBot', + 'YouBot' +] + +export default async (request, context) => { + const ua = request.headers.get('user-agent'); + const bots = getStore('bots') + let isBot = false + + botUas.forEach(u => { + if (ua.toLowerCase().includes(u.toLowerCase())) { + isBot = true + } + }) + + if (isBot) await bots.set(ua, DateTime.now()) + + const response = isBot ? new Response(null, { status: 401 }) : await context.next(); + + return response +}; + +export const config = { + path: '/*', +} \ No newline at end of file diff --git a/package.json b/package.json index 56c7fd64..70093f76 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "coryd.dev", - "version": "10.1.2", + "version": "10.2.2", "description": "The source for my personal site. Built using 11ty.", "type": "module", "scripts": { diff --git a/src/_data/json/robots.js b/src/_data/json/robots.js index 7301ebff..a84c9ef0 100644 --- a/src/_data/json/robots.js +++ b/src/_data/json/robots.js @@ -1,675 +1,31 @@ export default [ - '01h4x.com', - '360Spider', - '404checker', - '404enemy', - '80legs', - 'ADmantX', - 'AIBOT', - 'ALittle Client', - 'ASPSeek', - 'Abonti', - 'Aboundex', - 'Aboundexbot', - 'Acunetix', 'AdsBot-Google', - 'AdsTxtCrawlerTP', - 'AfD-Verbotsverfahren', - 'AhrefsBot', - 'AiHitBot', - 'Aipbot', - 'Alexibot', - 'AllSubmitter', - 'Alligator', - 'AlphaBot', 'Amazonbot', - 'Anarchie', - 'Anarchy', - 'Anarchy99', - 'Ankit', - 'Anthill', 'anthropic-ai', - 'Apexoo', 'Applebot', - 'Aspiegel', - 'Asterias', - 'Atomseobot', - 'Attach', 'AwarioRssBot', 'AwarioSmartBot', - 'BBBike', - 'BDCbot', - 'BDFetch', - 'BLEXBot', - 'BackDoorBot', - 'BackStreet', - 'BackWeb', - 'Backlink-Ceck', - 'BacklinkCrawler', - 'Badass', - 'Bandit', - 'Barkrowler', - 'BatchFTP', - 'Battleztar Bazinga', - 'BetaBot', - 'Bigfoot', - 'Bitacle', - 'BlackWidow', - 'Black Hole', - 'Blackboard', - 'Blow', - 'BlowFish', - 'Boardreader', - 'Bolt', - 'BotALot', - 'Brandprotect', - 'Brandwatch', - 'Buck', - 'Buddy', - 'BuiltBotTough', - 'BuiltWith', - 'Bullseye', - 'BunnySlippers', - 'BuzzSumo', 'Bytespider', - 'CATExplorador', 'CCBot', - 'CODE87', - 'CSHttp', - 'Calculon', - 'CazoodleBot', - 'Cegbfeieh', - 'CensysInspect', 'ChatGPT-User', - 'CheTeam', - 'CheeseBot', - 'CherryPicker', - 'ChinaClaw', - 'Chlooe', - 'Citoid', - 'Claritybot', 'ClaudeBot', 'Claude-Web', - 'Cliqzbot', - 'Cloud mapping', - 'Cocolyzebot', - 'Cogentbot', - 'Collector', - 'Copier', - 'CopyRightCheck', - 'Copyscape', - 'Cosmos', - 'Craftbot', - 'Crawling at Home Project', - 'CrazyWebCrawler', - 'Crescent', - 'CrunchBot', - 'Curious', - 'Custo', - 'CyotekWebCopy', - 'DBLBot', - 'DIIbot', - 'DSearch', - 'DTS Agent', - 'DataCha0s', - 'DatabaseDriverMysqli', - 'Demon', - 'Deusu', - 'Devil', - 'Diffbot', - 'Digincore', - 'DigitalPebble', - 'Dirbuster', - 'Disco', - 'Discobot', - 'Discoverybot', - 'Dispatch', - 'DittoSpyder', - 'DnBCrawler-Analytics', - 'DnyzBot', - 'DomCopBot', - 'DomainAppender', - 'DomainCrawler', - 'DomainSigmaCrawler', - 'DomainStatsBot', - 'Domains Project', - 'Dotbot', - 'Download Wonder', - 'Dragonfly', - 'Drip', - 'ECCP/1.0', - 'EMail Siphon', - 'EMail Wolf', - 'EasyDL', - 'Ebingbong', - 'Ecxi', - 'EirGrabber', - 'EroCrawler', - 'Evil', - 'Exabot', - 'Express WebPictures', - 'ExtLinksBot', - 'Extractor', - 'ExtractorPro', - 'Extreme Picture Finder', - 'EyeNetIE', - 'Ezooms', - 'FDM', - 'FHscan', + 'cohere-ai', + 'DataForSeoBot', 'FacebookBot', - 'FemtosearchBot', - 'Fimap', - 'Firefox/7.0', - 'FlashGet', - 'Flunky', - 'Foobot', - 'Freeuploader', 'FriendlyCrawler', - 'FrontPage', - 'Fuzz', - 'FyberSpider', - 'Fyrebot', - 'G-i-g-a-b-o-t', - 'GPTBot', - 'GT::WWW', - 'GalaxyBot', - 'Genieo', - 'GermCrawler', - 'GetRight', - 'GetWeb', - 'Getintent', - 'Gigabot', - 'Go!Zilla', - 'Go-Ahead-Got-It', - 'GoZilla', 'Google-Extended', 'GoogleOther', - 'Gotit', - 'GrabNet', - 'Grabber', - 'Grafula', - 'GrapeFX', - 'GrapeshotCrawler', - 'GridBot', - 'HEADMasterSEO', - 'HMView', - 'HTMLparser', - 'HTTP::Lite', - 'HTTrack', - 'Haansoft', - 'HaosouSpider', - 'Harvest', - 'Havij', - 'Heritrix', - 'Hloader', - 'HonoluluBot', - 'Humanlinks', - 'HybridBot', - 'IDBTE4M', - 'IDBot', - 'IRLbot', - 'Iblog', - 'Id-search', - 'IlseBot', - 'Image Fetch', - 'Image Sucker', + 'GPTBot', 'ImagesiftBot', - 'IndeedBot', - 'Indy Library', - 'InfoNaviRobot', - 'InfoTekies', - 'Intelliseek', - 'InterGET', - 'InternetSeer', - 'Internet Ninja', - 'Iria', - 'Iskanie', - 'IstellaBot', - 'JOC Web Spider', - 'JamesBOT', - 'Jbrofuzz', - 'JennyBot', - 'JetCar', - 'Jetty', - 'JikeSpider', - 'Joomla', - 'Jorgee', - 'JustView', - 'Jyxobot', - 'Kenjin Spider', - 'Keybot Translation-Search-Machine', - 'Keyword Density', - 'Kinza', - 'Kozmosbot', - 'LNSpiderguy', - 'LWP::Simple', - 'Lanshanbot', - 'Larbin', - 'Leap', - 'LeechFTP', - 'LeechGet', - 'LexiBot', - 'Lftp', - 'LibWeb', - 'Libwhisker', - 'LieBaoFast', - 'Lightspeedsystems', - 'Likse', - 'LinkScan', - 'LinkWalker', - 'Linkbot', - 'LinkextractorPro', - 'LinkpadBot', - 'LinksManager', - 'LinqiaMetadataDownloaderBot', - 'LinqiaRSSBot', - 'LinqiaScrapeBot', - 'Lipperhey', - 'Lipperhey Spider', - 'Litemage_walker', - 'Lmspider', - 'Ltx71', - 'MFC_Tear_Sample', - 'MIDown tool', - 'MIIxpc', - 'MJ12bot', - 'MQQBrowser', - 'MSFrontPage', - 'MSIECrawler', - 'MTRobot', - 'Mag-Net', - 'Magnet', - 'Mail.RU_Bot', - 'Majestic-SEO', - 'Majestic12', - 'Majestic SEO', - 'MarkMonitor', - 'MarkWatch', - 'Mass Downloader', - 'Masscan', - 'Mata Hari', - 'MauiBot', - 'Mb2345Browser', - 'MeanPath Bot', - 'Meanpathbot', - 'Mediatoolkitbot', - 'MegaIndex.ru', + 'magpie-crawler', 'Meltwater', - 'Metauri', - 'MicroMessenger', - 'Microsoft Data Access', - 'Microsoft URL Control', - 'Minefield', - 'Mister PiX', - 'Moblie Safari', - 'Mojeek', - 'Mojolicious', - 'MolokaiBot', - 'Morfeus Fucking Scanner', - 'Mozlila', - 'Mr.4x3', - 'Msrabot', - 'Musobot', - 'NICErsPRO', - 'NPbot', - 'Name Intelligence', - 'Nameprotect', - 'Navroad', - 'NearSite', - 'Needle', - 'Nessus', - 'NetAnts', - 'NetLyzer', - 'NetMechanic', - 'NetSpider', - 'NetZIP', - 'Net Vampire', - 'Netcraft', - 'Nettrack', - 'Netvibes', - 'NextGenSearchBot', - 'Nibbler', - 'Niki-bot', - 'Nikto', - 'NimbleCrawler', - 'Nimbostratus', - 'Ninja', - 'Nmap', - 'Nuclei', - 'Nutch', - 'Octopus', - 'Offline Explorer', - 'Offline Navigator', - 'OnCrawl', - 'OpenLinkProfiler', - 'OpenVAS', - 'Openfind', - 'Openvas', - 'OrangeBot', - 'OrangeSpider', - 'OutclicksBot', - 'OutfoxBot', - 'PECL::HTTP', - 'PHPCrawl', - 'POE-Component-Client-HTTP', - 'PageAnalyzer', - 'PageGrabber', - 'PageScorer', - 'PageThing.com', - 'Page Analyzer', - 'Pandalytics', - 'Panscient', - 'Papa Foto', - 'Pavuk', - 'PeoplePal', + 'omgili', + 'omgilibot', 'peer39_crawler', 'peer39_crawler/1.0', 'PerplexityBot', - 'Petalbot', - 'Pi-Monster', - 'Picscout', - 'Picsearch', - 'PictureFinder', - 'Piepmatz', - 'Pimonster', - 'Pixray', - 'PleaseCrawl', - 'Pockey', - 'ProPowerBot', - 'ProWebWalker', - 'Probethenet', - 'Proximic', - 'Psbot', - 'Pu_iN', - 'Pump', - 'PxBroker', - 'PyCurl', - 'QueryN Metasearch', - 'Quick-Crawler', - 'RSSingBot', - 'Rainbot', - 'RankActive', - 'RankActiveLinkBot', - 'RankFlex', - 'RankingBot', - 'RankingBot2', - 'Rankivabot', - 'RankurBot', - 'Re-re', - 'ReGet', - 'RealDownload', - 'Reaper', - 'RebelMouse', - 'Recorder', - 'RedesScrapy', - 'RepoMonkey', - 'Ripper', - 'RocketCrawler', - 'Rogerbot', - 'SBIder', - 'SEOkicks', - 'SEOkicks-Robot', - 'SEOlyticsCrawler', - 'SEOprofiler', - 'SEOstats', - 'SISTRIX', - 'SMTBot', - 'SalesIntelligent', - 'ScanAlert', - 'Scanbot', - 'ScoutJet', - 'Scrapy', - 'Screaming', - 'ScreenerBot', - 'ScrepyBot', - 'Searchestate', - 'SearchmetricsBot', - 'Seekport', - 'SeekportBot', - 'SemanticJuice', - 'Semrush', - 'SemrushBot', - 'SentiBot', - 'SenutoBot', - 'SeoSiteCheckup', - 'SeobilityBot', - 'Seomoz', - 'Shodan', - 'Siphon', - 'SiteCheckerBotCrawler', - 'SiteExplorer', - 'SiteLockSpider', - 'SiteSnagger', - 'SiteSucker', - 'Site Sucker', - 'Sitebeam', - 'Siteimprove', - 'Sitevigil', - 'SlySearch', - 'SmartDownload', - 'Snake', - 'Snapbot', - 'Snoopy', - 'SocialRankIOBot', - 'Sociscraper', - 'Sogou web spider', - 'Sosospider', - 'Sottopop', - 'SpaceBison', - 'Spammen', - 'SpankBot', - 'Spanner', - 'Spbot', - 'Spinn3r', - 'SputnikBot', - 'Sqlmap', - 'Sqlworm', - 'Sqworm', - 'Steeler', - 'Stripper', - 'Sucker', - 'Sucuri', - 'SuperBot', - 'SuperHTTP', - 'Surfbot', - 'SurveyBot', - 'Suzuran', - 'Swiftbot', - 'Szukacz', - 'T0PHackTeam', - 'T8Abot', - 'Teleport', - 'TeleportPro', - 'Telesoft', - 'Telesphoreo', - 'Telesphorep', - 'TheNomad', - 'The Intraformant', - 'Thumbor', - 'TightTwatBot', - 'TinyTestBot', - 'Titan', - 'Toata', - 'Toweyabot', - 'Tracemyfile', - 'Trendiction', - 'Trendictionbot', - 'True_Robot', - 'Turingos', - 'Turnitin', - 'TurnitinBot', - 'TwengaBot', - 'Twice', - 'Typhoeus', - 'URLy.Warning', - 'URLy Warning', - 'UnisterBot', - 'Upflow', - 'V-BOT', - 'VB Project', - 'VCI', - 'Vacuum', - 'Vagabondo', - 'VelenPublicWebCrawler', - 'VeriCiteCrawler', - 'VidibleScraper', - 'Virusdie', - 'VoidEYE', - 'Voil', - 'Voltron', - 'WASALive-Bot', - 'WBSearchBot', - 'WEBDAV', - 'WISENutbot', - 'WPScan', - 'WWW-Collector-E', - 'WWW-Mechanize', - 'WWW::Mechanize', - 'WWWOFFLE', - 'Wallpapers', - 'Wallpapers/3.0', - 'WallpapersHD', - 'WeSEE', - 'WebAuto', - 'WebBandit', - 'WebCollage', - 'WebCopier', - 'WebEnhancer', - 'WebFetch', - 'WebFuck', - 'WebGo IS', - 'WebImageCollector', - 'WebLeacher', - 'WebPix', - 'WebReaper', - 'WebSauger', - 'WebStripper', - 'WebSucker', - 'WebWhacker', - 'WebZIP', - 'Web Auto', - 'Web Collage', - 'Web Enhancer', - 'Web Fetch', - 'Web Fuck', - 'Web Pix', - 'Web Sauger', - 'Web Sucker', - 'Webalta', - 'WebmasterWorldForumBot', - 'Webshag', - 'WebsiteExtractor', - 'WebsiteQuester', - 'Website Quester', - 'Webster', - 'Whack', - 'Whacker', - 'Whatweb', - 'Who.is Bot', - 'Widow', - 'WinHTTrack', - 'WiseGuys Robot', - 'Wonderbot', - 'Woobot', - 'Wotbox', - 'Wprecon', - 'Xaldon WebSpider', - 'Xaldon_WebSpider', - 'Xenu', - 'YoudaoBot', - 'Zade', - 'Zauba', - 'Zermelo', - 'Zeus', - 'Zitebot', - 'ZmEu', - 'ZoomBot', - 'ZoominfoBot', - 'ZumBot', - 'ZyBorg', - 'adscanner', - 'anthropic-ai', - 'archive.org_bot', - 'arquivo-web-crawler', - 'arquivo.pt', - 'autoemailspider', - 'backlink-check', - 'cah.io.community', - 'check1.exe', - 'clark-crawler', - 'coccocbot', - 'cognitiveseo', - 'cohere-ai', - 'com.plumanalytics', - 'crawl.sogou.com', - 'crawler.feedback', - 'crawler4j', - 'dataforseo.com', - 'dataforseobot', - 'demandbase-bot', - 'domainsproject.org', - 'eCatch', - 'evc-batch', - 'FacebookBot', - 'facebookscraper', - 'gopher', - 'heritrix', - 'imagesift.com', - 'instabid', - 'internetVista monitor', - 'ips-agent', - 'isitwp.com', - 'iubenda-radar', - 'linkdexbot', - 'lwp-request', - 'lwp-trivial', - 'magpie-crawler', - 'meanpathbot', - 'mediawords', - 'muhstik-scan', - 'netEstate NE Crawler', - 'oBot', - 'omgili', - 'omgilibot', - 'openai', - 'openai.com', - 'page scorer', - 'pcBrowser', - 'plumanalytics', - 'polaris version', - 'probe-image-size', - 'ripz', - 'SEMrushBot', - 's1z.ru', - 'satoristudio.net', - 'scalaj-http', - 'scan.lol', + 'PiplBot', 'Seekr', - 'seobility', - 'seocompany.store', - 'seoscanners', - 'seostar', - 'serpstatbot', - 'sexsearcher', - 'sitechecker.pro', - 'siteripz', - 'sogouspider', - 'sp_auditbot', - 'spyfu', - 'sysscan', - 'tAkeOut', - 'trendiction.com', - 'trendiction.de', - 'ubermetrics-technologies.com', - 'voyagerx.com', - 'webgains-bot', - 'webmeup-crawler', - 'webpros.com', - 'webprosbot', - 'x09Mozilla', - 'x22Mozilla', - 'xpymep1.exe', 'YouBot', - 'zauba.io', - 'zgrab', ] \ No newline at end of file