PR Feedback: log special-case, comment consistency

This commit is contained in:
Frederic Barthelemy 2025-04-05 09:01:52 -07:00
parent 5f5a89c38c
commit c6f308cbd0
No known key found for this signature in database
GPG key ID: 6FF43C49A5D473EF

View file

@@ -107,13 +107,16 @@ def clean_robot_name(name):
# This was specifically spotted in "Perplexity-User"
# Looks like a non-breaking hyphen introduced by the HTML rendering software
# Reading the source page for Perplexity: https://docs.perplexity.ai/guides/bots
# You can see the bot is listed several times as "PerplexityUser" with a normal hyphen,
# You can see the bot is listed several times as "Perplexity-User" with a normal hyphen,
# and it's only the Row-Heading that has the special hyphen
#
# Technically, there's no reason there wouldn't someday be a bot that
# actually uses a non-breaking hyphen, but that seems unlikely,
# so this solution should be fine for now.
return re.sub(r"\u2011", "-", name)
result = re.sub(r"\u2011", "-", name)
if result != name:
print(f"\tCleaned '{name}' to '{result}' - unicode/html mangled chars normalized.")
return result
def ingest_darkvisitors():