Add newsdownloader.py

This commit is contained in:
2026-01-05 21:14:24 +01:00
parent 6f0a59c74c
commit 10f38eebcf

42
newsdownloader.py Normal file
View File

@@ -0,0 +1,42 @@
# WIP script that downloads official patch notes, starting from 24/12/2025 and going back one week at a time.
# WIP because patch notes don't always come out on Thursdays, which this script simply assumes.
# The idea is to download a large number of patch notes, get the corresponding recent news articles from the wiki,
# and train or fine-tune some kind of model to write patch notes better.
from datetime import date, timedelta
import requests
from bs4 import BeautifulSoup
URL = "https://azurlane.yo-star.com/news/"
START_DATE = date(2025, 12, 24)
# requests has no default timeout; without one a stalled connection would
# block the script forever.
REQUEST_TIMEOUT = 30  # seconds
START_MARKER = "List of New Contents"
END_MARKER = "Friendly Reminders"


def extract_patch_notes(lines: list):
    """Return the patch-notes section of a news page, or None if absent.

    The section starts at the first line equal to ``START_MARKER`` (kept in
    the result) and ends just before the last line equal to ``END_MARKER``
    (dropped from the result).

    Args:
        lines: The page text split into lines. It is not modified.

    Returns:
        A new list with the lines of the section, or ``None`` when the start
        marker is missing or no end marker follows it.
    """
    try:
        start = lines.index(START_MARKER)
    except ValueError:
        return None
    section = lines[start:]
    # Scan from the end so that the *last* end marker closes the section.
    for end in range(len(section) - 1, -1, -1):
        if section[end] == END_MARKER:
            return section[:end]
    return None


def main():
    """Download patch notes page by page and save each one as a text file.

    Starting at ``START_DATE``, fetch ``URL + "year/month/day"``. When the
    page exists, its patch-notes section is written to
    ``{year}-{month}-{day}.txt`` in the current directory and the date moves
    back by one week. When the page contains "Oops!" the date moves forward
    one day at a time instead.

    NOTE: there is no stop condition; the loop runs until it is interrupted
    or an exception (e.g. a network error) is raised.
    """
    d = START_DATE
    # First date seen with an "Oops!" page (later: the marker moved back by
    # the branch below). It bounds the day-by-day forward scan.
    search = None
    while True:
        print(search)
        if d == search:
            # The forward scan reached the remembered date again: move the
            # marker back one week and resume two weeks earlier.
            search = d - timedelta(days=7)
            d -= timedelta(days=14)
            continue
        # Not named `date`, which would shadow the imported datetime.date.
        date_path = f"{d.year}/{d.month}/{d.day}"
        print(URL + date_path)
        response = requests.get(URL + date_path, timeout=REQUEST_TIMEOUT)
        bs = BeautifulSoup(response.text, features="html.parser")
        if "Oops!" in bs.text:
            if search is None:
                search = d
            d += timedelta(days=1)
            continue
        # find() returns None when the page has no element with id="main".
        wp = bs.find(id="main")
        notes = None
        if wp is not None:
            notes = extract_patch_notes(
                wp.get_text(separator="\n").splitlines()
            )
        if notes is None:
            # Page exists but does not look like patch notes: skip it rather
            # than crash the whole run.
            print(f"no patch notes found at {URL + date_path}, skipping")
        else:
            with open(f"{d.year}-{d.month}-{d.day}.txt", "w", encoding="utf8") as f:
                f.write("\n".join(notes))
        d -= timedelta(days=7)


if __name__ == "__main__":
    main()