diff --git a/newsdownloader.py b/newsdownloader.py new file mode 100644 index 0000000..07e70fc --- /dev/null +++ b/newsdownloader.py @@ -0,0 +1,42 @@ +# WIP thingy that downloads official patch notes, starting from 24/12/2025 going one week back at a time +# WIP because patch notes don't always come out on thursdays, which this script just assumes +# the "idea" is to download a ton of patch notes and get the corresponding recent news articles from the wiki +# and train or fine tune some kind of model to do patch notes better + +from datetime import date, timedelta +import requests +from bs4 import BeautifulSoup + +d = date(2025, 12, 24) +URL = "https://azurlane.yo-star.com/news/" + +search = None + +while True: + print(search) + if d == search: + search = d - timedelta(days=7) + d -= timedelta(days=14) + continue + date = f"{d.year}/{d.month}/{d.day}" + print(URL + date) + + response = requests.get(URL + date) + bs = BeautifulSoup(response.text, features="html.parser") + if "Oops!" in bs.text: + if search == None: + search = d + d += timedelta(days=1) + continue + wp = bs.find(id="main") + text = wp.get_text(separator="\n").splitlines() + while text[0] != "List of New Contents": + text.pop(0) + while text[-1] != "Friendly Reminders": + text.pop() + text.pop() + + with open(f"{d.year}-{d.month}-{d.day}.txt", "w", encoding="utf8") as f: + f.write("\n".join(text)) + + d -= timedelta(days=7)