Add newsdownloader.py

2026-01-05 21:14:24 +01:00
parent 6f0a59c74c
commit 10f38eebcf
1 changed files with 42 additions and 0 deletions
@@ -0,0 +1,42 @@
 # WIP script that downloads official patch notes, starting from 24/12/2025 and going back one week at a time.
 # WIP because patch notes don't always come out on Thursdays, which this script simply assumes.
 # The idea is to download a large number of patch notes, get the corresponding recent news articles from the wiki,
 # and train or fine-tune some kind of model to write patch notes better.
 from datetime import date, timedelta
 import requests
 from bs4 import BeautifulSoup
 # --- Patch-note downloader: configuration, helpers and entry point ---
# Base URL of the news site; a date path ("year/month/day") is appended to it.
URL = "https://azurlane.yo-star.com/news/"
# Date of the newest patch notes to look for; the walk goes backwards from here.
START_DATE = date(2025, 12, 24)
# The saved text starts at the first START_MARKER line (inclusive) and ends
# right before the last END_MARKER line (exclusive).
START_MARKER = "List of New Contents"
END_MARKER = "Friendly Reminders"
# A page whose text contains this string is treated as "no article on that
# date" (presumably the site's not-found page -- confirm against the site).
MISSING_PAGE_TEXT = "Oops!"
# Patch notes are assumed to come out once per week.
WEEK = timedelta(days=7)
# Stop after this many consecutive weeks without any patch notes, so the walk
# terminates once it has gone past the oldest article instead of requesting
# pages forever.
MAX_EMPTY_WEEKS = 8
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30


def news_url(day):
    """Return the news page URL for *day* (non-zero-padded year/month/day)."""
    return f"{URL}{day.year}/{day.month}/{day.day}"


def output_name(day):
    """Return the name of the text file the notes of *day* are written to."""
    return f"{day.year}-{day.month}-{day.day}.txt"


def extract_notes(lines):
    """Cut the patch-note section out of the lines of an article.

    Returns the lines from the first START_MARKER line (inclusive) up to the
    last END_MARKER line after it (exclusive), or None when either marker is
    missing. Markers must match a whole line exactly.
    """
    try:
        start = lines.index(START_MARKER)
    except ValueError:
        return None
    # Search backwards so the *last* end marker wins; it must come after the
    # start marker to delimit a section.
    for end in range(len(lines) - 1, start, -1):
        if lines[end] == END_MARKER:
            return lines[start:end]
    return None


def fetch_notes(day):
    """Download the news page of *day* and return its patch-note lines.

    Returns None when the page is reported missing, has no element with
    id "main", or does not contain both section markers. Network errors
    raised by requests are not caught.
    """
    url = news_url(day)
    print(url)
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, features="html.parser")
    if MISSING_PAGE_TEXT in soup.text:
        return None
    main = soup.find(id="main")
    if main is None:
        print(f"skipping {url}: no element with id 'main'")
        return None
    notes = extract_notes(main.get_text(separator="\n").splitlines())
    if notes is None:
        print(f"skipping {url}: patch-note section markers not found")
    return notes


def save_notes(day, notes):
    """Write the patch-note lines of *day* to its text file."""
    with open(output_name(day), "w", encoding="utf8") as out:
        out.write("\n".join(notes))


def candidate_days(expected):
    """Return the seven dates to try for the week starting at *expected*."""
    return [expected + timedelta(days=offset) for offset in range(7)]


def download_all(start=None, fetch=None, save=None,
                 max_empty_weeks=MAX_EMPTY_WEEKS):
    """Walk backwards one week at a time and save every set of patch notes.

    For each week the expected date is tried first, then the six following
    days, because notes are not always released on the expected weekday.
    After a hit the next expected date is exactly one week before the hit,
    so the scanned window always ends right before the previously saved
    date: no date with notes is downloaded twice and no week is skipped.

    start: first expected date (defaults to START_DATE).
    fetch: callable(day) -> list of lines or None (defaults to fetch_notes).
    save: callable(day, lines) (defaults to save_notes).
    max_empty_weeks: stop after this many consecutive weeks without notes.

    Returns the list of dates whose notes were saved, newest first.
    """
    fetch = fetch_notes if fetch is None else fetch
    save = save_notes if save is None else save
    expected = START_DATE if start is None else start
    saved = []
    empty_weeks = 0
    while empty_weeks < max_empty_weeks:
        for day in candidate_days(expected):
            notes = fetch(day)
            if notes is None:
                continue
            save(day, notes)
            saved.append(day)
            expected = day - WEEK
            empty_weeks = 0
            break
        else:
            # Nothing published in this whole week: move the window back.
            expected -= WEEK
            empty_weeks += 1
    return saved


if __name__ == "__main__":
    download_all()