Add newsdownloader.py
This commit is contained in:
42
newsdownloader.py
Normal file
42
newsdownloader.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# WIP thingy that downloads official patch notes, starting from 24/12/2025 going one week back at a time
|
||||||
|
# WIP because patch notes don't always come out on Thursdays, which this script just assumes
|
||||||
|
# the "idea" is to download a ton of patch notes and get the corresponding recent news articles from the wiki
|
||||||
|
# and train or fine-tune some kind of model to do patch notes better
|
||||||
|
|
||||||
|
from datetime import date, timedelta
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
URL = "https://azurlane.yo-star.com/news/"

# First date to try; the loop walks backwards in time from here.
START_DATE = date(2025, 12, 24)

# Lines that delimit the patch-note body inside the page's main container.
START_MARKER = "List of New Contents"
END_MARKER = "Friendly Reminders"

# Seconds to wait for the server before giving up on a request, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30


def extract_patch_notes(lines):
    """Return the patch-note portion of *lines*, or None if it is not there.

    The result starts at the first line equal to ``START_MARKER`` (inclusive)
    and ends just before the last line equal to ``END_MARKER`` (exclusive).
    None is returned when either marker is missing or when the last end
    marker comes before the first start marker.
    """
    try:
        start = lines.index(START_MARKER)
        # Search the reversed list to locate the *last* end marker.
        end = len(lines) - 1 - lines[::-1].index(END_MARKER)
    except ValueError:
        return None
    if end < start:
        return None
    return lines[start:end]


def main():
    """Download patch notes week by week and save each one to a text file.

    For every date tried, the page ``URL + "Y/M/D"`` is fetched. When it holds
    patch notes they are written to ``Y-M-D.txt`` in the current directory and
    the date moves back seven days. When it does not (error page, no ``main``
    element, or missing markers) the following day is tried instead, up to the
    date remembered in ``search``; reaching that date skips back to the week
    before.

    NOTE(review): the loop has no termination condition, so the script runs
    until it is interrupted or a request raises.
    """
    d = START_DATE
    # Upper bound for the day-by-day probing that follows a miss.
    search = None

    while True:
        print(search)
        if d == search:
            # Probed a whole stretch without a hit: give up on it and jump
            # to the week before, moving the bound along with it.
            search = d - timedelta(days=7)
            d -= timedelta(days=14)
            continue

        # No zero padding: the site's URLs use plain year/month/day numbers.
        page_url = URL + f"{d.year}/{d.month}/{d.day}"
        print(page_url)

        response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
        bs = BeautifulSoup(response.text, features="html.parser")

        notes = None
        if "Oops!" not in bs.text:
            wp = bs.find(id="main")
            if wp is not None:
                notes = extract_patch_notes(
                    wp.get_text(separator="\n").splitlines()
                )

        if notes is None:
            # Nothing usable on this date: remember where probing started
            # (first miss only) and try the next day.
            if search is None:
                search = d
            d += timedelta(days=1)
            continue

        with open(f"{d.year}-{d.month}-{d.day}.txt", "w", encoding="utf8") as f:
            f.write("\n".join(notes))

        d -= timedelta(days=7)


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user