Added a semi-functional scraper.
It needs to run from a country that doesn't require age verification (like the US or Japan), or with a verified Twitter account.
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
**/__pycache__
|
||||
2
README.md
Normal file
2
README.md
Normal file
@@ -0,0 +1,2 @@
|
||||
Script that stitches a tweet's vertical image segments together into one image.
|
||||
|
||||
39
main.py
39
main.py
@@ -1,34 +1,9 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
def get_image(html):
    """Extract tweet media image URLs from page HTML, download them, and
    stitch them into one vertical strip.

    Parameters:
        html: rendered page source of a tweet.

    Returns:
        A PIL.Image.Image with the downloaded images stacked top to bottom.

    Raises:
        ValueError: if no media images are found in the HTML.
    """
    # get the links
    soup = BeautifulSoup(html, "lxml")
    links = []
    for element in soup.find_all("img", attrs={"draggable": "true"}):
        src = element.get("src")
        if src and "media" in src:
            # NOTE(review): the original read replace("&", "&"), a no-op —
            # assuming the intent was to unescape "&amp;" in the URL.
            links.append(src.replace("&amp;", "&"))

    # Fail clearly instead of crashing with IndexError on images[0] below.
    if not links:
        raise ValueError("no media images found in the supplied HTML")

    # get the images
    images = [Image.open(BytesIO(requests.get(link).content)) for link in links]

    # stitch the images together; size the canvas to the actual image
    # count (was hard-coded to 4, which broke for any other count)
    w = images[0].width
    h = images[0].height
    out = Image.new(mode=images[0].mode, size=(w, h * len(images)))

    for i, image in enumerate(images):
        out.paste(image, box=(0, h * i))

    # done
    return out
|
||||
|
||||
def main():
    """Read previously saved page HTML from "input" and write the stitched image."""
    with open("input") as f:
        page_html = f.read().strip()
    stitched = get_image(page_html)
    stitched.save("result.png")
|
||||
from stitch import get_image
|
||||
from scrape import get_page
|
||||
|
||||
# Script entry point.
if __name__ == "__main__":
    main()
|
||||
# TODO(review): url is empty — set the tweet URL before running.
url = ""

# Fetch the rendered page, stitch its media images, and save the result.
source = get_page(url)
image = get_image(source)
image.save("result.png")
|
||||
19
scrape.py
Normal file
19
scrape.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from selenium import webdriver
|
||||
from time import sleep
|
||||
|
||||
def get_page(url):
    """Fetch a fully rendered page with headless Firefox and return its HTML.

    Parameters:
        url: address of the page to load (e.g. a tweet URL).

    Returns:
        The page source (str) after giving client-side scripts time to run.
    """
    # Reuse a pre-built profile (e.g. with saved login/cookies) so the
    # scrape works where the site would otherwise demand verification.
    profile = webdriver.FirefoxProfile("./ffprofile/")
    options = webdriver.FirefoxOptions()
    options.add_argument("--headless")
    options.profile = profile
    driver = webdriver.Firefox(options)

    try:
        print("sending get request...")
        driver.get(url)
        print("waiting for page to load...")
        # Fixed wait for client-side rendering; a WebDriverWait on the
        # media <img> elements would be more robust.
        sleep(10)
        return driver.page_source
    finally:
        # Was missing: without quit(), every call leaks a headless
        # Firefox process (and its geckodriver).
        driver.quit()
|
||||
|
||||
# Manual test: scrape a known tweet when run directly.
if __name__ == "__main__":
    get_page("https://x.com/wata_ruh/status/2011037668386148484")
|
||||
37
stitch.py
Normal file
37
stitch.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
def get_image(html):
    """Extract tweet media image URLs from page HTML, download them, and
    stitch them into one vertical strip.

    Parameters:
        html: rendered page source of a tweet (e.g. from scrape.get_page).

    Returns:
        A PIL.Image.Image with the downloaded images stacked top to bottom.

    Raises:
        ValueError: if no media images are found in the HTML.
    """
    # get the links
    soup = BeautifulSoup(html, "lxml")
    links = []
    for element in soup.find_all("img", attrs={"draggable": "true"}):
        src = element.get("src")
        if src and "media" in src:
            # NOTE(review): the original read replace("&", "&"), a no-op —
            # assuming the intent was to unescape "&amp;" in the URL.
            links.append(src.replace("&amp;", "&"))

    print(links)

    # Fail clearly instead of crashing with IndexError on images[0] below.
    if not links:
        raise ValueError("no media images found in the supplied HTML")

    # get the images
    print("getting images...")
    images = [Image.open(BytesIO(requests.get(link).content)) for link in links]

    # stitch the images together; size the canvas to the actual image
    # count (was hard-coded to 4, which broke for any other count)
    w = images[0].width
    h = images[0].height
    out = Image.new(mode=images[0].mode, size=(w, h * len(images)))

    for i, image in enumerate(images):
        out.paste(image, box=(0, h * i))

    # done
    return out
|
||||
|
||||
def main():
    """Read previously saved page HTML from "input" and write the stitched image."""
    with open("input") as f:
        page_html = f.read().strip()
    stitched = get_image(page_html)
    stitched.save("result.png")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user