Added a semi-functional scraper.

To work, it must be run from a country that doesn't have age verification (like the US or Japan), or with a verified Twitter account.
This commit is contained in:
2026-01-14 13:08:06 +01:00
parent aff416edbc
commit cb82711633
5 changed files with 66 additions and 32 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
**/__pycache__

2
README.md Normal file
View File

@@ -0,0 +1,2 @@
A script that stitches vertically split Twitter images together into one tall image.

39
main.py
View File

@@ -1,34 +1,9 @@
from bs4 import BeautifulSoup from stitch import get_image
import requests from scrape import get_page
from PIL import Image
from io import BytesIO
def get_image(html):
    """Extract tweet media images from rendered HTML and stack them vertically.

    Parameters: html -- rendered page source of a tweet.
    Returns a PIL Image with the media images pasted top-to-bottom.
    """
    # get the links
    soup = BeautifulSoup(html, "lxml")
    links = []
    # Tweet media <img> tags are draggable and carry "media" in their src.
    for element in soup.find_all("img", attrs={"draggable": "true"}):
        src = element.get("src")
        if "media" in src:
            # NOTE(review): replacing "&" with "&" is a no-op — presumably
            # meant to unescape "&amp;", but BeautifulSoup already does that.
            links.append(src.replace("&", "&"))
    # get the images
    images = [Image.open(BytesIO(requests.get(link).content)) for link in links]
    # stitch the images together
    w = images[0].width
    h = images[0].height
    # NOTE(review): canvas height hard-codes 4 images; tweets with more are
    # cropped and fewer leave blank space — should be h * len(images).
    out = Image.new(mode=images[0].mode, size=(w, h * 4))
    for i, image in enumerate(images):
        out.paste(image, box=(0, h * i))
    # done
    return out
def main():
    """Read pre-saved tweet HTML from the local file "input" and write the
    stitched image to result.png."""
    with open("input") as f:
        get_image(f.read().strip()).save("result.png")
if __name__ == "__main__":
    # TODO: set this to the tweet URL to scrape before running.
    url = ""
    # Fail fast on the placeholder instead of handing an empty URL to Selenium.
    if not url:
        raise SystemExit("set `url` to the tweet URL before running")
    # Render the page with headless Firefox, then stitch the media images.
    source = get_page(url)
    image = get_image(source)
    image.save("result.png")

19
scrape.py Normal file
View File

@@ -0,0 +1,19 @@
from selenium import webdriver
from time import sleep
def get_page(url):
    """Fetch the fully rendered HTML of *url* using headless Firefox.

    Twitter/X builds the page with JavaScript, so a plain HTTP GET would not
    contain the media <img> tags; Selenium renders the page first.

    Parameters: url -- the tweet URL to load.
    Returns the rendered page source as a string.
    The browser process is always closed, even if loading fails.
    """
    # Reuse a pre-made profile (e.g. with a logged-in session) so
    # age-restricted tweets are visible.
    profile = webdriver.FirefoxProfile("./ffprofile/")
    options = webdriver.FirefoxOptions()
    options.add_argument("--headless")
    options.profile = profile
    driver = webdriver.Firefox(options=options)
    try:
        print("sending get request...")
        driver.get(url)
        print("waiting for page to load...")
        # Crude fixed wait for the JS to render; TODO: use WebDriverWait.
        sleep(10)
        return driver.page_source
    finally:
        # Original leaked the browser process; always release it.
        driver.quit()
# Smoke test: fetch a sample tweet page when this module is run directly.
if __name__ == "__main__":
    get_page("https://x.com/wata_ruh/status/2011037668386148484")

37
stitch.py Normal file
View File

@@ -0,0 +1,37 @@
from bs4 import BeautifulSoup
import requests
from PIL import Image
from io import BytesIO
def get_image(html):
    """Extract tweet media images from rendered HTML and stack them vertically.

    Parameters: html -- rendered page source of a tweet (see scrape.get_page).
    Returns a PIL Image with all media images pasted top-to-bottom.
    Raises ValueError when no media images are found in the HTML.
    """
    soup = BeautifulSoup(html, "lxml")
    # Tweet media <img> tags are draggable and carry "media" in their src.
    # BeautifulSoup already unescapes HTML entities, so no &amp; handling
    # is needed here; guard against a missing src attribute.
    links = [
        element.get("src")
        for element in soup.find_all("img", attrs={"draggable": "true"})
        if "media" in (element.get("src") or "")
    ]
    print(links)
    if not links:
        # Original crashed with IndexError on images[0] instead.
        raise ValueError("no media images found in the given HTML")
    # get the images
    print("getting images...")
    images = [Image.open(BytesIO(requests.get(link).content)) for link in links]
    # Canvas sized to the actual number of images; the original hard-coded 4,
    # which cropped tweets with more images and left blank bands with fewer.
    w, h = images[0].size
    out = Image.new(mode=images[0].mode, size=(w, h * len(images)))
    for i, image in enumerate(images):
        out.paste(image, box=(0, h * i))
    return out
def main():
    """Stitch the tweet HTML stored in the local file "input" into result.png."""
    with open("input") as source_file:
        html = source_file.read().strip()
    stitched = get_image(html)
    stitched.save("result.png")
# Allow running this module directly on previously saved tweet HTML.
if __name__ == "__main__":
    main()