Added a semi-functional scraper.

To work, it must be run from a country that doesn't have age verification (like the US or Japan), or with a verified Twitter account.
This commit is contained in:
2026-01-14 13:08:06 +01:00
parent aff416edbc
commit cb82711633
5 changed files with 66 additions and 32 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
**/__pycache__

2
README.md Normal file
View File

@@ -0,0 +1,2 @@
A script that stitches vertically split Twitter images together into one tall image.

39
main.py
View File

@@ -1,34 +1,9 @@
from bs4 import BeautifulSoup from stitch import get_image
import requests from scrape import get_page
from PIL import Image
from io import BytesIO
def get_image(html):
    """Extract tweet media images from rendered HTML and stack them vertically.

    Parameters: html -- rendered page source of a tweet.
    Returns a PIL Image with the media images pasted top-to-bottom.
    """
    # get the links
    soup = BeautifulSoup(html, "lxml")
    links = []
    # Tweet media <img> tags are draggable and carry "media" in their src.
    for element in soup.find_all("img", attrs={"draggable": "true"}):
        src = element.get("src")
        if "media" in src:
            # NOTE(review): replacing "&" with "&" is a no-op — presumably
            # meant to unescape "&amp;", but BeautifulSoup already does that.
            links.append(src.replace("&", "&"))
    # get the images
    images = [Image.open(BytesIO(requests.get(link).content)) for link in links]
    # stitch the images together
    w = images[0].width
    h = images[0].height
    # NOTE(review): canvas height hard-codes 4 images; tweets with more are
    # cropped and fewer leave blank space — should be h * len(images).
    out = Image.new(mode=images[0].mode, size=(w, h * 4))
    for i, image in enumerate(images):
        out.paste(image, box=(0, h * i))
    # done
    return out
def main():
    """Read pre-saved tweet HTML from the local file "input" and write the
    stitched image to result.png."""
    with open("input") as f:
        get_image(f.read().strip()).save("result.png")
if __name__ == "__main__":
    # TODO: set this to the tweet URL to scrape before running.
    url = ""
    # Fail fast on the placeholder instead of handing an empty URL to Selenium.
    if not url:
        raise SystemExit("set `url` to the tweet URL before running")
    # Render the page with headless Firefox, then stitch the media images.
    source = get_page(url)
    image = get_image(source)
    image.save("result.png")

19
scrape.py Normal file
View File

@@ -0,0 +1,19 @@
from selenium import webdriver
from time import sleep
def get_page(url):
    """Fetch the fully rendered HTML of *url* using headless Firefox.

    Twitter/X builds the page with JavaScript, so a plain HTTP GET would not
    contain the media <img> tags; Selenium renders the page first.

    Parameters: url -- the tweet URL to load.
    Returns the rendered page source as a string.
    The browser process is always closed, even if loading fails.
    """
    # Reuse a pre-made profile (e.g. with a logged-in session) so
    # age-restricted tweets are visible.
    profile = webdriver.FirefoxProfile("./ffprofile/")
    options = webdriver.FirefoxOptions()
    options.add_argument("--headless")
    options.profile = profile
    driver = webdriver.Firefox(options=options)
    try:
        print("sending get request...")
        driver.get(url)
        print("waiting for page to load...")
        # Crude fixed wait for the JS to render; TODO: use WebDriverWait.
        sleep(10)
        return driver.page_source
    finally:
        # Original leaked the browser process; always release it.
        driver.quit()
# Smoke test: fetch a sample tweet page when this module is run directly.
if __name__ == "__main__":
    get_page("https://x.com/wata_ruh/status/2011037668386148484")

37
stitch.py Normal file
View File

@@ -0,0 +1,37 @@
from bs4 import BeautifulSoup
import requests
from PIL import Image
from io import BytesIO
def get_image(html):
    """Extract tweet media images from rendered HTML and stack them vertically.

    Parameters: html -- rendered page source of a tweet (see scrape.get_page).
    Returns a PIL Image with all media images pasted top-to-bottom.
    Raises ValueError when no media images are found in the HTML.
    """
    soup = BeautifulSoup(html, "lxml")
    # Tweet media <img> tags are draggable and carry "media" in their src.
    # BeautifulSoup already unescapes HTML entities, so no &amp; handling
    # is needed here; guard against a missing src attribute.
    links = [
        element.get("src")
        for element in soup.find_all("img", attrs={"draggable": "true"})
        if "media" in (element.get("src") or "")
    ]
    print(links)
    if not links:
        # Original crashed with IndexError on images[0] instead.
        raise ValueError("no media images found in the given HTML")
    # get the images
    print("getting images...")
    images = [Image.open(BytesIO(requests.get(link).content)) for link in links]
    # Canvas sized to the actual number of images; the original hard-coded 4,
    # which cropped tweets with more images and left blank bands with fewer.
    w, h = images[0].size
    out = Image.new(mode=images[0].mode, size=(w, h * len(images)))
    for i, image in enumerate(images):
        out.paste(image, box=(0, h * i))
    return out
def main():
    """Stitch the tweet HTML stored in the local file "input" into result.png."""
    with open("input") as source_file:
        html = source_file.read().strip()
    stitched = get_image(html)
    stitched.save("result.png")
# Allow running this module directly on previously saved tweet HTML.
if __name__ == "__main__":
    main()