Added a semi-functional scraper.
It needs to run from a country that doesn't require age verification (like the US or Japan), or with a verified Twitter account.
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
**/__pycache__
|
||||
2
README.md
Normal file
2
README.md
Normal file
@@ -0,0 +1,2 @@
|
||||
Script that stitches a tweet's vertical image segments together into one image.
|
||||
|
||||
39
main.py
39
main.py
@@ -1,34 +1,9 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
def get_image(html):
    """Extract tweet media image URLs from page HTML, download them, and
    stitch them into one vertical strip.

    Parameters:
        html: rendered page source of a tweet.

    Returns:
        A PIL.Image.Image with the downloaded images stacked top to bottom.

    Raises:
        ValueError: if no media images are found in the HTML.
    """
    # get the links
    soup = BeautifulSoup(html, "lxml")
    links = []
    for element in soup.find_all("img", attrs={"draggable": "true"}):
        src = element.get("src")
        if src and "media" in src:
            # NOTE(review): the original read replace("&", "&"), a no-op —
            # assuming the intent was to unescape "&amp;" in the URL.
            links.append(src.replace("&amp;", "&"))

    # Fail clearly instead of crashing with IndexError on images[0] below.
    if not links:
        raise ValueError("no media images found in the supplied HTML")

    # get the images
    images = [Image.open(BytesIO(requests.get(link).content)) for link in links]

    # stitch the images together; size the canvas to the actual image
    # count (was hard-coded to 4, which broke for any other count)
    w = images[0].width
    h = images[0].height
    out = Image.new(mode=images[0].mode, size=(w, h * len(images)))

    for i, image in enumerate(images):
        out.paste(image, box=(0, h * i))

    # done
    return out
|
||||
|
||||
def main():
    """Read previously saved page HTML from "input" and write the stitched image."""
    with open("input") as f:
        page_html = f.read().strip()
    stitched = get_image(page_html)
    stitched.save("result.png")
|
||||
from stitch import get_image
|
||||
from scrape import get_page
|
||||
|
||||
# Script entry point.
if __name__ == "__main__":
    main()
|
||||
# TODO(review): url is empty — set the tweet URL before running.
url = ""

# Fetch the rendered page, stitch its media images, and save the result.
source = get_page(url)
image = get_image(source)
image.save("result.png")
|
||||
19
scrape.py
Normal file
19
scrape.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from selenium import webdriver
|
||||
from time import sleep
|
||||
|
||||
def get_page(url):
    """Fetch a fully rendered page with headless Firefox and return its HTML.

    Parameters:
        url: address of the page to load (e.g. a tweet URL).

    Returns:
        The page source (str) after giving client-side scripts time to run.
    """
    # Reuse a pre-built profile (e.g. with saved login/cookies) so the
    # scrape works where the site would otherwise demand verification.
    profile = webdriver.FirefoxProfile("./ffprofile/")
    options = webdriver.FirefoxOptions()
    options.add_argument("--headless")
    options.profile = profile
    driver = webdriver.Firefox(options)

    try:
        print("sending get request...")
        driver.get(url)
        print("waiting for page to load...")
        # Fixed wait for client-side rendering; a WebDriverWait on the
        # media <img> elements would be more robust.
        sleep(10)
        return driver.page_source
    finally:
        # Was missing: without quit(), every call leaks a headless
        # Firefox process (and its geckodriver).
        driver.quit()
|
||||
|
||||
# Manual test: scrape a known tweet when run directly.
if __name__ == "__main__":
    get_page("https://x.com/wata_ruh/status/2011037668386148484")
|
||||
37
stitch.py
Normal file
37
stitch.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
def get_image(html):
    """Extract tweet media image URLs from page HTML, download them, and
    stitch them into one vertical strip.

    Parameters:
        html: rendered page source of a tweet (e.g. from scrape.get_page).

    Returns:
        A PIL.Image.Image with the downloaded images stacked top to bottom.

    Raises:
        ValueError: if no media images are found in the HTML.
    """
    # get the links
    soup = BeautifulSoup(html, "lxml")
    links = []
    for element in soup.find_all("img", attrs={"draggable": "true"}):
        src = element.get("src")
        if src and "media" in src:
            # NOTE(review): the original read replace("&", "&"), a no-op —
            # assuming the intent was to unescape "&amp;" in the URL.
            links.append(src.replace("&amp;", "&"))

    print(links)

    # Fail clearly instead of crashing with IndexError on images[0] below.
    if not links:
        raise ValueError("no media images found in the supplied HTML")

    # get the images
    print("getting images...")
    images = [Image.open(BytesIO(requests.get(link).content)) for link in links]

    # stitch the images together; size the canvas to the actual image
    # count (was hard-coded to 4, which broke for any other count)
    w = images[0].width
    h = images[0].height
    out = Image.new(mode=images[0].mode, size=(w, h * len(images)))

    for i, image in enumerate(images):
        out.paste(image, box=(0, h * i))

    # done
    return out
|
||||
|
||||
def main():
    """Read previously saved page HTML from "input" and write the stitched image."""
    with open("input") as f:
        page_html = f.read().strip()
    stitched = get_image(page_html)
    stitched.save("result.png")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user