Added a semi-functional scraper.
To work, it must run from a country that does not require age verification (such as the US or Japan), or with a verified Twitter account.
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
**/__pycache__
|
||||||
2
README.md
Normal file
2
README.md
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
Script that stitches vertically split Twitter images back together.
|
||||||
|
|
||||||
39
main.py
39
main.py
@@ -1,34 +1,9 @@
|
|||||||
from bs4 import BeautifulSoup
|
from stitch import get_image
|
||||||
import requests
|
from scrape import get_page
|
||||||
from PIL import Image
|
|
||||||
from io import BytesIO
|
|
||||||
|
|
||||||
def get_image(html):
    """Extract tweet media links from rendered HTML, download the images,
    and stitch them into one vertical strip.

    html: page source of a tweet (string).
    Returns a PIL.Image with all media images stacked top-to-bottom.
    Raises ValueError if the HTML contains no media image links.
    """
    # get the links: Twitter media <img> elements carry draggable="true"
    soup = BeautifulSoup(html, "lxml")
    links = []
    for element in soup.find_all("img", attrs={"draggable": "true"}):
        src = element.get("src")
        if "media" in src:
            # scraped page source is HTML-escaped; restore literal ampersands
            # (the original replace("&", "&") was a no-op)
            links.append(src.replace("&amp;", "&"))

    if not links:
        raise ValueError("no media image links found in the supplied HTML")

    # get the images
    images = [Image.open(BytesIO(requests.get(link).content)) for link in links]

    # stitch the images together; assumes every segment shares the first
    # image's width/height (true for Twitter's vertical splits) — TODO confirm
    w = images[0].width
    h = images[0].height
    # size the canvas for the actual number of segments
    # (previously hard-coded to 4, which mis-sized any other count)
    out = Image.new(mode=images[0].mode, size=(w, h * len(images)))

    for i, image in enumerate(images):
        out.paste(image, box=(0, h * i))

    # done
    return out
|
|
||||||
|
|
||||||
def main():
    """Stitch images from a locally saved page source (file named "input")."""
    with open("input") as source_file:
        page_html = source_file.read().strip()
    get_image(page_html).save("result.png")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # TODO: fill in the tweet URL to scrape
    url = ""

    page_html = get_page(url)
    stitched = get_image(page_html)
    stitched.save("result.png")
|
||||||
19
scrape.py
Normal file
19
scrape.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
from selenium import webdriver
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
|
def get_page(url):
    """Fetch *url* in headless Firefox and return the rendered page source.

    Uses the local "./ffprofile/" Firefox profile (presumably carries the
    logged-in Twitter session — verify against repo setup notes).
    The browser is always shut down, even if navigation fails.
    """
    profile = webdriver.FirefoxProfile("./ffprofile/")
    options = webdriver.FirefoxOptions()
    options.add_argument("--headless")
    options.profile = profile

    driver = webdriver.Firefox(options)
    try:
        print("sending get request...")
        driver.get(url)
        print("waiting for page to load...")
        # crude fixed wait for JS-rendered content; a WebDriverWait on the
        # media <img> elements would be more reliable
        sleep(10)
        return driver.page_source
    finally:
        # the original leaked a Firefox process per call — always quit
        driver.quit()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # ad-hoc smoke test against a known multi-image tweet
    get_page("https://x.com/wata_ruh/status/2011037668386148484")
|
||||||
37
stitch.py
Normal file
37
stitch.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
from PIL import Image
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
def get_image(html):
    """Extract tweet media links from rendered HTML, download the images,
    and stitch them into one vertical strip.

    html: page source of a tweet (string).
    Returns a PIL.Image with all media images stacked top-to-bottom.
    Raises ValueError if the HTML contains no media image links.
    """
    # get the links: Twitter media <img> elements carry draggable="true"
    soup = BeautifulSoup(html, "lxml")
    links = []
    for element in soup.find_all("img", attrs={"draggable": "true"}):
        src = element.get("src")
        if "media" in src:
            # scraped page source is HTML-escaped; restore literal ampersands
            # (the original replace("&", "&") was a no-op)
            links.append(src.replace("&amp;", "&"))

    if not links:
        raise ValueError("no media image links found in the supplied HTML")

    print(links)

    # get the images
    print("getting images...")
    images = [Image.open(BytesIO(requests.get(link).content)) for link in links]

    # stitch the images together; assumes every segment shares the first
    # image's width/height (true for Twitter's vertical splits) — TODO confirm
    w = images[0].width
    h = images[0].height
    # size the canvas for the actual number of segments
    # (previously hard-coded to 4, which mis-sized any other count)
    out = Image.new(mode=images[0].mode, size=(w, h * len(images)))

    for i, image in enumerate(images):
        out.paste(image, box=(0, h * i))

    # done
    return out
|
||||||
|
|
||||||
|
def main():
    """Stitch images from a locally saved page source (file named "input")."""
    with open("input") as source_file:
        page_html = source_file.read().strip()
    result = get_image(page_html)
    result.save("result.png")


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user