Improved URL handling

This commit is contained in:
2025-08-01 15:12:50 +02:00
parent 56fdad7fd4
commit 317d81f995
2 changed files with 54 additions and 5 deletions

View File

@ -1,5 +1,6 @@
from .post import Post
from .scraper import scraper
from .scraper import scraper, ScraperException
from .url import parse_thumbnail_url
from .dockid import is_nochick, is_toodeep
import urllib.parse
from threading import Thread
@ -34,9 +35,9 @@ class List:
# Extract image
img = entry.find_all("img")[0]
if "src" in img.attrs:
img_src = img["src"].split("?")[0].split("/")[-2:]
img_src = parse_thumbnail_url(img["src"])
else:
img_src = img["data-cfsrc"].split("?")[0].split("/")[-2:]
img_src = parse_thumbnail_url(img["data-cfsrc"])
# Append post
def _thread_proc(*argv, **kwargs):
@ -45,8 +46,8 @@ class List:
target=_thread_proc,
args=(
int(entry["id"][1:]),
int(img_src[0]),
img_src[1].split("_")[1].split(".")[0], # Thumbnail_[id].jpg
img_src.dir,
img_src.id,
img["alt"].split(" "),
)
))

48
py34/url.py Normal file
View File

@ -0,0 +1,48 @@
from urllib.parse import urlparse
from os.path import splitext
class ImageURL:
    """Components extracted from a full-size image URL.

    Attributes:
        dir: Numeric directory segment of the URL path.
        id: File name without its extension.
        format: File extension exactly as passed to the constructor.
    """

    def __init__(self, image_dir: int, image_id: str, image_format: str):
        self.dir: int = image_dir
        self.id: str = image_id
        self.format: str = image_format

    def __repr__(self) -> str:
        # Mirror the constructor call so instances are readable in logs
        # and tracebacks instead of showing a bare object address.
        return (
            f"{type(self).__name__}(image_dir={self.dir!r}, "
            f"image_id={self.id!r}, image_format={self.format!r})"
        )
class ThumbnailURL:
    """Components extracted from a thumbnail URL.

    Attributes:
        dir: Numeric directory segment of the URL path.
        id: Image identifier taken from the thumbnail file name.
    """

    def __init__(self, image_dir: int, image_id: str):
        self.dir: int = image_dir
        self.id: str = image_id

    def __repr__(self) -> str:
        # Mirror the constructor call so instances are readable in logs
        # and tracebacks instead of showing a bare object address.
        return (
            f"{type(self).__name__}(image_dir={self.dir!r}, "
            f"image_id={self.id!r})"
        )
def parse_image_url(url: str) -> ImageURL:
url = urlparse(url)
if url.hostname != "wimg.rule34.xxx":
raise Exception("Invalid URL hostname")
path = list(filter(bool, url.path.split("/")))
if len(path) != 3 or path[0] != "images":
raise Exception("Invalid URL path")
file = splitext(path[2])
return ImageURL(
int(path[1]),
file[0],
file[1],
)
def parse_thumbnail_url(url: str) -> ThumbnailURL:
url = urlparse(url)
if url.hostname != "wimg.rule34.xxx":
raise Exception("Invalid URL hostname")
path = list(filter(bool, url.path.split("/")))
if len(path) != 3 or path[0] != "thumbnails":
raise Exception("Invalid URL path")
file = splitext(path[2].split("_")[1])
return ThumbnailURL(
int(path[1]),
file[0],
)