From 317d81f995aa2b5fed5380dcb47a2cda0e637537 Mon Sep 17 00:00:00 2001
From: Tomuxs
Date: Fri, 1 Aug 2025 15:12:50 +0200
Subject: [PATCH] Improved URL handling

---
Reviewer note: line breaks were restored by hand. The indentation of the
py34/list.py context lines is reconstructed (confirm against the tree before
applying), and the url.py blob id in the index line predates the edits below.

 py34/list.py | 11 +++++----
 py34/url.py  | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 5 deletions(-)
 create mode 100644 py34/url.py

diff --git a/py34/list.py b/py34/list.py
index 3171ac0..e74579a 100644
--- a/py34/list.py
+++ b/py34/list.py
@@ -1,5 +1,6 @@
 from .post import Post
-from .scraper import scraper
+from .scraper import scraper, ScraperException
+from .url import parse_thumbnail_url
 from .dockid import is_nochick, is_toodeep
 import urllib.parse
 from threading import Thread
@@ -34,9 +35,9 @@ class List:
             # Extract image
             img = entry.find_all("img")[0]
             if "src" in img.attrs:
-                img_src = img["src"].split("?")[0].split("/")[-2:]
+                img_src = parse_thumbnail_url(img["src"])
             else:
-                img_src = img["data-cfsrc"].split("?")[0].split("/")[-2:]
+                img_src = parse_thumbnail_url(img["data-cfsrc"])
 
             # Append post
             def _thread_proc(*argv, **kwargs):
@@ -45,8 +46,8 @@ class List:
                     target=_thread_proc,
                     args=(
                         int(entry["id"][1:]),
-                        int(img_src[0]),
-                        img_src[1].split("_")[1].split(".")[0],  # Thumbnail_[id].jpg
+                        img_src.dir,
+                        img_src.id,
                         img["alt"].split(" "),
                     )
                 ))
diff --git a/py34/url.py b/py34/url.py
new file mode 100644
index 0000000..f95762b
--- /dev/null
+++ b/py34/url.py
@@ -0,0 +1,69 @@
+from urllib.parse import urlparse
+from os.path import splitext
+
+# The only host whose URLs the parsers below accept.
+_IMAGE_HOST = "wimg.rule34.xxx"
+
+
+class ImageURL:
+    """Directory, id and file extension of a full-size image URL."""
+
+    def __init__(self, image_dir: int, image_id: str, image_format: str):
+        self.dir: int = image_dir
+        self.id: str = image_id
+        # Extension as os.path.splitext returns it (leading dot kept).
+        self.format: str = image_format
+
+
+class ThumbnailURL:
+    """Directory and id of a thumbnail URL."""
+
+    def __init__(self, image_dir: int, image_id: str):
+        self.dir: int = image_dir
+        self.id: str = image_id
+
+
+def _split_path(url: str, root: str) -> tuple:
+    """Validate *url* and return ``(directory, file name)``.
+
+    The host must be ``_IMAGE_HOST`` and the path must look like
+    ``/<root>/<directory>/<file>`` with an integer directory; anything
+    else raises ValueError.
+    """
+    parsed = urlparse(url)
+    if parsed.hostname != _IMAGE_HOST:
+        raise ValueError("Invalid URL hostname")
+
+    # Drop empty segments so leading or doubled slashes do not matter.
+    parts = [part for part in parsed.path.split("/") if part]
+    if len(parts) != 3 or parts[0] != root:
+        raise ValueError("Invalid URL path")
+
+    try:
+        return int(parts[1]), parts[2]
+    except ValueError:
+        raise ValueError("Invalid URL path") from None
+
+
+def parse_image_url(url: str) -> ImageURL:
+    """Parse a ``wimg.rule34.xxx`` URL with path ``/images/<dir>/<id><ext>``.
+
+    Raises ValueError when the URL does not have that shape.
+    """
+    image_dir, filename = _split_path(url, "images")
+    image_id, image_format = splitext(filename)
+    return ImageURL(image_dir, image_id, image_format)
+
+
+def parse_thumbnail_url(url: str) -> ThumbnailURL:
+    """Parse a ``wimg.rule34.xxx`` thumbnail URL.
+
+    The path must be ``/thumbnails/<dir>/<name>_<id><ext>``; ValueError is
+    raised otherwise, including when the file name has no underscore.
+    """
+    image_dir, filename = _split_path(url, "thumbnails")
+    # Only the piece right after the first underscore is the id.
+    pieces = filename.split("_")
+    if len(pieces) < 2:
+        raise ValueError("Invalid URL path")
+    return ThumbnailURL(image_dir, splitext(pieces[1])[0])