Improved URL handling
This commit is contained in:
11
py34/list.py
11
py34/list.py
@ -1,5 +1,6 @@
|
||||
from .post import Post
|
||||
from .scraper import scraper
|
||||
from .scraper import scraper, ScraperException
|
||||
from .url import parse_thumbnail_url
|
||||
from .dockid import is_nochick, is_toodeep
|
||||
import urllib.parse
|
||||
from threading import Thread
|
||||
@ -34,9 +35,9 @@ class List:
|
||||
# Extract image
|
||||
img = entry.find_all("img")[0]
|
||||
if "src" in img.attrs:
|
||||
img_src = img["src"].split("?")[0].split("/")[-2:]
|
||||
img_src = parse_thumbnail_url(img["src"])
|
||||
else:
|
||||
img_src = img["data-cfsrc"].split("?")[0].split("/")[-2:]
|
||||
img_src = parse_thumbnail_url(img["data-cfsrc"])
|
||||
|
||||
# Append post
|
||||
def _thread_proc(*argv, **kwargs):
|
||||
@ -45,8 +46,8 @@ class List:
|
||||
target=_thread_proc,
|
||||
args=(
|
||||
int(entry["id"][1:]),
|
||||
int(img_src[0]),
|
||||
img_src[1].split("_")[1].split(".")[0], # Thumbnail_[id].jpg
|
||||
img_src.dir,
|
||||
img_src.id,
|
||||
img["alt"].split(" "),
|
||||
)
|
||||
))
|
||||
|
||||
48
py34/url.py
Normal file
48
py34/url.py
Normal file
@ -0,0 +1,48 @@
|
||||
from urllib.parse import urlparse
|
||||
from os.path import splitext
|
||||
|
||||
|
||||
class ImageURL:
    """Components of a full-size image URL.

    Attributes:
        dir: Numeric directory segment of the URL path.
        id: File name of the image without its extension.
        format: File extension of the image. When produced by
            ``parse_image_url`` this is the second element returned by
            ``os.path.splitext``, i.e. it keeps the leading dot (".jpg").
    """

    def __init__(self, image_dir: int, image_id: str, image_format: str):
        self.dir: int = image_dir
        self.id: str = image_id
        self.format: str = image_format

    def __repr__(self) -> str:
        # Readable representation for logs and debugging sessions.
        return (
            f"{type(self).__name__}("
            f"dir={self.dir!r}, id={self.id!r}, format={self.format!r})"
        )
|
||||
|
||||
|
||||
class ThumbnailURL:
    """Components of a thumbnail URL.

    Attributes:
        dir: Numeric directory segment of the URL path.
        id: Image id taken from the thumbnail file name.
    """

    def __init__(self, image_dir: int, image_id: str):
        self.dir: int = image_dir
        self.id: str = image_id

    def __repr__(self) -> str:
        # Readable representation for logs and debugging sessions.
        return f"{type(self).__name__}(dir={self.dir!r}, id={self.id!r})"
|
||||
|
||||
|
||||
def parse_image_url(url: str) -> ImageURL:
|
||||
url = urlparse(url)
|
||||
if url.hostname != "wimg.rule34.xxx":
|
||||
raise Exception("Invalid URL hostname")
|
||||
|
||||
path = list(filter(bool, url.path.split("/")))
|
||||
if len(path) != 3 or path[0] != "images":
|
||||
raise Exception("Invalid URL path")
|
||||
|
||||
file = splitext(path[2])
|
||||
return ImageURL(
|
||||
int(path[1]),
|
||||
file[0],
|
||||
file[1],
|
||||
)
|
||||
|
||||
|
||||
def parse_thumbnail_url(url: str) -> ThumbnailURL:
|
||||
url = urlparse(url)
|
||||
if url.hostname != "wimg.rule34.xxx":
|
||||
raise Exception("Invalid URL hostname")
|
||||
|
||||
path = list(filter(bool, url.path.split("/")))
|
||||
if len(path) != 3 or path[0] != "thumbnails":
|
||||
raise Exception("Invalid URL path")
|
||||
|
||||
file = splitext(path[2].split("_")[1])
|
||||
return ThumbnailURL(
|
||||
int(path[1]),
|
||||
file[0],
|
||||
)
|
||||
Reference in New Issue
Block a user