Improved URL handling
This commit is contained in:
11
py34/list.py
11
py34/list.py
@ -1,5 +1,6 @@
|
|||||||
from .post import Post
|
from .post import Post
|
||||||
from .scraper import scraper
|
from .scraper import scraper, ScraperException
|
||||||
|
from .url import parse_thumbnail_url
|
||||||
from .dockid import is_nochick, is_toodeep
|
from .dockid import is_nochick, is_toodeep
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
@ -34,9 +35,9 @@ class List:
|
|||||||
# Extract image
|
# Extract image
|
||||||
img = entry.find_all("img")[0]
|
img = entry.find_all("img")[0]
|
||||||
if "src" in img.attrs:
|
if "src" in img.attrs:
|
||||||
img_src = img["src"].split("?")[0].split("/")[-2:]
|
img_src = parse_thumbnail_url(img["src"])
|
||||||
else:
|
else:
|
||||||
img_src = img["data-cfsrc"].split("?")[0].split("/")[-2:]
|
img_src = parse_thumbnail_url(img["data-cfsrc"])
|
||||||
|
|
||||||
# Append post
|
# Append post
|
||||||
def _thread_proc(*argv, **kwargs):
|
def _thread_proc(*argv, **kwargs):
|
||||||
@ -45,8 +46,8 @@ class List:
|
|||||||
target=_thread_proc,
|
target=_thread_proc,
|
||||||
args=(
|
args=(
|
||||||
int(entry["id"][1:]),
|
int(entry["id"][1:]),
|
||||||
int(img_src[0]),
|
img_src.dir,
|
||||||
img_src[1].split("_")[1].split(".")[0], # Thumbnail_[id].jpg
|
img_src.id,
|
||||||
img["alt"].split(" "),
|
img["alt"].split(" "),
|
||||||
)
|
)
|
||||||
))
|
))
|
||||||
|
|||||||
48
py34/url.py
Normal file
48
py34/url.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
from urllib.parse import urlparse
|
||||||
|
from os.path import splitext
|
||||||
|
|
||||||
|
|
||||||
|
class ImageURL:
    """Components extracted from a full-size image URL.

    Attributes:
        dir: Numeric directory segment of the URL path.
        id: File name without its extension.
        format: File extension, stored exactly as passed by the caller.
    """

    def __init__(self, image_dir: int, image_id: str, image_format: str):
        self.dir: int = image_dir
        self.id: str = image_id
        self.format: str = image_format

    def __repr__(self) -> str:
        # Readable representation so instances are useful in logs/tracebacks.
        return (
            f"{type(self).__name__}(image_dir={self.dir!r}, "
            f"image_id={self.id!r}, image_format={self.format!r})"
        )
|
||||||
|
|
||||||
|
|
||||||
|
class ThumbnailURL:
    """Components extracted from a thumbnail URL.

    Attributes:
        dir: Numeric directory segment of the URL path.
        id: Image identifier taken from the thumbnail file name.
    """

    def __init__(self, image_dir: int, image_id: str):
        self.dir: int = image_dir
        self.id: str = image_id

    def __repr__(self) -> str:
        # Readable representation so instances are useful in logs/tracebacks.
        return (
            f"{type(self).__name__}(image_dir={self.dir!r}, "
            f"image_id={self.id!r})"
        )
|
||||||
|
|
||||||
|
|
||||||
|
def parse_image_url(url: str) -> ImageURL:
    """Break a full-size image URL into directory, id and extension.

    The URL must be served from ``wimg.rule34.xxx`` and its path must
    consist of exactly three non-empty segments: ``images``, a directory
    number and a file name.

    Args:
        url: The image URL to parse.

    Returns:
        An ``ImageURL`` holding the directory (as ``int``), the file name
        without extension, and the extension as returned by
        ``os.path.splitext`` (i.e. including the leading dot, if any).

    Raises:
        Exception: If the hostname or the path layout does not match.
        ValueError: If the directory segment is not an integer
            (propagated from ``int``).
    """
    parsed = urlparse(url)
    if not parsed.hostname == "wimg.rule34.xxx":
        raise Exception("Invalid URL hostname")

    # Drop empty pieces produced by leading/trailing/duplicate slashes.
    segments = [piece for piece in parsed.path.split("/") if piece]
    if not (len(segments) == 3 and segments[0] == "images"):
        raise Exception("Invalid URL path")

    _, directory, filename = segments
    stem, extension = splitext(filename)
    return ImageURL(int(directory), stem, extension)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_thumbnail_url(url: str) -> ThumbnailURL:
    """Break a thumbnail URL into its directory number and image id.

    The URL must be served from ``wimg.rule34.xxx`` and its path must
    consist of exactly three non-empty segments: ``thumbnails``, a
    directory number and a file name containing an underscore
    (e.g. ``thumbnail_<id>.jpg``).

    Args:
        url: The thumbnail URL to parse.

    Returns:
        A ``ThumbnailURL`` holding the directory (as ``int``) and the id,
        i.e. the part of the file name after the first underscore with
        any extension removed.

    Raises:
        Exception: If the hostname or the path layout (including the
            file-name shape) does not match.
        ValueError: If the directory segment is not an integer
            (propagated from ``int``).
    """
    parsed = urlparse(url)
    if parsed.hostname != "wimg.rule34.xxx":
        raise Exception("Invalid URL hostname")

    # Drop empty pieces produced by leading/trailing/duplicate slashes.
    path = list(filter(bool, parsed.path.split("/")))
    if len(path) != 3 or path[0] != "thumbnails":
        raise Exception("Invalid URL path")

    # The id is the piece after the first underscore. A file name without
    # an underscore used to escape as a bare IndexError; report it as the
    # same path error as the other layout checks instead.
    name_parts = path[2].split("_")
    if len(name_parts) < 2:
        raise Exception("Invalid URL path")

    file = splitext(name_parts[1])
    return ThumbnailURL(
        int(path[1]),
        file[0],
    )
|
||||||
Reference in New Issue
Block a user