Added is_view check, fixed exception constructors, added sample image

This commit is contained in:
2025-08-09 02:04:45 +02:00
parent f879535b25
commit e4814f8a37
4 changed files with 50 additions and 16 deletions

View File

@ -32,3 +32,11 @@ def is_nochick(data: BeautifulSoup | str | bytes) -> bool:
def is_toodeep(data: BeautifulSoup | str | bytes) -> bool:
    """Report whether ``data`` is the "unable to search this deep" page.

    ``data`` is first passed through the module's ``bs4`` helper, and the
    actual header comparison is delegated to ``_is_header`` (both defined
    elsewhere in this module).
    """
    document = bs4(data)
    return _is_header(document, "unable to search this deep in temporarily.")
def is_view(data: BeautifulSoup | str | bytes) -> bool:
    """Report whether ``data`` looks like a single-post view page.

    The page counts as a view only when it contains exactly one ``<img>``
    element whose ``id`` attribute is ``"image"``; zero or several matches
    both yield ``False``.
    """
    matches = bs4(data).find_all("img", attrs={"id": "image"})
    return len(matches) == 1

View File

@ -9,7 +9,7 @@ from retry import retry
class ListException(Exception):
    """Exception that keeps the document it was raised for.

    Args:
        document: The document bytes to attach to the exception; stored
            unchanged on ``self.document``.
        *argv: Regular exception arguments (typically the message),
            forwarded to ``Exception``.
    """

    def __init__(self, document: bytes, *argv):
        # Forward only the message arguments. Passing ``self`` along would
        # place the exception inside its own ``args`` tuple, which garbles
        # ``str(exc)`` (and recurses forever when no message is given).
        super().__init__(*argv)
        self.document = document

View File

@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
class ScraperException(Exception):
    """Exception that carries the response it was created with.

    Args:
        res: The response object to attach; stored unchanged on
            ``self.response``.
        *argv: Regular exception arguments (typically the message),
            forwarded to ``Exception``.
    """

    def __init__(self, res: Response, *argv: object):
        # Forward only the message arguments. Passing ``self`` along would
        # place the exception inside its own ``args`` tuple, so ``str(exc)``
        # would recurse when no message is supplied.
        super().__init__(*argv)
        self.response = res
@ -18,7 +18,7 @@ class CaptchaException(Exception):
scraper = Scraper()
# Construct the exception
super(Exception, *argv)
super().__init__(self, *argv)
class Scraper:

View File

@ -1,11 +1,17 @@
from .post import Post
from .url import ImageURL, ThumbnailURL, parse_image_url
from .url import ImageURL, SampleURL, ThumbnailURL, parse_image_url
from .scraper import scraper
from .dockid import is_view
from io import BytesIO
from PIL import Image
from PIL.ImageFile import ImageFile
class ViewMissingException(Exception):
    """Raised when a requested post page turns out not to be a view page.

    Constructed like any plain ``Exception``: positional arguments become
    ``args`` and the message shown by ``str(exc)``.
    """

    # No ``__init__`` override: the inherited constructor already stores the
    # arguments correctly. The previous override forwarded ``self`` as an
    # extra argument, which put the exception inside its own ``args`` and
    # broke ``str(exc)``.
class ViewTags:
def __init__(self, cpr: list[str], chr: list[str], art: list[str], gen: list[str], met: list[str]):
self.copyright = cpr.copy()
@ -35,14 +41,20 @@ class ViewTags:
class View:
def __init__(self, id: int):
self.id = int(id)
self._image_data: bytes | None = None
self._image: ImageFile | None = None
self._image_url: ImageURL | None = None
self._thumb_data: bytes | None = None
self._thumb: ImageFile | None = None
self._thumb_url: ThumbnailURL | None = None
self._image_data: bytes | None = None
self._image: ImageFile | None = None
self.image_url: ImageURL | None = None
self._sample_data: bytes | None = None
self._sample: ImageFile | None = None
self.sample_url: ThumbnailURL | None = None
self._thumb_data: bytes | None = None
self._thumb: ImageFile | None = None
self.thumb_url: ThumbnailURL | None = None
document = scraper.get_html(f"https://rule34.xxx/index.php?page=post&s=view&id={id}")
if not is_view(document):
raise ViewMissingException("View does not exist")
tag_bar = document.find_all("ul", attrs={"id": "tag-sidebar"})[0]
cpr = []
chr = []
@ -71,9 +83,10 @@ class View:
label = ent.text.lower().strip()
match label:
case "original image":
self._image_url = parse_image_url(ent.find_all("a")[0]["href"])
self.image_url = parse_image_url(ent.find_all("a")[0]["href"])
self._thumb_url = ThumbnailURL(self._image_url.dir, self._image_url.id)
self.sample_url = SampleURL(self.image_url.dir, self.image_url.id)
self.thumb_url = ThumbnailURL(self.image_url.dir, self.image_url.id)
def get_image(self) -> ImageFile:
@ -86,10 +99,23 @@ class View:
def get_image_data(self) -> bytes:
    """Return the original image's data, fetching it on first use.

    The value returned by ``scraper.get(self.image_url)`` is cached in
    ``self._image_data``; later calls return the cached value without
    touching the scraper again.
    """
    if self._image_data is None:
        # Fetch through the public ``image_url`` attribute only; the
        # private ``_image_url`` it replaced is no longer assigned.
        self._image_data = scraper.get(self.image_url)
    return self._image_data
def get_sample(self) -> ImageFile:
    """Return the sample image as an opened PIL image, decoding it once.

    The first call wraps the bytes from ``get_sample_data()`` in a
    ``BytesIO`` and opens them with ``Image.open``; the result is kept in
    ``self._sample`` and reused by later calls.
    """
    if self._sample is None:
        raw = self.get_sample_data()
        self._sample = Image.open(BytesIO(raw))
    return self._sample
def get_sample_data(self) -> bytes:
    """Return the sample image's data, fetching it on first use.

    The value returned by ``scraper.get(self.sample_url)`` is cached in
    ``self._sample_data``; later calls return the cached value without
    touching the scraper again.
    """
    if self._sample_data is None:
        self._sample_data = scraper.get(self.sample_url)
    # Return on the fetch path as well; previously the first call fell off
    # the end of the function and handed ``None`` to the caller.
    return self._sample_data
def get_thumbnail(self) -> ImageFile:
if self._thumb is not None:
return self._thumb
@ -100,14 +126,14 @@ class View:
def get_thumbnail_data(self) -> bytes:
    """Return the thumbnail's data, fetching it on first use.

    The value returned by ``scraper.get(self.thumb_url)`` is cached in
    ``self._thumb_data``; later calls return the cached value without
    touching the scraper again.
    """
    if self._thumb_data is None:
        # Fetch through the public ``thumb_url`` attribute only; the
        # private ``_thumb_url`` it replaced is no longer assigned.
        self._thumb_data = scraper.get(self.thumb_url)
    # Return on the fetch path as well, so the first call does not yield
    # ``None``.
    return self._thumb_data
def to_post(self) -> Post:
    """Build a ``Post`` from this view.

    The post is constructed from the view's id, the ``dir`` and ``id`` of
    its ``image_url``, and the tag list produced by ``self.tags.to_list()``.

    NOTE(review): ``image_url`` starts out as ``None``; this assumes it was
    populated while the page was parsed — confirm for pages lacking an
    "original image" entry.
    """
    # Read the location from the public ``image_url`` only; the private
    # ``_image_url`` it replaced is no longer assigned.
    location = self.image_url
    return Post(
        self.id,
        location.dir,
        location.id,
        self.tags.to_list(),
    )