From d112ea4ce930026f3627fca1b2d49ab3f00a4ab4 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Mon, 4 Aug 2025 14:34:04 +0200
Subject: [PATCH 01/23] Added threadpool

---
 py34/list.py   | 13 +++++--------
 py34/thread.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 8 deletions(-)
 create mode 100644 py34/thread.py

diff --git a/py34/list.py b/py34/list.py
index 8bc44b7..e8491c2 100644
--- a/py34/list.py
+++ b/py34/list.py
@@ -2,8 +2,8 @@ from .post import Post
 from .scraper import scraper, ScraperException
 from .url import parse_thumbnail_url
 from .dockid import is_nochick, is_toodeep
+from .thread import Pool
 import urllib.parse
-from threading import Thread
 
 
 class ListException(Exception):
@@ -55,13 +55,10 @@ class List:
 
             # Download thumbnails
             if fetch_thumbnails:
-                threads = [Thread(target=Post.get_thumbnail, args=(post,)) for post in self.posts]
-
-                for thread in threads:
-                    thread.start()
-
-                for thread in threads:
-                    thread.join()
+                pool = Pool()
+                for post in self.posts:
+                    pool.submit(Post.get_thumbnail_data, post)
+                pool.join()
 
         except ScraperException as ex:
             raise ex
diff --git a/py34/thread.py b/py34/thread.py
new file mode 100644
index 0000000..a15b37b
--- /dev/null
+++ b/py34/thread.py
@@ -0,0 +1,37 @@
+from threading import Thread
+from typing import Callable
+
+
+class Pool:
+    def __init__(self, max_workers: int = 5):
+        self.max_workers = max_workers
+        self.jobs: list[Thread] = []
+        self.workers: list[Thread] = []
+
+    def submit(self, func: Callable, *vargs, **kwargs):
+        def proc(self, func: Callable, *vargs, **kwargs):
+            func(*vargs, **kwargs)
+            self._pool_proc()
+
+        self.jobs.append(Thread(
+            target = proc,
+            args = (self, func, *vargs, ),
+            kwargs = kwargs
+        ))
+
+        self._pool_proc()
+
+    def join(self):
+        while len(self.workers) != 0:
+            self.workers[-1].join()
+            self._pool_proc()
+
+    def _pool_proc(self):
+        # Remove any dead workers
+        self.workers = list(filter(Thread.is_alive, self.workers))
+
+        # Process jobs if any
+        while len(self.workers) < self.max_workers and len(self.jobs) != 0:
+            job = self.jobs.pop()
+            job.start()
+            self.workers.append(job)

From 8dd0b576a2706d9dc845fc4b603d79a0fe7fe503 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Mon, 4 Aug 2025 14:40:38 +0200
Subject: [PATCH 02/23] Removed empty "tags"

---
 py34/list.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py34/list.py b/py34/list.py
index e8491c2..c17c120 100644
--- a/py34/list.py
+++ b/py34/list.py
@@ -50,7 +50,7 @@ class List:
                     post_id,
                     img_src.dir,
                     img_src.id,
-                    img["alt"].split(" "),
+                    sorted(list(filter(bool, map(str.strip, img["alt"].split(" "))))),
                 ))
 
             # Download thumbnails

From 6834a2d0567c67b6584833b9f88a7375ecab2607 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Mon, 4 Aug 2025 14:58:07 +0200
Subject: [PATCH 03/23] Retry if received captcha

---
 py34/list.py    |  9 +++++++--
 py34/scraper.py | 10 ++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/py34/list.py b/py34/list.py
index c17c120..2d46627 100644
--- a/py34/list.py
+++ b/py34/list.py
@@ -1,9 +1,10 @@
 from .post import Post
-from .scraper import scraper, ScraperException
+from .scraper import scraper, ScraperException, CaptchaException
 from .url import parse_thumbnail_url
-from .dockid import is_nochick, is_toodeep
+from .dockid import is_nochick, is_toodeep, is_captcha
 from .thread import Pool
 import urllib.parse
+from retry import retry
 
 
 class ListException(Exception):
@@ -12,6 +13,7 @@ class ListException(Exception):
         self.document = document
 
 
+@retry(CaptchaException, tries=5, delay=3, jitter=2)
 class List:
     def __init__(self, tags: list[str], offset: int = 0, fetch_thumbnails: bool = True):
         self.posts: list[Post] = []
@@ -25,6 +27,9 @@ class List:
         if is_toodeep(document):
             raise ListException(document, "Search to deep")
 
+        if is_captcha(document):
+            raise CaptchaException("Received captcha")
+
         try:
             for entry in document.find_all("div", {"class": "image-list"})[0].children:
                 # Skip garbage
diff --git a/py34/scraper.py b/py34/scraper.py
index 01bcf06..ae10bd0 100644
--- a/py34/scraper.py
+++ b/py34/scraper.py
@@ -11,6 +11,16 @@ class ScraperException(Exception):
         self.response = res
 
 
+class CaptchaException(Exception):
+    def __init__(self, *argv: any):
+        # Reset scraper
+        global scraper
+        scraper = Scraper()
+
+        # Construct the exception
+        super(Exception, *argv)
+
+
 class Scraper:
     def __init__(self):
         self._scraper: CloudScraper = CloudScraper()

From 3831b951610f658bedc17484ce4a2f9b324e949e Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Wed, 6 Aug 2025 04:35:18 +0200
Subject: [PATCH 04/23] Changed to ThreadPoolExecutor

---
 py34/list.py   |  9 ++++-----
 py34/thread.py | 37 -------------------------------------
 2 files changed, 4 insertions(+), 42 deletions(-)
 delete mode 100644 py34/thread.py

diff --git a/py34/list.py b/py34/list.py
index 2d46627..b02cc16 100644
--- a/py34/list.py
+++ b/py34/list.py
@@ -2,7 +2,7 @@ from .post import Post
 from .scraper import scraper, ScraperException, CaptchaException
 from .url import parse_thumbnail_url
 from .dockid import is_nochick, is_toodeep, is_captcha
-from .thread import Pool
+from concurrent.futures import ThreadPoolExecutor
 import urllib.parse
 from retry import retry
 
@@ -60,10 +60,9 @@ class List:
 
             # Download thumbnails
             if fetch_thumbnails:
-                pool = Pool()
-                for post in self.posts:
-                    pool.submit(Post.get_thumbnail_data, post)
-                pool.join()
+                with ThreadPoolExecutor(max_workers=5) as pool:
+                    for post in self.posts:
+                        pool.submit(Post.get_thumbnail_data, post)
 
         except ScraperException as ex:
             raise ex
diff --git a/py34/thread.py b/py34/thread.py
deleted file mode 100644
index a15b37b..0000000
--- a/py34/thread.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from threading import Thread
-from typing import Callable
-
-
-class Pool:
-    def __init__(self, max_workers: int = 5):
-        self.max_workers = max_workers
-        self.jobs: list[Thread] = []
-        self.workers: list[Thread] = []
-
-    def submit(self, func: Callable, *vargs, **kwargs):
-        def proc(self, func: Callable, *vargs, **kwargs):
-            func(*vargs, **kwargs)
-            self._pool_proc()
-
-        self.jobs.append(Thread(
-            target = proc,
-            args = (self, func, *vargs, ),
-            kwargs = kwargs
-        ))
-
-        self._pool_proc()
-
-    def join(self):
-        while len(self.workers) != 0:
-            self.workers[-1].join()
-            self._pool_proc()
-
-    def _pool_proc(self):
-        # Remove any dead workers
-        self.workers = list(filter(Thread.is_alive, self.workers))
-
-        # Process jobs if any
-        while len(self.workers) < self.max_workers and len(self.jobs) != 0:
-            job = self.jobs.pop()
-            job.start()
-            self.workers.append(job)

From 435fcd2fc3a6e8e3139423f896e1acd1675d355a Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Fri, 8 Aug 2025 06:01:20 +0200
Subject: [PATCH 05/23] Add "is None" checks, instead of inferring

---
 py34/post.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/py34/post.py b/py34/post.py
index 93a344a..cd8550a 100644
--- a/py34/post.py
+++ b/py34/post.py
@@ -13,7 +13,7 @@ class Post:
         self.tags:            list[str] = tags.copy()
 
         self._thumbnail_data:   bytes | None = thumbnail
-        self._thumbnail:    ImageFile | None = Image.open(BytesIO(thumbnail)) if thumbnail else None
+        self._thumbnail:    ImageFile | None = None
 
         self._image_format: str       | None = None
         self._image_data:   bytes     | None = None
@@ -21,14 +21,14 @@ class Post:
 
 
     def get_thumbnail(self) -> ImageFile:
-        if self._thumbnail:
+        if self._thumbnail is not None:
             return self._thumbnail
         self._thumbnail = Image.open(BytesIO(self.get_thumbnail_data()))
         return self._thumbnail
 
 
     def get_thumbnail_data(self) -> bytes:
-        if self._thumbnail_data:
+        if self._thumbnail_data is not None:
             return self._thumbnail_data
         self._thumbnail_data = scraper.get(ThumbnailURL(self.image_dir, self.image_id))
         return self._thumbnail_data

From 41ecfaec9019b0c79bd7006a445c3688bd981d3a Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Fri, 8 Aug 2025 23:40:23 +0200
Subject: [PATCH 06/23] Added SampleURL

---
 py34/url.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/py34/url.py b/py34/url.py
index 125f21f..e5568bf 100644
--- a/py34/url.py
+++ b/py34/url.py
@@ -15,6 +15,16 @@ class ImageURL:
         return f"https://wimg.rule34.xxx//images/{self.dir}/{self.id}.{self.format}"
 
 
+class SampleURL:
+    def __init__(self, image_dir: int, image_id: str, image_format: str):
+        self.dir:    int = image_dir
+        self.id:     str = image_id
+        self.format: str = image_format.lstrip(".")
+
+    def __str__(self) -> str:
+        return f"https://rule34.xxx//samples/{self.dir}/sample_{self.id}.{self.format}"
+
+
 class ThumbnailURL:
     def __init__(self, image_dir: int, image_id: str):
         self.dir: int = image_dir

From f879535b25e93307790459b89c25d1288a000fee Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 01:07:29 +0200
Subject: [PATCH 07/23] Samples are always JPG

---
 py34/url.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/py34/url.py b/py34/url.py
index e5568bf..a2f0e5b 100644
--- a/py34/url.py
+++ b/py34/url.py
@@ -16,13 +16,12 @@ class ImageURL:
 
 
 class SampleURL:
-    def __init__(self, image_dir: int, image_id: str, image_format: str):
+    def __init__(self, image_dir: int, image_id: str):
         self.dir:    int = image_dir
         self.id:     str = image_id
-        self.format: str = image_format.lstrip(".")
 
     def __str__(self) -> str:
-        return f"https://rule34.xxx//samples/{self.dir}/sample_{self.id}.{self.format}"
+        return f"https://rule34.xxx//samples/{self.dir}/sample_{self.id}.jpg"
 
 
 class ThumbnailURL:

From e4814f8a37852648bf9592c5f1016b16a74be775 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 02:04:45 +0200
Subject: [PATCH 08/23] Added is_view check, fixed exception constructors,
 added sample image

---
 py34/dockid.py  |  8 ++++++++
 py34/list.py    |  2 +-
 py34/scraper.py |  4 ++--
 py34/view.py    | 52 ++++++++++++++++++++++++++++++++++++-------------
 4 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/py34/dockid.py b/py34/dockid.py
index 29de63d..d4e599c 100644
--- a/py34/dockid.py
+++ b/py34/dockid.py
@@ -32,3 +32,11 @@ def is_nochick(data: BeautifulSoup | str | bytes) -> bool:
 
 def is_toodeep(data: BeautifulSoup | str | bytes) -> bool:
     return _is_header(bs4(data), "unable to search this deep in temporarily.")
+
+
+def is_view(data: BeautifulSoup | str | bytes) -> bool:
+    doc = bs4(data)
+    els = doc.find_all("img", attrs = {"id": "image"})
+    if len(els) == 1:
+        return True
+    return False
diff --git a/py34/list.py b/py34/list.py
index b02cc16..7648931 100644
--- a/py34/list.py
+++ b/py34/list.py
@@ -9,7 +9,7 @@ from retry import retry
 
 class ListException(Exception):
     def __init__(self, documnet: bytes, *argv):
-        super(self, *argv)
+        super().__init__(self, *argv)
         self.document = document
 
 
diff --git a/py34/scraper.py b/py34/scraper.py
index ae10bd0..7f0eba8 100644
--- a/py34/scraper.py
+++ b/py34/scraper.py
@@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
 
 class ScraperException(Exception):
     def __init__(self, res: Response, *argv: any):
-        super(Exception, *argv)
+        super().__init__(self, *argv)
         self.response = res
 
 
@@ -18,7 +18,7 @@ class CaptchaException(Exception):
         scraper = Scraper()
 
         # Construct the exception
-        super(Exception, *argv)
+        super().__init__(self, *argv)
 
 
 class Scraper:
diff --git a/py34/view.py b/py34/view.py
index d37ebf7..7b19f89 100644
--- a/py34/view.py
+++ b/py34/view.py
@@ -1,11 +1,17 @@
 from .post import Post
-from .url import ImageURL, ThumbnailURL, parse_image_url
+from .url import ImageURL, SampleURL, ThumbnailURL, parse_image_url
 from .scraper import scraper
+from .dockid import is_view
 from io import BytesIO
 from PIL import Image
 from PIL.ImageFile import ImageFile
 
 
+class ViewMissingException(Exception):
+    def __init__(self, *args, **kwargs):
+        super().__init__(self, *args, **kwargs)
+
+
 class ViewTags:
     def __init__(self, cpr: list[str], chr: list[str], art: list[str], gen: list[str], met: list[str]):
         self.copyright = cpr.copy()
@@ -35,14 +41,20 @@ class ViewTags:
 class View:
     def __init__(self, id: int):
         self.id = int(id)
-        self._image_data: bytes        | None = None
-        self._image:      ImageFile    | None = None
-        self._image_url:  ImageURL     | None = None
-        self._thumb_data: bytes        | None = None
-        self._thumb:      ImageFile    | None = None
-        self._thumb_url:  ThumbnailURL | None = None
+        self._image_data:  bytes        | None = None
+        self._image:       ImageFile    | None = None
+        self.image_url:    ImageURL     | None = None
+        self._sample_data: bytes        | None = None
+        self._sample:      ImageFile    | None = None
+        self.sample_url:   ThumbnailURL | None = None
+        self._thumb_data:  bytes        | None = None
+        self._thumb:       ImageFile    | None = None
+        self.thumb_url:    ThumbnailURL | None = None
         document = scraper.get_html(f"https://rule34.xxx/index.php?page=post&s=view&id={id}")
 
+        if not is_view(document):
+            raise ViewMissingException("View does not exist")
+
         tag_bar = document.find_all("ul", attrs={"id": "tag-sidebar"})[0]
         cpr = []
         chr = []
@@ -71,9 +83,10 @@ class View:
                 label = ent.text.lower().strip()
                 match label:
                     case "original image":
-                        self._image_url = parse_image_url(ent.find_all("a")[0]["href"])
+                        self.image_url = parse_image_url(ent.find_all("a")[0]["href"])
 
-        self._thumb_url = ThumbnailURL(self._image_url.dir, self._image_url.id)
+        self.sample_url = SampleURL(self.image_url.dir, self.image_url.id)
+        self.thumb_url = ThumbnailURL(self.image_url.dir, self.image_url.id)
 
 
     def get_image(self) -> ImageFile:
@@ -86,10 +99,23 @@ class View:
     def get_image_data(self) -> bytes:
         if self._image_data is not None:
             return self._image_data
-        self._image_data = scraper.get(self._image_url)
+        self._image_data = scraper.get(self.image_url)
         return self._image_data
 
 
+    def get_sample(self) -> ImageFile:
+        if self._sample is not None:
+            return self._sample
+        self._sample = Image.open(BytesIO(self.get_sample_data()))
+        return self._sample
+
+
+    def get_sample_data(self) -> bytes:
+        if self._sample_data is not None:
+            return self._sample_data
+        self._sample_data = scraper.get(self.sample_url)
+
+
     def get_thumbnail(self) -> ImageFile:
         if self._thumb is not None:
             return self._thumb
@@ -100,14 +126,14 @@ class View:
     def get_thumbnail_data(self) -> bytes:
         if self._thumb_data is not None:
             return self._thumb_data
-        self._thumb_data = scraper.get(self._thumb_url)
+        self._thumb_data = scraper.get(self.thumb_url)
 
 
     def to_post(self) -> Post:
         return Post(
             self.id,
-            self._image_url.dir,
-            self._image_url.id,
+            self.image_url.dir,
+            self.image_url.id,
             self.tags.to_list(),
         )
 

From 8fa6e2cce42bf7d39c9c5a63fd24d67615695435 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 02:06:04 +0200
Subject: [PATCH 09/23] Made urls consistent

---
 py34/url.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py34/url.py b/py34/url.py
index a2f0e5b..62da613 100644
--- a/py34/url.py
+++ b/py34/url.py
@@ -21,7 +21,7 @@ class SampleURL:
         self.id:     str = image_id
 
     def __str__(self) -> str:
-        return f"https://rule34.xxx//samples/{self.dir}/sample_{self.id}.jpg"
+        return f"https://wimg.rule34.xxx//samples/{self.dir}/sample_{self.id}.jpg"
 
 
 class ThumbnailURL:

From 7ca4c4bfacba23178fde375c65fdcc7941e68685 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 02:25:50 +0200
Subject: [PATCH 10/23] Added scraper.reset

---
 py34/scraper.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/py34/scraper.py b/py34/scraper.py
index 7f0eba8..78df2fd 100644
--- a/py34/scraper.py
+++ b/py34/scraper.py
@@ -37,6 +37,9 @@ class Scraper:
     def _retry_get(self, url: str, body: bool) -> bytes | Response:
         return self._get(url, body=body)
 
+    def reset(self):
+        self._scraper = CloudScraper()
+
     def get(self, url: str, retry: bool = True, body: bool = True):
         if retry:
             return self._retry_get(url, body=body)

From 8f228bde36f5119260619d2e7d6107850bbe1b5a Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 02:27:23 +0200
Subject: [PATCH 11/23] Close response handles

---
 py34/scraper.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/py34/scraper.py b/py34/scraper.py
index 78df2fd..272f67c 100644
--- a/py34/scraper.py
+++ b/py34/scraper.py
@@ -31,7 +31,9 @@ class Scraper:
             return res
         if res.status_code != 200:
             raise ScraperException(res, "Request did not succeed")
-        return res.content
+        content = res.content
+        res.close()
+        return content
 
     @retry(Exception, tries=5, delay=3)
     def _retry_get(self, url: str, body: bool) -> bytes | Response:

From 14ed66aad75b53951cf23d05bb51eedb0d54ffd9 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 04:24:16 +0200
Subject: [PATCH 12/23] Added empty Post constructor

---
 py34/post.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/py34/post.py b/py34/post.py
index cd8550a..2e34f69 100644
--- a/py34/post.py
+++ b/py34/post.py
@@ -20,6 +20,10 @@ class Post:
         self._image:        ImageFile | None = None
 
 
+    def empty(id: int) -> "Post":
+        return Post(id, 0, "00", [], b"")
+
+
     def get_thumbnail(self) -> ImageFile:
         if self._thumbnail is not None:
             return self._thumbnail

From c9928342c2ad150d329c8e9adff8e08fdcee0e25 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 04:24:37 +0200
Subject: [PATCH 13/23] Added ViewURL

---
 py34/url.py  | 8 ++++++++
 py34/view.py | 4 ++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/py34/url.py b/py34/url.py
index 62da613..e784d9b 100644
--- a/py34/url.py
+++ b/py34/url.py
@@ -5,6 +5,14 @@ from os.path import splitext
 IMAGE_FORMATS = ["jpeg", "jpg", "png", "gif", "mp4", "webm"]
 
 
+class ViewURL:
+    def __init__(self, post_id: int):
+        self.id = post_id
+
+    def __str__(self) -> str:
+        return f"https://rule34.xxx/index.php?page=post&s=view&id={self.id}"
+
+
 class ImageURL:
     def __init__(self, image_dir: int, image_id: str, image_format: str):
         self.dir:    int = image_dir
diff --git a/py34/view.py b/py34/view.py
index 7b19f89..313d74f 100644
--- a/py34/view.py
+++ b/py34/view.py
@@ -1,5 +1,5 @@
 from .post import Post
-from .url import ImageURL, SampleURL, ThumbnailURL, parse_image_url
+from .url import ViewURL, ImageURL, SampleURL, ThumbnailURL, parse_image_url
 from .scraper import scraper
 from .dockid import is_view
 from io import BytesIO
@@ -50,7 +50,7 @@ class View:
         self._thumb_data:  bytes        | None = None
         self._thumb:       ImageFile    | None = None
         self.thumb_url:    ThumbnailURL | None = None
-        document = scraper.get_html(f"https://rule34.xxx/index.php?page=post&s=view&id={id}")
+        document = scraper.get_html(ViewURL(self.id))
 
         if not is_view(document):
             raise ViewMissingException("View does not exist")

From dddb0e95837adbbbcef745e1e4f5e65c9b3a75f2 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 04:25:24 +0200
Subject: [PATCH 14/23] Fixed handle leak

---
 py34/scraper.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/py34/scraper.py b/py34/scraper.py
index 272f67c..303c0d0 100644
--- a/py34/scraper.py
+++ b/py34/scraper.py
@@ -27,13 +27,12 @@ class Scraper:
 
     def _get(self, url: str, body: bool) -> bytes | Response:
         res: Response = self._scraper.get(url)
+        res.close()
         if not body:
             return res
         if res.status_code != 200:
             raise ScraperException(res, "Request did not succeed")
-        content = res.content
-        res.close()
-        return content
+        return res.content
 
     @retry(Exception, tries=5, delay=3)
     def _retry_get(self, url: str, body: bool) -> bytes | Response:

From 12038f9477d7fc33dfbeda260f929670c3e9174c Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 04:31:37 +0200
Subject: [PATCH 15/23] Fixed view identification

---
 py34/dockid.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/py34/dockid.py b/py34/dockid.py
index d4e599c..5bbd54e 100644
--- a/py34/dockid.py
+++ b/py34/dockid.py
@@ -37,6 +37,6 @@ def is_toodeep(data: BeautifulSoup | str | bytes) -> bool:
 def is_view(data: BeautifulSoup | str | bytes) -> bool:
     doc = bs4(data)
     els = doc.find_all("img", attrs = {"id": "image"})
-    if len(els) == 1:
-        return True
-    return False
+    if len(els) == 0:
+        return False
+    return True

From a0e38071d88bc746fdfb00bfe3a133c979933849 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 05:12:15 +0200
Subject: [PATCH 16/23] When bot detection kicks in, timeout and reload
 CloudScraper

---
 py34/scraper.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/py34/scraper.py b/py34/scraper.py
index 303c0d0..a4f654a 100644
--- a/py34/scraper.py
+++ b/py34/scraper.py
@@ -3,6 +3,7 @@ from cloudscraper import CloudScraper
 from requests import Response
 from retry import retry
 from bs4 import BeautifulSoup
+import time
 
 
 class ScraperException(Exception):
@@ -26,15 +27,20 @@ class Scraper:
         self._scraper: CloudScraper = CloudScraper()
 
     def _get(self, url: str, body: bool) -> bytes | Response:
-        res: Response = self._scraper.get(url)
-        res.close()
-        if not body:
-            return res
-        if res.status_code != 200:
-            raise ScraperException(res, "Request did not succeed")
-        return res.content
+        while True:
+            res: Response = self._scraper.get(url)
+            res.close()
+            if res.status_code == 429:
+                self.reset()
+                time.sleep(10)
+                continue
+            if not body:
+                return res
+            if res.status_code != 200:
+                raise ScraperException(res, "Request did not succeed")
+            return res.content
 
-    @retry(Exception, tries=5, delay=3)
+    @retry(Exception, tries=5, delay=5)
     def _retry_get(self, url: str, body: bool) -> bytes | Response:
         return self._get(url, body=body)
 

From abdcef0a80086f41208a9a46660d8d8f8ef70239 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 16:13:25 +0200
Subject: [PATCH 17/23] Fixed handle leak

---
 py34/scraper.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/py34/scraper.py b/py34/scraper.py
index a4f654a..4a7054a 100644
--- a/py34/scraper.py
+++ b/py34/scraper.py
@@ -16,7 +16,7 @@ class CaptchaException(Exception):
     def __init__(self, *argv: any):
         # Reset scraper
         global scraper
-        scraper = Scraper()
+        scraper.reset()
 
         # Construct the exception
         super().__init__(self, *argv)
@@ -44,7 +44,11 @@ class Scraper:
     def _retry_get(self, url: str, body: bool) -> bytes | Response:
         return self._get(url, body=body)
 
+    def close(self):
+        self._scraper.close()
+
     def reset(self):
+        self._scraper.close()
         self._scraper = CloudScraper()
 
     def get(self, url: str, retry: bool = True, body: bool = True):

From 462d24ab5e4e87dd1413fac1b8d00af50ea5c869 Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 17:00:38 +0200
Subject: [PATCH 18/23] Ignore removed posts

---
 py34/list.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/py34/list.py b/py34/list.py
index 7648931..4cdb5cb 100644
--- a/py34/list.py
+++ b/py34/list.py
@@ -46,9 +46,17 @@ class List:
                 # Extract image
                 img = entry.find_all("img")[0]
                 if "src" in img.attrs:
-                    img_src = parse_thumbnail_url(img["src"])
+                    img_src = img["src"]
                 else:
-                    img_src = parse_thumbnail_url(img["data-cfsrc"])
+                    img_src = img["data-cfsrc"]
+
+                # Is it a deleted post?
+                if img_src.split('?')[0].endswith("thumbnail_.jpg"):
+                    # Post has been deleted, continue
+                    continue
+
+                # Parse thumbnail url
+                img_src = parse_thumbnail_url(img_src)
 
                 # Append post
                 self.posts.append(Post(

From 68b6a505c115546b5b121cc107832dcf8c20d53e Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 17:00:56 +0200
Subject: [PATCH 19/23] Close handle when deleted

---
 py34/scraper.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/py34/scraper.py b/py34/scraper.py
index 4a7054a..52a75a8 100644
--- a/py34/scraper.py
+++ b/py34/scraper.py
@@ -26,6 +26,9 @@ class Scraper:
     def __init__(self):
         self._scraper: CloudScraper = CloudScraper()
 
+    def __del__(self):
+        self.close()
+
     def _get(self, url: str, body: bool) -> bytes | Response:
         while True:
             res: Response = self._scraper.get(url)

From 6fc820b3970014b9c545b70549747d8aba9a0fcf Mon Sep 17 00:00:00 2001
From: Tomuxs <tomas@lesbian.ddns.net>
Date: Sat, 9 Aug 2025 17:12:56 +0200
Subject: [PATCH 20/23] Added ListURL

---
 py34/list.py |  5 ++---
 py34/url.py  | 12 +++++++++++-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/py34/list.py b/py34/list.py
index 4cdb5cb..18880f6 100644
--- a/py34/list.py
+++ b/py34/list.py
@@ -1,6 +1,6 @@
 from .post import Post
 from .scraper import scraper, ScraperException, CaptchaException
-from .url import parse_thumbnail_url
+from .url import parse_thumbnail_url, ListURL
 from .dockid import is_nochick, is_toodeep, is_captcha
 from concurrent.futures import ThreadPoolExecutor
 import urllib.parse
@@ -18,8 +18,7 @@ class List:
     def __init__(self, tags: list[str], offset: int = 0, fetch_thumbnails: bool = True):
         self.posts: list[Post] = []
 
-        tags = "+".join(map(urllib.parse.quote_plus, tags))
-        document = scraper.get_html(f"https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={offset}")
+        document = scraper.get_html(ListURL(tags, offset))
 
         if is_nochick(document):
             return []
diff --git a/py34/url.py b/py34/url.py
index e784d9b..f76914c 100644
--- a/py34/url.py
+++ b/py34/url.py
@@ -1,4 +1,4 @@
-from urllib.parse import urlparse
+from urllib.parse import urlparse, quote_plus
 from os.path import splitext
 
 
@@ -13,6 +13,16 @@ class ViewURL:
         return f"https://rule34.xxx/index.php?page=post&s=view&id={self.id}"
 
 
+class ListURL:
+    def __init__(self, tags: list[str], offset: int):
+        self.tags = tags
+        self.offset = offset
+
+    def __str__(self):
+        tags = "+".join(map(quote_plus, self.tags))
+        return f"https://rule34.xxx/index.php?page=post&s=list&tags={tags}&pid={self.offset}"
+
+
 class ImageURL:
     def __init__(self, image_dir: int, image_id: str, image_format: str):
         self.dir:    int = image_dir

From fb06339cc773f334aee6a5789a5f79234a8fbc6e Mon Sep 17 00:00:00 2001
From: Tomas <tomas@lesbian.ddns.net>
Date: Sun, 10 Aug 2025 15:26:14 +0200
Subject: [PATCH 21/23] Added flask

---
 requirements.txt | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 58c4edd..0424155 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,16 @@
 beautifulsoup4==4.13.4
+blinker==1.9.0
 bs4==0.0.2
 certifi==2025.7.14
 charset-normalizer==3.4.2
+click==8.2.1
 cloudscraper==1.2.71
 decorator==5.2.1
+Flask==3.1.1
 idna==3.10
+itsdangerous==2.2.0
+Jinja2==3.1.6
+MarkupSafe==3.0.2
 pillow==11.3.0
 py==1.11.0
 pyparsing==3.2.3
@@ -14,3 +20,4 @@ retry==0.9.2
 soupsieve==2.7
 typing_extensions==4.14.1
 urllib3==2.5.0
+Werkzeug==3.1.3

From d0dec584a81f4df2098659c4ca7091c4d8bcad59 Mon Sep 17 00:00:00 2001
From: Tomas <tomas@lesbian.ddns.net>
Date: Sun, 10 Aug 2025 15:49:03 +0200
Subject: [PATCH 22/23] Removed debug print

---
 scraper/client/__main__.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/scraper/client/__main__.py b/scraper/client/__main__.py
index 4de36fa..c1710cd 100644
--- a/scraper/client/__main__.py
+++ b/scraper/client/__main__.py
@@ -9,9 +9,6 @@ import gc
 import py34
 import scraper.job
 
-import os
-print(f"/proc/{os.getpid()}/fd/")
-
 _spinner = 0
 def spinner() -> str:
     global _spinner

From 8e3a7b105a253e9c1dc5e896c5978b0385f2003f Mon Sep 17 00:00:00 2001
From: Tomas <tomas@lesbian.ddns.net>
Date: Sun, 10 Aug 2025 15:50:40 +0200
Subject: [PATCH 23/23] Fixed invalid extension check

---
 scraper/server/__main__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scraper/server/__main__.py b/scraper/server/__main__.py
index b34f18f..1a4c756 100644
--- a/scraper/server/__main__.py
+++ b/scraper/server/__main__.py
@@ -2,7 +2,7 @@ from scraper.config import config
 from flask import Flask, Response, request, render_template, url_for
 from pathlib import Path
 import py34
-from .block import BLOCK_SIZE, list_blocks, load_blocks, load_block, load_block_stats, save_block, loads as parse_block
+from .block import BLOCK_SIZE, list_blocks, load_blocks, load_block, load_block_stats, save_block, loads as parse_block, enttype2ext
 from .job import assign_job, working_on, any_job, jobs
 
 
@@ -107,7 +107,7 @@ def get_image(post_id: int = None):
     return Response(
         status = 307,
         headers = {
-            "Location": str(py34.url.ImageURL(entry.dir, entry.image, entry.ext))
+            "Location": str(py34.url.ImageURL(entry.dir, entry.image, enttype2ext(entry.type)))
         }
     )