Retry if received captcha

This commit is contained in:
2025-08-04 14:58:07 +02:00
parent 8dd0b576a2
commit 6834a2d056
2 changed files with 17 additions and 2 deletions

View File

@ -1,9 +1,10 @@
from .post import Post
from .scraper import scraper, ScraperException
from .scraper import scraper, ScraperException, CaptchaException
from .url import parse_thumbnail_url
from .dockid import is_nochick, is_toodeep
from .dockid import is_nochick, is_toodeep, is_captcha
from .thread import Pool
import urllib.parse
from retry import retry
class ListException(Exception):
@ -12,6 +13,7 @@ class ListException(Exception):
self.document = document
@retry(CaptchaException, tries=5, delay=3, jitter=2)
class List:
def __init__(self, tags: list[str], offset: int = 0, fetch_thumbnails: bool = True):
self.posts: list[Post] = []
@ -25,6 +27,9 @@ class List:
if is_toodeep(document):
raise ListException(document, "Search to deep")
if is_captcha(document):
raise CaptchaException("Received captcha")
try:
for entry in document.find_all("div", {"class": "image-list"})[0].children:
# Skip garbage

View File

@ -11,6 +11,16 @@ class ScraperException(Exception):
self.response = res
class CaptchaException(Exception):
    """Raised when a captcha page is received instead of the expected document.

    Constructing this exception has a side effect: the module-level
    ``scraper`` is rebound to a freshly created ``Scraper`` instance.

    NOTE(review): only the name in this module is rebound. Modules that did
    ``from .scraper import scraper`` keep a reference to the previous
    instance -- confirm callers look the scraper up through the module.
    """

    def __init__(self, *argv: object):
        """Reset the shared scraper and initialise the exception.

        Args:
            *argv: Positional arguments forwarded to ``Exception`` (typically
                a single message string); they end up in ``self.args``.
        """
        # Reset scraper
        global scraper
        scraper = Scraper()
        # Construct the exception. The previous ``super(Exception, *argv)``
        # only built a ``super`` proxy (and raised TypeError as soon as a
        # message was passed) instead of calling the base initialiser.
        super().__init__(*argv)
class Scraper:
def __init__(self):
self._scraper: CloudScraper = CloudScraper()