diff --git a/py34/list.py b/py34/list.py
index c17c120..2d46627 100644
--- a/py34/list.py
+++ b/py34/list.py
@@ -1,9 +1,10 @@
 from .post import Post
-from .scraper import scraper, ScraperException
+from .scraper import scraper, ScraperException, CaptchaException
 from .url import parse_thumbnail_url
-from .dockid import is_nochick, is_toodeep
+from .dockid import is_nochick, is_toodeep, is_captcha
 from .thread import Pool
 import urllib.parse
+from retry import retry
 
 
 class ListException(Exception):
@@ -12,6 +13,7 @@ class ListException(Exception):
         self.document = document
 
 
+@retry(CaptchaException, tries=5, delay=3, jitter=2)
 class List:
     def __init__(self, tags: list[str], offset: int = 0, fetch_thumbnails: bool = True):
         self.posts: list[Post] = []
@@ -25,6 +27,9 @@ class List:
         if is_toodeep(document):
             raise ListException(document, "Search to deep")
 
+        if is_captcha(document):
+            raise CaptchaException("Received captcha")
+
         try:
             for entry in document.find_all("div", {"class": "image-list"})[0].children:
                 # Skip garbage
diff --git a/py34/scraper.py b/py34/scraper.py
index 01bcf06..ae10bd0 100644
--- a/py34/scraper.py
+++ b/py34/scraper.py
@@ -11,6 +11,19 @@ class ScraperException(Exception):
         self.response = res
 
 
+class CaptchaException(Exception):
+    """Captcha was received; constructing this exception resets the scraper."""
+
+    def __init__(self, *argv: object):
+        # Reset scraper. NOTE(review): this rebinds only this module's global;
+        # modules that did `from .scraper import scraper` keep the old object.
+        global scraper
+        scraper = Scraper()
+
+        # Construct the exception (forward the message/args to Exception)
+        super().__init__(*argv)
+
+
 class Scraper:
     def __init__(self):
         self._scraper: CloudScraper = CloudScraper()