Compare commits

...

15 Commits

4 changed files with 24 additions and 21 deletions

View File

@@ -2,7 +2,6 @@ from .post import Post
 from .scraper import scraper, ScraperException, CaptchaException
 from .url import parse_thumbnail_url, ListURL
 from .dockid import is_nochick, is_toodeep, is_captcha
-from concurrent.futures import ThreadPoolExecutor
 import urllib.parse
 from retry import retry
@@ -67,9 +66,8 @@ class List:
             # Download thumbnails
             if fetch_thumbnails:
-                with ThreadPoolExecutor(max_workers=5) as pool:
-                    for post in self.posts:
-                        pool.submit(Post.get_thumbnail_data, post)
+                for post in self.posts:
+                    post.get_thumbnail_data()
         except ScraperException as ex:
             raise ex
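
Note that both this hunk and the matching one in the main script below replace fire-and-forget `pool.submit` calls with plain sequential calls. One behavioral difference worth keeping in mind: an exception raised inside a submitted callable is stored on its `Future` and, since nothing ever calls `result()`, it never reaches the surrounding `try`/`except`, whereas a direct call propagates immediately. A minimal, hypothetical illustration (the `work` function is not part of the diff):

```python
# Hypothetical illustration: exceptions from submitted tasks stay on the Futures.
from concurrent.futures import ThreadPoolExecutor


def work(n: int) -> None:
    raise ValueError(f"boom {n}")


with ThreadPoolExecutor(max_workers=5) as pool:
    for n in range(3):
        pool.submit(work, n)   # errors are captured in the Futures, never raised here
print("pool finished without surfacing any error")

try:
    for n in range(3):
        work(n)                # a direct call propagates immediately
except ValueError as ex:
    print(f"sequential call surfaced: {ex}")
```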

View File

@@ -2,6 +2,7 @@ from .dockid import bs4
 from cloudscraper import CloudScraper
 from requests import Response
 from retry import retry
+from time import sleep
 from bs4 import BeautifulSoup
@@ -30,16 +31,21 @@ class Scraper:

     def _request(self, method: str, url: str, body: bool) -> bytes | Response:
         while True:
-            res: Response = self._scraper.request(method, url)
-            res.close()
-            if res.status_code == 429:
-                self.reset()
-                continue
-            if not body:
-                return res
-            if res.status_code != 200:
-                raise ScraperException(res, f"Request did not succeed: {method} {url}")
-            return res.content
+            print(f"{method} {url}")
+            with self._scraper.request(method, url) as res:
+                if res.status_code == 429:
+                    print(f"\x1B[33mOverloaded\x1B[0m {method} {url}")
+                    self.reset()
+                    sleep(5)
+                    continue
+                if not body:
+                    print(f"\x1B[32mOk\x1B[0m {method} {url}")
+                    return res
+                if res.status_code != 200:
+                    print(f"\x1B[31mFailed\x1B[0m {method} {url}")
+                    raise ScraperException(res, f"Request did not succeed: {method} {url}")
+                print(f"\x1B[32mOk\x1B[0m {method} {url}")
+                return res.content

     @retry(Exception, tries=5, delay=5)
     def _retry_request(self, method: str, url: str, body: bool) -> bytes | Response:
@@ -49,8 +55,9 @@ class Scraper:
         self._scraper.close()

     def reset(self):
-        self._scraper.close()
-        self._scraper = CloudScraper()
+        pass
+        # self._scraper.close()
+        # self._scraper = CloudScraper()

     def request(self, method: str, url: str, retry: bool = True, body: bool = True):
         if retry:
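
The reworked `_request` follows a simple pattern: issue the request inside a context manager so each attempt's response is released, back off for five seconds on HTTP 429 and retry, and treat only 200 as success when the body is wanted. Below is a minimal sketch of that loop with the status prints omitted; the standalone `fetch` function and `FetchError` are illustrative stand-ins for the `Scraper` method and `ScraperException` in the diff.

```python
# Minimal sketch of the back-off loop in _request above.
# `fetch` and `FetchError` are illustrative; the real code lives on the Scraper class.
from time import sleep

from cloudscraper import CloudScraper
from requests import Response


class FetchError(Exception):
    pass


def fetch(scraper: CloudScraper, method: str, url: str, body: bool = True) -> bytes | Response:
    while True:
        # Context manager releases the response at the end of each attempt
        with scraper.request(method, url) as res:
            if res.status_code == 429:
                sleep(5)        # rate limited: wait, then retry the same request
                continue
            if not body:
                return res      # caller only needs status/headers
            if res.status_code != 200:
                raise FetchError(f"Request did not succeed: {method} {url}")
            return res.content  # read the body before the response is closed
```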

View File

@@ -2,7 +2,7 @@ from urllib.parse import urlparse, quote_plus
 from os.path import splitext
-IMAGE_FORMATS = ["jpeg", "jpg", "png", "gif", "mp4", "webm"]
+IMAGE_FORMATS = ["jpeg", "jpg", "png", "gif"]
+VIDEO_FORMATS = ["mp4", "webm"]
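
Splitting the old combined list into `IMAGE_FORMATS` and `VIDEO_FORMATS` lets callers tell the two media types apart from a URL's extension. A hedged usage sketch, reusing `urlparse` and `splitext` from this file's imports; the `media_kind` helper is hypothetical and not part of the diff:

```python
# Hypothetical helper showing one way the split lists could be consumed.
from os.path import splitext
from urllib.parse import urlparse

IMAGE_FORMATS = ["jpeg", "jpg", "png", "gif"]
VIDEO_FORMATS = ["mp4", "webm"]


def media_kind(url: str) -> str:
    # Extension of the URL path, without the leading dot, lower-cased
    ext = splitext(urlparse(url).path)[1].lstrip(".").lower()
    if ext in IMAGE_FORMATS:
        return "image"
    if ext in VIDEO_FORMATS:
        return "video"
    return "other"


# e.g. media_kind("https://example.com/a/b/clip.webm") -> "video"
```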

View File

@@ -1,6 +1,5 @@
 from datetime import datetime
 from enum import StrEnum
-from concurrent.futures import ThreadPoolExecutor
 import requests
 import tomllib
 import traceback
@@ -125,9 +124,8 @@ if True:
             print(f"{FG.r}{ex}{FG._}")
             print(f"{FG.y}{traceback.format_exc()}{FG._}")
             raise ex
-        with ThreadPoolExecutor(max_workers=netthd) as pool:
-            for post in lst.posts:
-                pool.submit(_add_post, block, post)
+        for post in lst.posts:
+            _add_post(block, post)
         # Increase pid for next iteration
         pid += len(lst.posts)