diff --git a/.gitignore b/.gitignore index e417e70..e1776a7 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ __pycache__/ # Output files /last_pid.txt /data.db +/errored-document.html +*.log diff --git a/scraper/__main__.py b/scraper/__main__.py index 480c3a4..f819333 100644 --- a/scraper/__main__.py +++ b/scraper/__main__.py @@ -4,7 +4,8 @@ import time import random import tomllib -from .scraper import get_posts +from .scraper import get_posts, scraper +import cloudscraper with open("config.toml", "rb") as file: config = tomllib.load(file) @@ -33,7 +34,7 @@ for pid in range(start_pid, end_pid, 42): for _ in range(3): try: last_exception = None - posts = get_posts(f"https://rule34.xxx/index.php?page=post&s=list&pid={pid}") + posts = get_posts(f"https://rule34.xxx/index.php?page=post&s=list&tags=all&pid={pid}") break except Exception as e: last_exception = e