"""Scrape post listing pages and store the results in a local SQLite database.

Settings are read from the ``[scraper]`` table of ``config.toml``:

* ``retry-delay`` / ``next-delay`` -- two-element ``[min, max]`` ranges (whole
  seconds) passed to ``random.randint`` to pick the pause before a retry and
  before the next page, respectively.
* ``start-pid`` / ``end-pid`` -- range of ``pid`` offsets to walk. A
  ``start-pid`` of ``-1`` means "resume from ``last_pid.txt``" (or from 0 when
  that file does not exist).

After each page is committed, its ``pid`` is written to ``last_pid.txt``.
Resuming therefore re-fetches the last completed page; that is harmless
because rows are written with ``INSERT OR REPLACE``.
"""
import os
import random
import sqlite3
import time
import tomllib

from .scraper import get_posts

# Offset increment between consecutive listing pages.
# NOTE(review): presumably the number of posts per listing page -- confirm.
PAGE_STEP = 42
# Total number of fetch attempts per page before the error is propagated.
MAX_ATTEMPTS = 3

with open("config.toml", "rb") as file:
    config = tomllib.load(file)

retry_delay = tuple(config["scraper"]["retry-delay"])
next_delay = tuple(config["scraper"]["next-delay"])
start_pid = config["scraper"]["start-pid"]
end_pid = config["scraper"]["end-pid"]


def _fetch_posts(pid):
    """Fetch the listing page at offset *pid*, retrying on failure.

    Up to ``MAX_ATTEMPTS`` attempts are made, pausing a random number of
    seconds drawn from ``retry_delay`` between them.

    Returns:
        Whatever ``get_posts`` returns for the page URL.

    Raises:
        Exception: the error from the final attempt, once every attempt
            has failed.
    """
    url = f"https://rule34.xxx/index.php?page=post&s=list&pid={pid}"
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            return get_posts(url)
        except Exception:
            # Broad catch is deliberate: any scraper failure is retryable,
            # and the last one is re-raised unchanged below.
            if attempt == MAX_ATTEMPTS:
                raise
            print("Retrying")
            # The original handler also built a `cloudscraper.CloudScraper()`
            # here, but `cloudscraper` was never imported (NameError on the
            # first failure) and the object was never used, so it is removed.
            time.sleep(random.randint(*retry_delay))


db = sqlite3.connect("data.db")
try:
    db.execute("CREATE TABLE IF NOT EXISTS post(id INT UNIQUE NOT NULL, image_dir INT NOT NULL, image_id TEXT NOT NULL, tags TEXT NOT NULL, thumbnail BLOB NOT NULL);")
    db.commit()

    # A start pid of -1 requests resuming from the last recorded position.
    if start_pid == -1:
        if os.path.exists("last_pid.txt"):
            with open("last_pid.txt", "r") as file:
                start_pid = int(file.read().strip())
        else:
            start_pid = 0

    for pid in range(start_pid, end_pid, PAGE_STEP):
        print(pid)
        posts = _fetch_posts(pid)

        # Tags are flattened into one space-separated string per post.
        post_values = [
            (p.id, p.image_dir, p.image_id, " ".join(p.tags), p.thumbnail_data)
            for p in posts
        ]
        db.executemany("INSERT OR REPLACE INTO post(id, image_dir, image_id, tags, thumbnail) VALUES(?, ?, ?, ?, ?)", post_values)
        db.commit()

        # Record progress only after the page has been committed.
        with open("last_pid.txt", "w") as file:
            file.write(str(pid))

        # Pause between pages.
        time.sleep(random.randint(*next_delay))
finally:
    # Release the database handle even when a page ultimately fails.
    db.close()