Files
py34/scraper/server/__main__.py
Tomas 1dc17d7670 Don't create py34.Post for downloading images
py34.Post does extension scan, that is not required as it is saved
inside the block data.
2025-10-09 20:05:36 +02:00

135 lines
3.9 KiB
Python

from scraper.config import config
from flask import Flask, Response, request, render_template, url_for, send_file
from pathlib import Path
import py34
from .block import BLOCK_SIZE, list_blocks, load_blocks, load_block, load_block_stats, save_block, loads as parse_block, enttype2ext
from .job import assign_job, working_on, any_job, jobs
# Create the Flask application object that all routes below attach to
app = Flask(__name__)

# Copy every option from the "server" -> "web" section of the scraper config
# into Flask's own config. Option names are normalised to Flask's convention:
# dashes become underscores and the name is upper-cased.
_web_settings = config["server"]["web"]
for key in _web_settings:
    app.config[key.replace("-", "_").upper()] = _web_settings[key]
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.j2`` template."""
    page = render_template("index.j2")
    return page
@app.get("/job")
def get_job():
    """Assign the next job (a start/size range) and return it as a dict.

    Existing blocks are visited in ascending ``start`` order. For each block:

    1. If the block is not full and nobody is working on its start, a job
       for exactly that block (``block.start``, ``block.size``) is assigned.
    2. Otherwise the range directly behind the block is probed, stepping
       forward until a start that nobody is working on is found. For the
       last block the probing is unbounded; for any other block it stops
       once the start of the following block is reached.

    When no blocks exist at all, the first job starts at 1, or one
    ``BLOCK_SIZE`` behind the highest start among the known jobs.
    """
    blocks = list_blocks()
    blocks.sort(key=lambda blk: blk.start)
    last_index = len(blocks) - 1

    for index, current in enumerate(blocks):
        following = blocks[index + 1] if index != last_index else None

        # Try to fill up hollow (not yet full) blocks first
        if not (current.full or working_on(current.start)):
            return assign_job(current.start, current.size).to_dict()

        # Try to append a new block right behind the current one.
        # The first candidate gets a shortened size; every later candidate
        # is a whole BLOCK_SIZE.
        # NOTE(review): the "+ 1" presumably realigns to block boundaries
        # because IDs start at 1 -- confirm against the block module.
        cursor = current.start + current.size
        span = BLOCK_SIZE - cursor % BLOCK_SIZE + 1

        # Last block: probe forever. Otherwise: probe only inside the gap
        # in front of the following block.
        while following is None or cursor < following.start:
            if not working_on(cursor):
                return assign_job(cursor, span).to_dict()
            cursor += span
            span = BLOCK_SIZE

    # The loop always returns for the last block, so reaching this point
    # means there were no blocks to iterate over.
    assert len(blocks) == 0  # We should not be here if blocks exist

    if not any_job():
        return assign_job(1, BLOCK_SIZE).to_dict()

    highest_job = max(jobs(), key=lambda job: job.start)
    return assign_job(highest_job.start + BLOCK_SIZE, BLOCK_SIZE).to_dict()
@app.get("/blocks")
def get_block_ids():
    """Return every item from ``list_blocks()`` converted via ``to_dict()``."""
    return [block.to_dict() for block in list_blocks()]
@app.get("/block_stats")
def get_block_stats():
    """Return every item from ``load_block_stats()`` converted via ``to_dict()``."""
    return [stats.to_dict() for stats in load_block_stats()]
@app.get("/rawblock/<int:post_id>")
def get_rawblock(post_id: int = None):
    """Return the serialized block that ``load_block`` resolves for a post ID.

    The ``int`` URL converter makes Flask answer non-numeric IDs with 404
    instead of letting ``int()`` raise ``ValueError`` (an HTTP 500).

    Args:
        post_id: Post ID taken from the URL.

    Returns:
        A response whose body is ``block.dumps()`` with the
        ``application/octet-stream`` MIME type.
    """
    # Flask always supplies post_id for this route; this only guards direct calls.
    assert post_id is not None
    # No-op for routed requests; keeps direct calls with numeric strings working.
    block = load_block(int(post_id))
    return Response(
        response=block.dumps(),
        mimetype="application/octet-stream",
    )
@app.get("/block/<int:post_id>")
def get_block(post_id: int = None):
    """Return the block that ``load_block`` resolves for a post ID, as a dict.

    The ``int`` URL converter makes Flask answer non-numeric IDs with 404
    instead of letting ``int()`` raise ``ValueError`` (an HTTP 500).

    Args:
        post_id: Post ID taken from the URL.

    Returns:
        The result of the block's ``to_dict()``.
    """
    # Flask always supplies post_id for this route; this only guards direct calls.
    assert post_id is not None
    # No-op for routed requests; keeps direct calls with numeric strings working.
    block = load_block(int(post_id))
    return block.to_dict()
@app.post("/block")
def put_block():
    """Parse the raw request body as a block and store it.

    Returns:
        The literal string ``"ok"`` once ``save_block`` has returned.
    """
    uploaded_block = parse_block(request.data)
    save_block(uploaded_block)
    return "ok"
@app.get("/thumbnail/<int:post_id>")
def get_thumbnail(post_id: int = None):
    """Return the thumbnail stored in the block entry of a post.

    The ``int`` URL converter makes Flask answer non-numeric IDs with 404
    instead of letting ``int()`` raise ``ValueError`` (an HTTP 500).

    Args:
        post_id: Post ID taken from the URL.

    Returns:
        A response carrying the entry's ``thumbnail`` data, declared as
        ``image/jpeg``.
    """
    # Flask always supplies post_id for this route; this only guards direct calls.
    assert post_id is not None
    # No-op for routed requests; keeps direct calls with numeric strings working.
    post_id = int(post_id)
    entry = load_block(post_id).entry(post_id)
    return Response(
        response=entry.thumbnail,
        content_type="image/jpeg",
    )
@app.get("/sample/<int:post_id>")
def get_sample(post_id: int = None):
    """Redirect the client to the sample image URL of a post.

    The ``int`` URL converter makes Flask answer non-numeric IDs with 404
    instead of letting ``int()`` raise ``ValueError`` (an HTTP 500).

    Args:
        post_id: Post ID taken from the URL.

    Returns:
        A bodiless 307 response whose ``Location`` header is the string
        form of ``py34.url.SampleURL`` built from the entry's ``dir`` and
        ``image`` fields.
    """
    # Flask always supplies post_id for this route; this only guards direct calls.
    assert post_id is not None
    # No-op for routed requests; keeps direct calls with numeric strings working.
    post_id = int(post_id)
    entry = load_block(post_id).entry(post_id)
    sample_url = str(py34.url.SampleURL(entry.dir, entry.image))
    return Response(
        status=307,
        headers={"Location": sample_url},
    )
@app.get("/image/<int:post_id>")
def get_image(post_id: int = None):
    """Serve the full image of a post, downloading and caching it on first use.

    The ``int`` URL converter makes Flask answer non-numeric IDs with 404
    instead of letting ``int()`` raise ``ValueError`` (an HTTP 500).

    Images are cached under ``image/<dir>/<image>.<ext>`` relative to the
    current working directory. The extension comes from the entry's
    ``type`` via ``enttype2ext``, so no extra lookup is needed.

    Args:
        post_id: Post ID taken from the URL.

    Returns:
        The cached file via ``send_file``, or a 404 response with the body
        ``"Image not found"`` when the entry's ``type`` is below 100.
    """
    # Flask always supplies post_id for this route; this only guards direct calls.
    assert post_id is not None
    # No-op for routed requests; keeps direct calls with numeric strings working.
    post_id = int(post_id)
    entry = load_block(post_id).entry(post_id)

    # NOTE(review): types below 100 presumably mark entries without a
    # downloadable image -- confirm against the block module.
    if entry.type < 100:
        return Response(
            "Image not found",
            status=404,
        )

    extension = enttype2ext(entry.type)
    path = Path.cwd() / Path(f"image/{entry.dir}/{entry.image}.{extension}")
    path.parent.mkdir(parents=True, exist_ok=True)

    # Only hit the remote server when the image is not cached yet.
    if not path.exists():
        image = py34.scraper.scraper.get(
            py34.url.ImageURL(entry.dir, entry.image, extension)
        )
        with open(path, "wb") as file:
            file.write(image)

    return send_file(path)
# Start the Flask server only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    app.run()