# py34/scraper/server/__main__.py

from scraper.config import config
from flask import Flask, Response, request, render_template, url_for, send_file
from pathlib import Path
import py34
from .block import BLOCK_SIZE, list_blocks, load_blocks, load_block, load_block_stats, save_block, loads as parse_block, enttype2ext
from .job import assign_job, working_on, any_job, jobs


# Create Flask application
app = Flask(__name__)


# Copy the "web" section of the server configuration into the Flask config.
# Option names are normalised on the way: dashes become underscores and the
# name is upper-cased (e.g. "some-option" -> "SOME_OPTION").
web_options = config["server"]["web"]
for option in web_options:
    app.config[option.upper().replace("-", "_")] = web_options[option]


@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.j2`` template."""
    page = render_template("index.j2")
    return page

@app.get("/job")
def get_job():
    """Assign the next range of post ids that still needs to be scraped.

    Stored blocks are visited in order of their start id. For every block the
    first option that applies is taken:

    1. The block is not full and no job is working on it: the block's own
       range (``block.start``, ``block.size``) is assigned again.
    2. Otherwise the gap behind the block is scanned chunk by chunk for a
       start id nobody is working on. The gap ends at the next block's start;
       behind the last block it is unbounded, so a job is always found there.

    When no blocks are stored yet, the first job starts at id 1; if jobs are
    already running, the new job starts ``BLOCK_SIZE`` after the highest start
    id among them.

    Returns:
        The assigned job converted with ``to_dict()``.
    """
    blocks = list_blocks()
    blocks.sort(key=lambda b: b.start)
    for n, block in enumerate(blocks):
        # Try to fill up hollow blocks
        if not block.full and not working_on(block.start):
            return assign_job(block.start, block.size).to_dict()

        # Try to append a new block in the gap behind this one.
        # ``gap_end`` is None for the last block: its gap never ends.
        gap_end = blocks[n + 1].start if n + 1 < len(blocks) else None
        virt_start = block.start + block.size

        # Ids are 1-based (the very first job starts at 1), so every chunk
        # ends on a multiple of BLOCK_SIZE. The first chunk only covers what
        # is left up to that boundary. Working on ``virt_start - 1`` keeps the
        # size within 1..BLOCK_SIZE even when ``virt_start`` itself is a
        # multiple of BLOCK_SIZE (the previous formula produced
        # BLOCK_SIZE + 1 in that case and crossed the boundary).
        virt_size = BLOCK_SIZE - (virt_start - 1) % BLOCK_SIZE

        while gap_end is None or virt_start < gap_end:
            if not working_on(virt_start):
                return assign_job(virt_start, virt_size).to_dict()
            virt_start += virt_size
            virt_size = BLOCK_SIZE  # every following chunk is a whole one

    # With at least one block the loop above always returns (the gap behind
    # the last block is unbounded), so reaching this point means no blocks.
    assert len(blocks) == 0  # We should not be here if blocks exists

    if not any_job():
        return assign_job(1, BLOCK_SIZE).to_dict()

    # Nothing stored yet but jobs are running: continue behind the last one.
    last_job_start = max(jobs(), key=lambda j: j.start).start
    return assign_job(last_job_start + BLOCK_SIZE, BLOCK_SIZE).to_dict()


@app.get("/blocks")
def get_block_ids():
    """Return every item of ``list_blocks()`` converted with ``to_dict()``."""
    return [block.to_dict() for block in list_blocks()]

@app.get("/block_stats")
def get_block_stats():
    """Return every item of ``load_block_stats()`` converted with ``to_dict()``."""
    return [stats.to_dict() for stats in load_block_stats()]

@app.get("/rawblock/<int:post_id>")
def get_rawblock(post_id: int = None):
    """Return the serialized form of the block loaded for ``post_id``.

    The ``int`` URL converter lets Flask reject non-numeric ids with a 404
    instead of the request failing with an unhandled ``ValueError``
    (HTTP 500) inside the handler.

    Args:
        post_id: Id handed to ``load_block`` to select the block.

    Returns:
        An ``application/octet-stream`` response whose body is the result of
        the block's ``dumps()``.
    """
    # int() keeps direct Python callers that pass a numeric string working.
    block = load_block(int(post_id))
    return Response(
        response=block.dumps(),
        mimetype="application/octet-stream",
    )

@app.get("/block/<int:post_id>")
def get_block(post_id: int = None):
    """Return the block loaded for ``post_id`` converted with ``to_dict()``.

    The ``int`` URL converter lets Flask reject non-numeric ids with a 404
    instead of the request failing with an unhandled ``ValueError``
    (HTTP 500) inside the handler.

    Args:
        post_id: Id handed to ``load_block`` to select the block.
    """
    # int() keeps direct Python callers that pass a numeric string working.
    return load_block(int(post_id)).to_dict()

@app.post("/block")
def put_block():
    """Parse the raw request body as a block and store it.

    Answers with the plain text ``ok`` once ``save_block`` has returned.
    """
    block = parse_block(request.data)
    save_block(block)
    return "ok"

@app.get("/thumbnail/<int:post_id>")
def get_thumbnail(post_id: int = None):
    """Return the thumbnail stored in the block entry of ``post_id``.

    The ``int`` URL converter lets Flask reject non-numeric ids with a 404
    instead of the request failing with an unhandled ``ValueError``
    (HTTP 500) inside the handler.

    Args:
        post_id: Id used both to load the block and to select the entry in it.

    Returns:
        A response with content type ``image/jpeg`` whose body is the entry's
        ``thumbnail`` attribute.
    """
    # int() keeps direct Python callers that pass a numeric string working.
    post_id = int(post_id)
    entry = load_block(post_id).entry(post_id)
    return Response(
        response=entry.thumbnail,
        content_type="image/jpeg",
    )

@app.get("/sample/<int:post_id>")
def get_sample(post_id: int = None):
    """Redirect to the sample image URL of ``post_id``.

    The ``int`` URL converter lets Flask reject non-numeric ids with a 404
    instead of the request failing with an unhandled ``ValueError``
    (HTTP 500) inside the handler.

    Args:
        post_id: Id used both to load the block and to select the entry in it.

    Returns:
        A 307 response whose ``Location`` header is the ``py34.url.SampleURL``
        built from the entry's ``dir`` and ``image`` attributes.
    """
    # int() keeps direct Python callers that pass a numeric string working.
    post_id = int(post_id)
    entry = load_block(post_id).entry(post_id)
    return Response(
        status=307,
        headers={
            "Location": str(py34.url.SampleURL(entry.dir, entry.image)),
        },
    )

@app.get("/image/<int:post_id>")
def get_image(post_id: int = None):
    """Serve the full image of ``post_id``, downloading it on first use.

    Images are cached below ``image/<dir>/`` in the current working
    directory. A cached file is sent directly; otherwise the image is fetched
    through ``py34.scraper.scraper.get``, stored, and then sent.

    The ``int`` URL converter lets Flask reject non-numeric ids with a 404
    instead of the request failing with an unhandled ``ValueError``
    (HTTP 500) inside the handler.

    Args:
        post_id: Id used both to load the block and to select the entry in it.

    Returns:
        The image file, or a 404 response with the text ``Image not found``
        when the entry's type is below 100.
    """
    # int() keeps direct Python callers that pass a numeric string working.
    post_id = int(post_id)
    entry = load_block(post_id).entry(post_id)

    # NOTE(review): types below 100 are treated as "no image available";
    # the meaning of the threshold is defined by the block format - confirm.
    if entry.type < 100:
        return Response(
            "Image not found",
            status = 404,
        )

    extension = enttype2ext(entry.type)
    path = Path.cwd() / Path(f"image/{entry.dir}/{entry.image}.{extension}")
    if not path.exists():
        path.parent.mkdir(parents=True, exist_ok=True)
        image = py34.scraper.scraper.get(
            py34.url.ImageURL(entry.dir, entry.image, extension)
        )

        # Write to a temporary sibling and rename it afterwards. A write that
        # fails half-way then never leaves a truncated file under the final
        # name, which later requests would serve as a valid cache hit.
        partial = path.with_name(path.name + ".part")
        with open(partial, "wb") as file:
            file.write(image)
        partial.replace(path)

    return send_file(path)


# Run application: start the Flask app's own server, but only when this module
# is executed as a script and not when it is merely imported.
if __name__ == "__main__":
    app.run()