import asyncio
import os
import json
import shutil
import tempfile
import uuid
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Tuple

from sqlalchemy import select, and_, or_

from app.core.logger import make_log
from app.core.storage import db_session
from app.core._config import UPLOADS_DIR, BACKEND_LOGS_DIR_HOST
from app.core.models.content_v3 import (
    EncryptedContent,
    ContentKey,
    ContentDerivative,
    UploadSession,
)
from app.core.models.node_storage import StoredContent
from app.core.ipfs_client import cat_stream
from app.core.crypto.encf_stream import decrypt_encf_auto
from app.core.crypto.keywrap import unwrap_dek, wrap_dek, KeyWrapError
from app.core.network.key_client import request_key_from_peer
from app.core.models.my_network import KnownNode
from app.core._utils.resolve_content import resolve_content
from app.core.content.content_id import ContentId

CONCURRENCY = int(os.getenv("CONVERT_V3_MAX_CONCURRENCY", "3"))
STAGING_SUBDIR = os.getenv("CONVERT_V3_STAGING_SUBDIR", "convert-staging")
UPLOADS_PATH = Path(UPLOADS_DIR).resolve()
_host_uploads_env = os.getenv("BACKEND_DATA_DIR_HOST")
HOST_UPLOADS_PATH = Path(_host_uploads_env).resolve() if _host_uploads_env else None

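# Environment knobs read at import time (values shown are illustrative examples,
# not repo defaults, except where the code above sets them):
#
#   CONVERT_V3_MAX_CONCURRENCY=3              # parallel conversion jobs (default 3)
#   CONVERT_V3_STAGING_SUBDIR=convert-staging # staging dir under UPLOADS_DIR
#   BACKEND_DATA_DIR_HOST=/srv/backend-data   # host-side path of UPLOADS_DIR,
#                                             # needed to build docker -v bind mounts
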
@dataclass
class PlainStaging:
    container_path: str
    host_path: str

def _container_to_host(path: str) -> str:
    """Map a container path under UPLOADS_DIR to the host path for docker -v."""
    if not HOST_UPLOADS_PATH:
        raise RuntimeError("BACKEND_DATA_DIR_HOST is not configured for convert_v3")
    real_path = Path(path).resolve()
    try:
        rel = real_path.relative_to(UPLOADS_PATH)
    except ValueError:
        # Not under uploads; best-effort fallback to the resolved path
        return str(real_path)
    return str(HOST_UPLOADS_PATH / rel)

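# Worked example (illustrative; the env values here are assumptions, not repo
# defaults): with UPLOADS_DIR="/data/uploads" and BACKEND_DATA_DIR_HOST="/srv/backend-data",
#
#     _container_to_host("/data/uploads/convert-staging/dec_ab12cdef_x")
#
# returns "/srv/backend-data/convert-staging/dec_ab12cdef_x", which is the
# host-side path docker needs for a -v bind mount.
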
MEDIA_CONVERTER_CPU_LIMIT = os.getenv("MEDIA_CONVERTER_CPU_LIMIT")
MEDIA_CONVERTER_MEM_LIMIT = os.getenv("MEDIA_CONVERTER_MEM_LIMIT")
MEDIA_CONVERTER_CPUSET = os.getenv("MEDIA_CONVERTER_CPUSET") or os.getenv("CONVERT_CPUSET")
ERROR_TRUNCATE_LIMIT = 512

def _ensure_dir(path: str):
    try:
        os.makedirs(path, exist_ok=True)
    except Exception:
        pass

async def _sha256_b58(file_path: str) -> str:
    import hashlib
    import base58
    h = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(2 * 1024 * 1024), b''):
            h.update(chunk)
    return base58.b58encode(h.digest()).decode()

async def _save_derivative(file_path: str) -> Tuple[str, int]:
    """Move file into UPLOADS_DIR under its sha256 b58 name; return (hash_b58, size)."""
    file_hash = await _sha256_b58(file_path)
    dst = os.path.join(UPLOADS_DIR, file_hash)
    try:
        os.remove(dst)
    except FileNotFoundError:
        pass
    shutil.move(file_path, dst)
    size = os.path.getsize(dst)
    return file_hash, size

async def _run_media_converter(staging: PlainStaging, input_ext: str, quality: str, trim_value: Optional[str]):
    if not os.path.exists(staging.container_path):
        raise FileNotFoundError(f"Plain input missing at {staging.container_path}")

    host_input_path = staging.host_path
    if not host_input_path or not host_input_path.startswith('/'):
        host_input_path = os.path.abspath(host_input_path)

    rid = uuid.uuid4().hex[:8]
    output_dir_container = UPLOADS_PATH / "convert-output" / f"conv_{rid}"
    output_dir_host = _container_to_host(str(output_dir_container))
    _ensure_dir(str(output_dir_container))

    # Prefer the configured host logs dir; fall back to a path under the host uploads dir
    if BACKEND_LOGS_DIR_HOST:
        logs_dir_host = str(BACKEND_LOGS_DIR_HOST)
    elif HOST_UPLOADS_PATH:
        logs_dir_host = str(HOST_UPLOADS_PATH / "logs" / "converter")
    else:
        logs_dir_host = "/tmp/converter-logs"
    if not logs_dir_host.startswith('/'):
        logs_dir_host = os.path.join(os.getcwd(), logs_dir_host)
    try:
        os.makedirs(logs_dir_host, exist_ok=True)
    except Exception:
        fallback_logs = HOST_UPLOADS_PATH / "logs" / "converter" if HOST_UPLOADS_PATH else Path("/tmp/converter-logs")
        logs_dir_host = str(fallback_logs)
        os.makedirs(logs_dir_host, exist_ok=True)

    cmd = [
        "docker", "run", "--rm",
        "-v", f"{host_input_path}:/app/input:ro",
        "-v", f"{output_dir_host}:/app/output",
        "-v", f"{logs_dir_host}:/app/logs",
    ]
    if MEDIA_CONVERTER_CPU_LIMIT:
        cmd.extend(["--cpus", str(MEDIA_CONVERTER_CPU_LIMIT)])
    if MEDIA_CONVERTER_MEM_LIMIT:
        cmd.extend(["--memory", str(MEDIA_CONVERTER_MEM_LIMIT)])
    if MEDIA_CONVERTER_CPUSET:
        cmd.extend(["--cpuset-cpus", MEDIA_CONVERTER_CPUSET])

    cmd.append("media_converter")
    cmd.extend(["--ext", input_ext, "--quality", quality])
    if trim_value:
        cmd.extend(["--trim", trim_value])

    make_log('convert_v3', f"Run media_converter cmd: {' '.join(cmd)}")

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(f"media_converter failed: {_short_error(stderr.decode())}")

    # Find the produced media file and the optional output.json (ffprobe metadata)
    try:
        files = os.listdir(output_dir_container)
    except Exception as e:
        raise RuntimeError(f"Read output dir error: {e}")
    media_files = [f for f in files if f != "output.json"]
    if len(media_files) != 1:
        raise RuntimeError(f"Expected one media file, found {len(media_files)}: {media_files}")
    output_media = os.path.join(output_dir_container, media_files[0])
    ffprobe_meta = {}
    out_json = os.path.join(output_dir_container, "output.json")
    if os.path.exists(out_json):
        try:
            with open(out_json, 'r') as f:
                ffprobe_meta = json.load(f)
        except Exception:
            ffprobe_meta = {}
    return output_media, ffprobe_meta

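# Example of an assembled command (paths and limits are hypothetical, shown only
# to illustrate the volume layout the converter image expects):
#
#   docker run --rm \
#     -v /srv/backend-data/convert-staging/dec_ab12cdef_x:/app/input:ro \
#     -v /srv/backend-data/convert-output/conv_1a2b3c4d:/app/output \
#     -v /srv/backend-logs/converter:/app/logs \
#     --cpus 2 --memory 2g \
#     media_converter --ext mp3 --quality low --trim 15-45
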
async def _update_upload_session(ec: EncryptedContent, all_success: bool, errors: List[str]):
    async with db_session() as session:
        upload_row = (await session.execute(
            select(UploadSession).where(UploadSession.encrypted_cid == ec.encrypted_cid)
        )).scalars().first()
        if upload_row:
            if all_success:
                upload_row.state = 'converted'
                upload_row.error = None
            elif upload_row.state != 'converted':
                upload_row.state = 'conversion_failed'
                if errors:
                    upload_row.error = _short_error(errors[0])
            await session.commit()

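# UploadSession state transitions applied by _update_upload_session above:
#   every derivative ready -> state='converted', error cleared
#   any derivative failed  -> state='conversion_failed' (unless the session already
#                             reached 'converted'), error=first failure, truncated
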
async def _convert_content(ec: EncryptedContent, staging: PlainStaging):
    content_type = ec.content_type or ''
    content_kind = 'audio' if content_type.startswith('audio/') else ('video' if content_type.startswith('video/') else 'other')
    input_ext = (content_type.split('/')[-1] or 'bin')
    is_audio = content_kind == 'audio'
    encrypted_hash_b58 = ContentId.deserialize(ec.encrypted_cid).content_hash_b58

    if content_kind == 'other':
        errors: List[str] = []
        all_success = True
        try:
            file_hash, size_bytes = await _save_derivative(staging.container_path)
            plain_path = os.path.join(UPLOADS_DIR, file_hash)
            plain_filename = f"{ec.encrypted_cid}.{input_ext}" if input_ext else ec.encrypted_cid
            async with db_session() as session:
                existing = (await session.execute(select(StoredContent).where(StoredContent.hash == file_hash))).scalars().first()
                if existing:
                    sc = existing
                    sc.type = sc.type or "local/content_bin"
                    sc.filename = plain_filename
                    sc.meta = {
                        **(sc.meta or {}),
                        'encrypted_cid': ec.encrypted_cid,
                        'kind': 'original',
                        'content_type': ec.content_type,
                    }
                    sc.updated = datetime.utcnow()
                else:
                    sc = StoredContent(
                        type="local/content_bin",
                        hash=file_hash,
                        user_id=None,
                        filename=plain_filename,
                        meta={
                            'encrypted_cid': ec.encrypted_cid,
                            'kind': 'original',
                            'content_type': ec.content_type,
                        },
                        created=datetime.utcnow(),
                    )
                    session.add(sc)
                    await session.flush()

                encrypted_records = (await session.execute(select(StoredContent).where(StoredContent.hash == encrypted_hash_b58))).scalars().all()
                for encrypted_sc in encrypted_records:
                    meta = dict(encrypted_sc.meta or {})
                    converted = dict(meta.get('converted_content') or {})
                    converted['original'] = file_hash
                    meta['converted_content'] = converted
                    if 'content_type' not in meta:
                        meta['content_type'] = ec.content_type
                    encrypted_sc.meta = meta
                    encrypted_sc.decrypted_content_id = sc.id
                    encrypted_sc.updated = datetime.utcnow()

                derivative = ContentDerivative(
                    content_id=ec.id,
                    kind='decrypted_original',
                    local_path=plain_path,
                    content_type=ec.content_type,
                    size_bytes=size_bytes,
                    status='ready',
                )
                session.add(derivative)
                await session.commit()
            make_log('convert_v3', f"Stored original derivative for {ec.encrypted_cid}")
        except Exception as e:
            all_success = False
            errors.append(_short_error(e))
            make_log('convert_v3', f"Convert error {ec.encrypted_cid} opt=original: {e}", level='error')
        await _update_upload_session(ec, all_success, errors)
        return

    # audio/video path
    required = ['high', 'low', 'low_preview']
    conf = ec.preview_conf or {}
    intervals = conf.get('intervals') or [[0, int(conf.get('duration_ms', 30000))]]
    main_interval = intervals[0]
    # e.g. an interval of [15000, 45000] ms becomes start_s=15, dur_s=30 -> trim "15-45"
    start_s = max(0, int(main_interval[0]) // 1000)
    dur_s = max(1, int((main_interval[1] - main_interval[0]) // 1000) or 30)
    trim_value = f"{start_s}-{start_s + dur_s}"

    qualities = {
        'high': 'high',
        'low': 'low',
        'low_preview': 'low',
    }

    all_success = True
    errors: List[str] = []

    for opt in required:
        derivative_kind = f"decrypted_{opt if opt != 'low_preview' else 'preview'}"
        derivative_id: Optional[int] = None
        try:
            async with db_session() as session:
                cd = ContentDerivative(
                    content_id=ec.id,
                    kind=derivative_kind,
                    interval_start_ms=main_interval[0] if opt == 'low_preview' else None,
                    interval_end_ms=main_interval[1] if opt == 'low_preview' else None,
                    local_path="",
                    status='processing',
                )
                session.add(cd)
                await session.flush()
                derivative_id = cd.id
                await session.commit()

            out_path, ffprobe = await _run_media_converter(
                staging=staging,
                input_ext=input_ext,
                quality=qualities[opt],
                trim_value=trim_value if opt == 'low_preview' else None,
            )

            file_hash, size_bytes = await _save_derivative(out_path)

            async with db_session() as session:
                sc = (await session.execute(select(StoredContent).where(StoredContent.hash == file_hash))).scalars().first()
                meta_payload = {'encrypted_cid': ec.encrypted_cid, 'kind': opt, 'ffprobe_meta': ffprobe}
                if sc:
                    sc.type = sc.type or "local/content_bin"
                    sc.filename = os.path.basename(out_path)
                    sc.meta = meta_payload
                    sc.updated = datetime.utcnow()
                else:
                    sc = StoredContent(
                        type="local/content_bin",
                        hash=file_hash,
                        user_id=None,
                        filename=os.path.basename(out_path),
                        meta=meta_payload,
                        created=datetime.utcnow(),
                    )
                    session.add(sc)
                    await session.flush()

                encrypted_sc = (await session.execute(select(StoredContent).where(StoredContent.hash == encrypted_hash_b58))).scalars().first()
                if encrypted_sc:
                    meta = dict(encrypted_sc.meta or {})
                    converted = dict(meta.get('converted_content') or {})
                    converted[opt] = file_hash
                    meta['converted_content'] = converted
                    encrypted_sc.meta = meta
                    if opt == 'high':
                        encrypted_sc.decrypted_content_id = sc.id
                    encrypted_sc.updated = datetime.utcnow()

                cd = await session.get(ContentDerivative, derivative_id) if derivative_id else None
                if cd:
                    cd.local_path = os.path.join(UPLOADS_DIR, file_hash)
                    cd.size_bytes = size_bytes
                    if is_audio:
                        cd.content_type = 'audio/flac' if opt == 'high' else 'audio/mpeg'
                    else:
                        cd.content_type = ec.content_type if opt == 'high' else 'video/mp4'
                    cd.status = 'ready'
                    cd.error = None
                await session.commit()

            output_parent = Path(out_path).parent
            shutil.rmtree(output_parent, ignore_errors=True)
            make_log('convert_v3', f"Converted {ec.encrypted_cid} opt={opt} -> {file_hash}")
        except Exception as e:
            make_log('convert_v3', f"Convert error {ec.encrypted_cid} opt={opt}: {e}", level='error')
            all_success = False
            errors.append(_short_error(e))
            async with db_session() as session:
                cd = await session.get(ContentDerivative, derivative_id) if derivative_id else None
                if cd:
                    cd.status = 'failed'
                    cd.error = _short_error(e)
                else:
                    session.add(ContentDerivative(
                        content_id=ec.id,
                        kind=derivative_kind,
                        status='failed',
                        error=_short_error(e),
                        local_path="",
                    ))
                await session.commit()

    await _update_upload_session(ec, all_success, errors)

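# After a successful media conversion the encrypted StoredContent row ends up
# with, for example (hash values are placeholders):
#
#   meta['converted_content'] == {
#       'high': '9f3k...',         # FLAC for audio, source container for video
#       'low': '7hQx...',          # MP3 for audio, MP4 for video
#       'low_preview': '2mNe...',  # trimmed low-quality preview
#   }
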
async def _pick_pending(limit: int) -> List[Tuple[EncryptedContent, PlainStaging]]:
    async with db_session() as session:
        # Include preview-enabled media and non-media content that need decrypted originals
        non_media_filter = and_(
            EncryptedContent.content_type.isnot(None),
            ~EncryptedContent.content_type.like('audio/%'),
            ~EncryptedContent.content_type.like('video/%'),
        )
        ecs = (await session.execute(
            select(EncryptedContent)
            .where(or_(EncryptedContent.preview_enabled.is_(True), non_media_filter))
            .order_by(EncryptedContent.created_at.desc())
        )).scalars().all()

        picked: List[Tuple[EncryptedContent, PlainStaging]] = []
        for ec in ecs:
            try:
                cid_obj, cid_err = resolve_content(ec.encrypted_cid)
                if cid_err:
                    make_log('convert_v3', f"Skip {ec.encrypted_cid}: resolve error {cid_err}", level='debug')
                    continue
                encrypted_hash_b58 = cid_obj.content_hash_b58
            except Exception as exc:
                make_log('convert_v3', f"Skip {ec.encrypted_cid}: resolve exception {exc}", level='warning')
                continue

            sc = (await session.execute(select(StoredContent).where(StoredContent.hash == encrypted_hash_b58))).scalars().first()
            if not sc or sc.onchain_index is None:
                continue

            # Skip if the required derivatives are already ready
            rows = (await session.execute(select(ContentDerivative).where(ContentDerivative.content_id == ec.id))).scalars().all()
            kinds_ready = {r.kind for r in rows if r.status == 'ready'}
            content_type = ec.content_type or ''
            if content_type.startswith('audio/'):
                required = {'decrypted_low', 'decrypted_high'}
            elif content_type.startswith('video/'):
                required = {'decrypted_low', 'decrypted_high', 'decrypted_preview'}
            else:
                required = {'decrypted_original'}
            if required.issubset(kinds_ready):
                continue

            # Always decrypt from IPFS using a local or remote key
            staging: Optional[PlainStaging] = None
            ck = (await session.execute(select(ContentKey).where(ContentKey.content_id == ec.id))).scalars().first()
            if ck:
                staging = await stage_plain_from_ipfs(ec, ck.key_ciphertext_b64)
            if not staging:
                peers = (await session.execute(select(KnownNode))).scalars().all()
                for peer in peers:
                    meta = peer.meta or {}
                    public_host = meta.get('public_host')
                    if not public_host:
                        last_resp = (meta.get('last_response') or {}).get('node', {})
                        public_host = last_resp.get('public_host')
                    base_url = public_host or f"http://{peer.ip}:{peer.port}"
                    dek = await request_key_from_peer(base_url, ec.encrypted_cid)
                    if not dek:
                        continue
                    try:
                        dek_b64 = wrap_dek(dek)
                    except KeyWrapError as exc:
                        make_log('convert_v3', f"wrap failed for peer DEK: {exc}", level='error')
                        continue
                    session_ck = ContentKey(
                        content_id=ec.id,
                        key_ciphertext_b64=dek_b64,
                        key_fingerprint=peer.public_key,
                        issuer_node_id=peer.public_key,
                        allow_auto_grant=True,
                    )
                    session.add(session_ck)
                    await session.commit()
                    staging = await stage_plain_from_ipfs(ec, dek_b64)
                    if staging:
                        break
            if not staging or not os.path.exists(staging.container_path):
                continue
            picked.append((ec, staging))
            if len(picked) >= limit:
                break
        return picked

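# Key acquisition order used above: a locally stored ContentKey is tried first;
# failing that, each KnownNode peer is asked via request_key_from_peer(), and a
# successfully fetched DEK is wrapped and persisted as a new ContentKey before
# staging is retried.
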
async def worker_loop():
    sem = asyncio.Semaphore(CONCURRENCY)

    async def _run_one(ec: EncryptedContent, staging: PlainStaging):
        async with sem:
            try:
                await _convert_content(ec, staging)
                # After successful conversion, remove the staging file to avoid duplicates
                try:
                    if staging and staging.container_path and os.path.exists(staging.container_path):
                        os.remove(staging.container_path)
                except Exception:
                    pass
            except Exception as e:
                make_log('convert_v3', f"job error {ec.encrypted_cid}: {e}", level='error')

    while True:
        try:
            batch = await _pick_pending(limit=CONCURRENCY * 2)
            if not batch:
                await asyncio.sleep(3)
                continue
            tasks = [asyncio.create_task(_run_one(ec, staging)) for (ec, staging) in batch]
            await asyncio.gather(*tasks)
        except Exception as e:
            make_log('convert_v3', f"loop error: {e}", level='error')
            await asyncio.sleep(2)

async def main_fn(memory):
    make_log('convert_v3', f"Service started with concurrency={CONCURRENCY}", level='info')
    await worker_loop()

async def stage_plain_from_ipfs(ec: EncryptedContent, dek_wrapped: str) -> Optional[PlainStaging]:
    """Download the encrypted ENCF stream from IPFS and decrypt it on the fly into shared staging."""
    os.makedirs(UPLOADS_PATH / STAGING_SUBDIR, exist_ok=True)
    try:
        dek = unwrap_dek(dek_wrapped)
    except KeyWrapError as exc:
        make_log('convert_v3', f"unwrap failed for {ec.encrypted_cid}: {exc}", level='error')
        return None

    tmp = tempfile.NamedTemporaryFile(
        prefix=f"dec_{ec.encrypted_cid[:8]}_",
        dir=UPLOADS_PATH / STAGING_SUBDIR,
        delete=False,
    )
    tmp_path = tmp.name
    tmp.close()
    try:
        # cat_stream yields the encrypted content chunk by chunk; decrypt as it arrives
        await decrypt_encf_auto(cat_stream(ec.encrypted_cid), dek, tmp_path)
        host_path = _container_to_host(tmp_path)
        return PlainStaging(container_path=tmp_path, host_path=host_path)
    except Exception as e:
        make_log('convert_v3', f"decrypt from ipfs failed: {e}", level='error')
        try:
            os.remove(tmp_path)
        except Exception:
            pass
        return None

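# The staging file is written under UPLOADS_PATH/STAGING_SUBDIR (for example, with
# an assumed UPLOADS_DIR="/data/uploads": /data/uploads/convert-staging/dec_ab12cdef_x),
# so the same bytes are reachable from the host via _container_to_host() for the
# docker bind mount in _run_media_converter().
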
def _short_error(message, limit: int = ERROR_TRUNCATE_LIMIT) -> str:
    """Render an error (string or exception) truncated to at most `limit` characters."""
    if not message:
        return ""
    message = str(message)
    return message if len(message) <= limit else message[: limit - 3] + '...'
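

# Minimal standalone entrypoint sketch (an assumption: in deployment a service
# runner is expected to import and schedule main_fn; this guard only applies when
# the file is executed directly).
if __name__ == "__main__":
    asyncio.run(main_fn(memory=None))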