"""
somantix_c2pa.py — Somantix C2PA Bridge Module
================================================
Bridges the Somantix manifest format with C2PA (Coalition for Content
Provenance and Authenticity) content credential assertions.

Provides:
  - Conversion from Somantix manifests to C2PA claim structures
  - Assertion builders for standard C2PA labels
  - Manifest merging (inject C2PA block into existing Soma manifest)
  - Validation helpers to check C2PA block integrity
  - JUMBF stub for future binary embedding

Usage:
    from somantix_c2pa import C2PABridge, build_c2pa_block

    # From an existing Soma manifest dict:
    bridge = C2PABridge(soma_manifest)
    c2pa_block = bridge.build()
    soma_manifest['c2pa'] = c2pa_block

    # Or standalone:
    block = build_c2pa_block(
        signer_name="Jane Doe",
        signer_role="VFX Compositor",
        providers=[{"provider": "Runway", "model": "Gen-4", "tier": "pro"}],
        tos_urls=[{"provider": "Runway", "url": "https://runwayml.com/terms-of-service"}],
    )

Requirements:
    Python >= 3.9
    hashlib (stdlib)
    json (stdlib)
    Optional: c2pa-python (for real JUMBF signing)
"""

from __future__ import annotations

import hashlib
import json
import os
import tempfile
import time
import uuid
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any, Optional


# ═══════════════════════════════════════════════════════════════
# CONSTANTS
# ═══════════════════════════════════════════════════════════════

# Version string reported for the "Somantix Wrapper" entry in CLAIM_GENERATOR_INFO.
SOMA_VERSION = "1.0"
# Default `claim_generator` value used by C2PABlock.
CLAIM_GENERATOR = "Somantix/1.0"
# Default `claim_generator_info` entries used by C2PABlock.
CLAIM_GENERATOR_INFO = [{"name": "Somantix Wrapper", "version": SOMA_VERSION}]

# IPTC Digital Source Types
class DigitalSourceType(str, Enum):
    """IPTC digital source type URIs (members are also plain ``str`` values)."""

    DIGITAL_CAPTURE = "http://cv.iptc.org/newscodes/digitalsourcetype/digitalCapture"
    MINOR_HUMAN_EDITS = "http://cv.iptc.org/newscodes/digitalsourcetype/minorHumanEdits"
    COMPOSITE = "http://cv.iptc.org/newscodes/digitalsourcetype/composite"
    COMPOSITE_SYNTHETIC = "http://cv.iptc.org/newscodes/digitalsourcetype/compositeSynthetic"
    TRAINED_ALGORITHMIC = "http://cv.iptc.org/newscodes/digitalsourcetype/trainedAlgorithmicMedia"
    COMPOSITE_WITH_TRAINED = (
        "http://cv.iptc.org/newscodes/digitalsourcetype/compositeWithTrainedAlgorithmicMedia"
    )
    ALGORITHMIC_MEDIA = "http://cv.iptc.org/newscodes/digitalsourcetype/algorithmicMedia"

# C2PA Assertion Labels
class C2PALabel(str, Enum):
    """Assertion label strings used by the builders in this module."""

    ACTIONS = "c2pa.actions"
    HASH_DATA = "c2pa.hash.data"
    CREATIVE_WORK = "stds.schema-org.CreativeWork"
    AI_TRAINING = "c2pa.ai_training"
    INGREDIENT = "c2pa.ingredient"
    THUMBNAIL = "c2pa.thumbnail.claim"
    EXIF = "stds.exif"

# AI Training Use
class TrainingUse(str, Enum):
    """Values for the ``use`` field of the c2pa.ai_training assertion."""

    NOT_ALLOWED = "notAllowed"
    ALLOWED = "allowed"
    CONSTRAINED = "constrained"


# ═══════════════════════════════════════════════════════════════
# DATA CLASSES
# ═══════════════════════════════════════════════════════════════

@dataclass
class C2PAAssertion:
    """A single C2PA assertion: a label plus its data payload.

    ``label`` may be a plain string or a str-based enum member (the builders
    in this module pass ``C2PALabel`` members); ``to_dict`` always emits a
    plain ``str``.
    """
    label: str
    data: dict[str, Any]

    def to_dict(self) -> dict:
        """Return ``{"label": <str>, "data": <dict>}`` for serialization."""
        # Enum members compare equal to their value, but str()/f-strings show
        # them as "C2PALabel.X" on Python 3.11+ and non-JSON serializers may
        # reject them, so unwrap to the underlying string before exporting.
        label = self.label.value if isinstance(self.label, Enum) else self.label
        return {"label": label, "data": self.data}


@dataclass
class C2PASignatureInfo:
    """Signature metadata attached to a C2PA block."""
    alg: str = "Ed25519"
    issuer: str = "Somantix Self-Sign"
    cert_serial: Optional[str] = None
    time: Optional[str] = None

    def to_dict(self) -> dict:
        """Serialize to a dict; ``cert_serial`` and ``time`` appear only when truthy."""
        payload = {"alg": self.alg, "issuer": self.issuer}
        optional_fields = {"cert_serial": self.cert_serial, "time": self.time}
        # Empty/None optional values are left out rather than emitted as null.
        payload.update({key: value for key, value in optional_fields.items() if value})
        return payload


@dataclass
class C2PABlock:
    """Complete C2PA block for embedding in a Soma manifest.

    ``claim_id`` is generated as ``urn:uuid:<uuid4>`` when it is not supplied
    (or is falsy).
    """
    claim_generator: str = CLAIM_GENERATOR
    # Copy every entry, not just the outer list: a shallow list() copy would
    # leave each block (and each exported dict) sharing the inner dicts with
    # the module-level CLAIM_GENERATOR_INFO constant, so mutating one block's
    # info would silently change the default for all later blocks.
    claim_generator_info: list[dict] = field(
        default_factory=lambda: [dict(entry) for entry in CLAIM_GENERATOR_INFO]
    )
    assertions: list[C2PAAssertion] = field(default_factory=list)
    signature_info: C2PASignatureInfo = field(default_factory=C2PASignatureInfo)
    claim_id: Optional[str] = None

    def __post_init__(self):
        # Give every block a unique claim id unless the caller provided one.
        if not self.claim_id:
            self.claim_id = f"urn:uuid:{uuid.uuid4()}"

    def to_dict(self) -> dict:
        """Serialize the block, including nested assertions and signature info."""
        return {
            "claim_generator": self.claim_generator,
            "claim_generator_info": self.claim_generator_info,
            "assertions": [a.to_dict() for a in self.assertions],
            "signature_info": self.signature_info.to_dict(),
            "claim_id": self.claim_id,
        }


# ═══════════════════════════════════════════════════════════════
# ASSERTION BUILDERS
# ═══════════════════════════════════════════════════════════════

def build_actions_assertion(
    has_ai_providers: bool = False,
    software_agent: str = "Somantix/1.0",
    action: str = "c2pa.edited",
) -> C2PAAssertion:
    """Build the c2pa.actions assertion containing a single action entry.

    Args:
        has_ai_providers: Selects the digital source type: the
            composite-with-trained-algorithmic-media URI when True, the
            digital-capture URI otherwise.
        software_agent: Value stored under ``softwareAgent``.
        action: Value stored under ``action``.

    Returns:
        A C2PAAssertion labelled ``C2PALabel.ACTIONS``.
    """
    if has_ai_providers:
        source_type = DigitalSourceType.COMPOSITE_WITH_TRAINED
    else:
        source_type = DigitalSourceType.DIGITAL_CAPTURE

    entry = {
        "action": action,
        "digitalSourceType": source_type.value,
        "softwareAgent": software_agent,
    }
    return C2PAAssertion(label=C2PALabel.ACTIONS, data={"actions": [entry]})


def build_hash_assertion(hash_algorithm: str = "sha256") -> C2PAAssertion:
    """Build the c2pa.hash.data assertion recording the hash algorithm name.

    Only the algorithm name is stored; no hash value is computed here.
    """
    payload = {"name": "jumbf manifest", "hash_algorithm": hash_algorithm}
    return C2PAAssertion(label=C2PALabel.HASH_DATA, data=payload)


def build_creative_work_assertion(
    signer_name: str,
    signer_role: Optional[str] = None,
    license_urls: Optional[list[str]] = None,
) -> C2PAAssertion:
    """Build the stds.schema-org.CreativeWork assertion for a single author.

    Args:
        signer_name: Stored as the author's ``name``.
        signer_role: Stored as the author's ``jobTitle`` when truthy.
        license_urls: When non-empty, joined with ``", "`` into ``sdLicense``.

    Returns:
        A C2PAAssertion labelled ``C2PALabel.CREATIVE_WORK``.
    """
    # Optional keys are added only when a value was provided.
    person: dict[str, Any] = {
        "@type": "Person",
        "name": signer_name,
        **({"jobTitle": signer_role} if signer_role else {}),
    }
    payload: dict[str, Any] = {
        "@type": "CreativeWork",
        "author": [person],
        **({"sdLicense": ", ".join(license_urls)} if license_urls else {}),
    }
    return C2PAAssertion(label=C2PALabel.CREATIVE_WORK, data=payload)


def build_ai_training_assertion(
    use: TrainingUse = TrainingUse.NOT_ALLOWED,
    constraint_info: str = "AI training use governed by provider ToS",
) -> C2PAAssertion:
    """Build the c2pa.ai_training assertion.

    Args:
        use: Training policy; its ``.value`` is stored under ``use``.
        constraint_info: Free-text note stored under ``constraint_info``.

    Returns:
        A C2PAAssertion labelled ``C2PALabel.AI_TRAINING``.
    """
    payload = {"use": use.value, "constraint_info": constraint_info}
    return C2PAAssertion(label=C2PALabel.AI_TRAINING, data=payload)


def build_ingredient_assertion(
    title: str,
    instance_id: str,
    format_: str = "application/octet-stream",
    relationship: str = "parentOf",
) -> C2PAAssertion:
    """Build a c2pa.ingredient assertion for provenance chains.

    Args:
        title: Stored under ``title``.
        instance_id: Stored under ``instanceID``.
        format_: Stored under ``format``.
        relationship: Stored under ``relationship``.

    Returns:
        A C2PAAssertion labelled ``C2PALabel.INGREDIENT``.
    """
    payload = {
        "title": title,
        "instanceID": instance_id,
        "format": format_,
        "relationship": relationship,
    }
    return C2PAAssertion(label=C2PALabel.INGREDIENT, data=payload)


# ═══════════════════════════════════════════════════════════════
# BRIDGE: SOMA MANIFEST → C2PA
# ═══════════════════════════════════════════════════════════════

class C2PABridge:
    """
    Converts a Somantix manifest dict into a C2PA block dict.

    Only the ``signer``, ``providers`` and ``tos_urls`` keys of the manifest
    are read. Because manifests are typically parsed from JSON, keys that are
    missing or ``null`` are treated as empty instead of raising.

    Usage:
        bridge = C2PABridge(soma_manifest)
        c2pa_block = bridge.build()
        soma_manifest['c2pa'] = c2pa_block
    """

    def __init__(self, manifest: dict[str, Any]):
        self.manifest = manifest

    @property
    def _signer(self) -> dict:
        """The manifest's ``signer`` mapping, or ``{}`` when absent/not a dict."""
        signer = self.manifest.get("signer")
        return signer if isinstance(signer, dict) else {}

    @property
    def signer_name(self) -> str:
        """Signer name, falling back to ``"Unknown"`` when missing or null."""
        name = self._signer.get("name")
        return "Unknown" if name is None else name

    @property
    def signer_role(self) -> Optional[str]:
        """Signer role, or ``None`` when not present."""
        return self._signer.get("role")

    @property
    def providers(self) -> list[dict]:
        """AI provider entries from the manifest (``[]`` when missing or null)."""
        return self.manifest.get("providers") or []

    @property
    def has_ai(self) -> bool:
        """True when the manifest lists at least one provider."""
        return len(self.providers) > 0

    @property
    def tos_urls(self) -> list[dict]:
        """Terms-of-service entries from the manifest (``[]`` when missing or null)."""
        return self.manifest.get("tos_urls") or []

    def _license_urls(self) -> Optional[list[str]]:
        """Collect non-empty string ``url`` values from ``tos_urls``; None if there are none."""
        # Entries that are not dicts or carry no usable url are skipped so one
        # malformed ToS entry does not abort the whole build with a KeyError.
        urls = [
            entry["url"]
            for entry in self.tos_urls
            if isinstance(entry, dict)
            and isinstance(entry.get("url"), str)
            and entry["url"]
        ]
        return urls or None

    def build(self) -> dict:
        """Build the C2PA block dict for embedding into the Soma manifest.

        The block always contains the actions, hash and CreativeWork
        assertions; the AI-training assertion is appended only when the
        manifest lists providers.
        """
        assertions = [
            build_actions_assertion(has_ai_providers=self.has_ai),
            build_hash_assertion(),
            build_creative_work_assertion(
                signer_name=self.signer_name,
                signer_role=self.signer_role,
                license_urls=self._license_urls(),
            ),
        ]

        if self.has_ai:
            assertions.append(build_ai_training_assertion())

        block = C2PABlock(assertions=assertions)
        return block.to_dict()

    def inject(self) -> dict:
        """Build the C2PA block, store it under ``manifest["c2pa"]`` and return the manifest.

        The manifest passed to the constructor is modified in place; any
        existing ``c2pa`` entry is replaced.
        """
        self.manifest["c2pa"] = self.build()
        return self.manifest


# ═══════════════════════════════════════════════════════════════
# STANDALONE BUILDER
# ═══════════════════════════════════════════════════════════════

def build_c2pa_block(
    signer_name: str,
    signer_role: Optional[str] = None,
    providers: Optional[list[dict]] = None,
    tos_urls: Optional[list[dict]] = None,
    training_use: TrainingUse = TrainingUse.NOT_ALLOWED,
) -> dict:
    """
    Assemble a C2PA block dict directly from keyword parameters.

    Args:
        signer_name: Name of the content creator.
        signer_role: Optional role/title.
        providers: List of AI provider dicts (with 'provider', 'model', 'tier').
            Only whether the list is non-empty affects the output.
        tos_urls: List of ToS url dicts (with 'provider', 'url'); every entry
            must have a 'url' key.
        training_use: AI training policy, used only when providers are given.

    Returns:
        dict suitable for manifest['c2pa'].
    """
    provider_list = providers or []
    tos_entries = tos_urls or []
    uses_ai = len(provider_list) > 0

    if tos_entries:
        license_urls = [entry["url"] for entry in tos_entries]
    else:
        license_urls = None

    # Actions, hash and CreativeWork are always present, in this order.
    assertions = [build_actions_assertion(has_ai_providers=uses_ai)]
    assertions.append(build_hash_assertion())
    assertions.append(
        build_creative_work_assertion(
            signer_name=signer_name,
            signer_role=signer_role,
            license_urls=license_urls,
        )
    )

    # The AI-training assertion only applies when AI providers were involved.
    if uses_ai:
        assertions.append(build_ai_training_assertion(use=training_use))

    return C2PABlock(assertions=assertions).to_dict()


# ═══════════════════════════════════════════════════════════════
# VALIDATION
# ═══════════════════════════════════════════════════════════════

@dataclass
class C2PAValidationResult:
    """Outcome of validate_c2pa_block(): overall verdict plus collected messages."""
    valid: bool  # set to False by the validator whenever it records an error
    errors: list[str] = field(default_factory=list)  # problems that make the block invalid
    warnings: list[str] = field(default_factory=list)  # advisory notes; do not affect `valid`
    assertion_count: int = 0  # length of the block's assertions list (0 if never reached)


def validate_c2pa_block(block: Optional[dict]) -> C2PAValidationResult:
    """
    Validate a C2PA block dict for structural correctness.

    Checks that the block is a dict with the required top-level fields, that
    ``assertions`` is a list of dicts each carrying a string ``label`` and a
    ``data`` key, and warns about missing recommended assertions or an
    incomplete ``signature_info``. Malformed input is reported through the
    result rather than raising.

    Returns a C2PAValidationResult with any errors/warnings.
    """
    result = C2PAValidationResult(valid=True)

    def fail(message: str) -> None:
        # Every error also flips the overall verdict.
        result.valid = False
        result.errors.append(message)

    if block is None:
        fail("C2PA block is missing")
        return result

    # A manifest read from disk can hold any JSON value under "c2pa".
    if not isinstance(block, dict):
        fail("C2PA block must be a dict")
        return result

    # Check required top-level fields
    for key in ("claim_generator", "assertions", "signature_info"):
        if key not in block:
            fail(f"Missing required field: {key}")

    # Validate assertions
    assertions = block.get("assertions", [])
    if not isinstance(assertions, list):
        fail("assertions must be a list")
        return result

    result.assertion_count = len(assertions)

    labels_seen: set[str] = set()
    for i, assertion in enumerate(assertions):
        if not isinstance(assertion, dict):
            fail(f"Assertion {i} is not a dict")
            continue

        label = assertion.get("label")
        if not label:
            fail(f"Assertion {i} missing label")
        elif not isinstance(label, str):
            # Also keeps unhashable values (lists, dicts) out of the set below.
            fail(f"Assertion {i} label must be a string")
        else:
            labels_seen.add(label)

        if "data" not in assertion:
            # Show the underlying string even when the label is an enum member.
            shown = label.value if isinstance(label, Enum) else label
            fail(f"Assertion {i} ({shown}) missing data")

    # Check for recommended assertions
    for recommended in (C2PALabel.ACTIONS, C2PALabel.HASH_DATA):
        if recommended.value not in labels_seen:
            result.warnings.append(
                f"Missing recommended assertion: {recommended.value}"
            )

    # Validate signature_info
    sig = block.get("signature_info", {})
    if not isinstance(sig, dict):
        result.warnings.append("signature_info is not a dict")
    elif "alg" not in sig:
        result.warnings.append("signature_info missing 'alg' field")

    return result


# ═══════════════════════════════════════════════════════════════
# FILE I/O HELPERS
# ═══════════════════════════════════════════════════════════════

# Byte sequence separating the original payload from the JSON manifest that
# is appended after it in a wrapped file.
SOMA_MARKER = b"\n\n---SOMANTIX-MANIFEST---\n"


def extract_manifest_from_file(filepath: str | Path) -> tuple[Optional[dict], Optional[bytes]]:
    """
    Extract the Somantix manifest and original bytes from a .soma file.

    The file is expected to be ``<original bytes> + SOMA_MARKER + <JSON>``.

    Args:
        filepath: Path to the wrapped file.

    Returns:
        (manifest_dict, original_bytes) or (None, None) if no marker is found.

    Raises:
        ValueError: If the path is not a regular file, or the text after the
            marker is not UTF-8 JSON describing an object
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are
            ``ValueError`` subclasses).
    """
    # resolve() makes the path absolute and eliminates ".." components, so
    # the only remaining precondition is that it names an existing file.
    filepath = Path(filepath).resolve()
    if not filepath.is_file():
        raise ValueError(f"Not a valid file: {filepath}")
    raw = filepath.read_bytes()

    # The manifest is appended at the END of the file, so the real separator
    # is the last occurrence: the payload may itself contain the marker
    # bytes, whereas valid JSON never can (raw newlines are not allowed
    # inside JSON strings).
    idx = raw.rfind(SOMA_MARKER)
    if idx == -1:
        return None, None

    original = raw[:idx]
    manifest_bytes = raw[idx + len(SOMA_MARKER):]
    manifest = json.loads(manifest_bytes.decode("utf-8").strip())
    if not isinstance(manifest, dict):
        raise ValueError(
            f"Somantix manifest in {filepath} is not a JSON object"
        )
    return manifest, original


def inject_c2pa_into_file(filepath: str | Path, output: Optional[str | Path] = None) -> dict:
    """
    Read a .soma wrapped file, inject C2PA block into the manifest, and
    write back (in-place or to output path).

    The result is written to a temporary file in the output's directory and
    then moved over the destination, so a failed write never leaves a
    half-written file at the destination path.

    Args:
        filepath: Path to the wrapped input file.
        output: Destination path; defaults to rewriting ``filepath``.

    Returns the updated manifest dict.

    Raises:
        ValueError: If the input contains no Somantix manifest.
    """
    filepath = Path(filepath)
    output = Path(output) if output else filepath

    manifest, original = extract_manifest_from_file(filepath)
    if manifest is None or original is None:
        raise ValueError(f"No Somantix manifest found in {filepath}")

    updated = C2PABridge(manifest).inject()

    manifest_json = json.dumps(updated, indent=2).encode("utf-8")
    wrapped = original + SOMA_MARKER + manifest_json

    # mkstemp() always creates its file with mode 0600. Remember the mode the
    # destination should end up with (its current one, or the source's for a
    # new file) so the rewrite does not silently tighten permissions.
    mode_source = output if output.exists() else filepath
    target_mode = os.stat(mode_source).st_mode & 0o7777

    # Safe write: temp file first, then atomic rename
    tmp_fd, tmp_path = tempfile.mkstemp(dir=output.parent, suffix='.tmp')
    try:
        with os.fdopen(tmp_fd, 'wb') as tmp_f:
            tmp_f.write(wrapped)
        try:
            os.chmod(tmp_path, target_mode)
        except OSError:
            # Best effort: some filesystems do not support chmod, and losing
            # the mode bits is preferable to failing the whole injection.
            pass
        Path(tmp_path).replace(output)
    except BaseException:
        # Also clean up on KeyboardInterrupt/SystemExit, then re-raise.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
    return updated


def compute_file_hash(filepath: str | Path) -> str:
    """Return the file's SHA-256 digest as ``'sha256:<hex>'``.

    The file is read in 64 KiB chunks so it is never loaded into memory whole.
    """
    digest = hashlib.sha256()
    with open(filepath, "rb") as stream:
        while block := stream.read(65536):
            digest.update(block)
    return f"sha256:{digest.hexdigest()}"


# ═══════════════════════════════════════════════════════════════
# JUMBF STUB (future: real C2PA binary embedding)
# ═══════════════════════════════════════════════════════════════

class JUMBFStub:
    """
    Placeholder for real JUMBF (JPEG Universal Metadata Box Format)
    embedding. In production, this would use the c2pa-python library
    to embed C2PA manifests as JUMBF boxes inside JPEG/PNG/MP4.

    For now, provides the same data as a JSON sidecar.
    """

    def __init__(self, manifest: dict):
        self.manifest = manifest

    def to_json_sidecar(self, path: str | Path) -> Path:
        """Write C2PA block as a JSON sidecar file (.c2pa.json).

        Args:
            path: Either the media/.soma path (its final suffix is replaced
                by ``.c2pa.json``) or a path already ending in
                ``.c2pa.json``, which is used as-is.

        Returns:
            The path of the sidecar file that was written.
        """
        path = Path(path)
        # with_suffix() only swaps the LAST suffix, so "x.c2pa.json" would
        # otherwise turn into "x.c2pa.c2pa.json".
        if path.name.endswith(".c2pa.json"):
            sidecar = path
        else:
            sidecar = path.with_suffix(".c2pa.json")

        # Reuse an existing block; build one from the manifest otherwise.
        c2pa_block = self.manifest.get("c2pa")
        if not c2pa_block:
            bridge = C2PABridge(self.manifest)
            c2pa_block = bridge.build()

        sidecar.write_text(json.dumps(c2pa_block, indent=2), encoding="utf-8")
        return sidecar

    def embed_jumbf(self, filepath: str | Path) -> None:
        """
        Stub: In production, this would embed a real JUMBF box.
        Raises NotImplementedError for now.

        To enable real JUMBF embedding:
            pip install c2pa-python
        Then replace this stub with c2pa.Builder usage.
        """
        raise NotImplementedError(
            "Real JUMBF embedding requires c2pa-python. "
            "Install with: pip install c2pa-python. "
            "Use to_json_sidecar() for JSON-based C2PA output."
        )


# ═══════════════════════════════════════════════════════════════
# CLI ENTRY POINT
# ═══════════════════════════════════════════════════════════════

def main(argv: Optional[list[str]] = None) -> int:
    """CLI entry point for quick C2PA injection.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success (or when only help is shown),
        1 when the file has no manifest, validation fails, or the file
        cannot be read/parsed.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Somantix C2PA Bridge — inject C2PA assertions into .soma files"
    )
    sub = parser.add_subparsers(dest="command")

    # inject command
    inj = sub.add_parser("inject", help="Inject C2PA block into a .soma file")
    inj.add_argument("file", help="Path to .soma wrapped file")
    inj.add_argument("-o", "--output", help="Output path (default: in-place)")

    # validate command
    val = sub.add_parser("validate", help="Validate C2PA block in a .soma file")
    val.add_argument("file", help="Path to .soma wrapped file")

    # sidecar command
    sc = sub.add_parser("sidecar", help="Export C2PA block as JSON sidecar")
    sc.add_argument("file", help="Path to .soma wrapped file")
    sc.add_argument("-o", "--output", help="Output sidecar path")

    args = parser.parse_args(argv)

    if args.command is None:
        parser.print_help()
        return 0

    try:
        if args.command == "inject":
            updated = inject_c2pa_into_file(args.file, args.output)
            n = len(updated.get("c2pa", {}).get("assertions", []))
            print(f"✓ Injected C2PA block with {n} assertions")
            print(f"  Claim generator: {updated['c2pa']['claim_generator']}")
            return 0

        # Both remaining commands start from the embedded manifest.
        manifest, _ = extract_manifest_from_file(args.file)
        if manifest is None:
            print("✗ No Somantix manifest found")
            return 1

        if args.command == "validate":
            result = validate_c2pa_block(manifest.get("c2pa"))
            if result.valid:
                print(f"✓ C2PA block valid ({result.assertion_count} assertions)")
            else:
                print("✗ C2PA block invalid:")
                for e in result.errors:
                    print(f"  ERROR: {e}")
            for w in result.warnings:
                print(f"  WARN: {w}")
            # Non-zero status lets scripts and CI detect an invalid block.
            return 0 if result.valid else 1

        # args.command == "sidecar"
        stub = JUMBFStub(manifest)
        out = stub.to_json_sidecar(args.output or args.file)
        print(f"✓ Sidecar written to {out}")
        return 0
    except (OSError, ValueError) as err:
        # Missing/unreadable files and corrupt manifests are user errors:
        # report them in the CLI's own style instead of a traceback.
        print(f"✗ {err}")
        return 1


if __name__ == "__main__":
    raise SystemExit(main())
