#!/usr/bin/env python3
"""Phase 14m — dock the cavity-18 fragments (indazole, ibuprofen) into the
Plasmodium falciparum TYMS pocket and compare scores residue-by-residue
to human TYMS.

Steps
  1. super-align Pf 1J3I chain C onto human 1HVY chain A in PyMOL
  2. write the aligned Pf chain (now in human coordinate frame) as PDB
  3. obabel → PDBQT receptor
  4. Vina dock with the same box used for human cavity 18:
        centre = (4.564, -12.706, -14.884)
        size   = 22 × 22 × 22 Å (matches human cavity-18 box)
  5. compare top1 score to the human top1 from results_summary.csv
  6. extract contact residues at ≤ 4 Å in the Pf complex and tabulate the
     residue-identity differences

Results saved to:
  14_inhibitor_design/07_advanced_methods/pf_specificity_dock/
    pf_ts_aligned.pdb
    pf_ts_aligned.pdbqt
    indazole_pf.pdbqt  ibuprofen_pf.pdbqt   (docked poses)
    pf_specificity_results.json
"""
from __future__ import annotations
import json, subprocess, shutil, os
from pathlib import Path

REPO = Path(__file__).resolve().parents[2]
OUT  = REPO / "14_inhibitor_design" / "07_advanced_methods" / "pf_specificity_dock"
OUT.mkdir(parents=True, exist_ok=True)
PYMOL = shutil.which("pymol") or "/opt/homebrew/bin/pymol"
VINA  = shutil.which("vina")  or "/opt/homebrew/bin/vina"
OBABEL = shutil.which("obabel") or "/opt/homebrew/bin/obabel"

HUMAN_PDB = REPO / "03_structure" / "1hvy.pdb"
PF_PDB    = "/tmp/pf_1J3I.pdb"

# Box from FPocket cavity 18 (centroid; size matches human dock)
BOX_CENTRE = (4.564, -12.706, -14.884)
BOX_SIZE   = (22.0, 22.0, 22.0)

LIGANDS = {
    "indazole":  REPO / "14_inhibitor_design" / "04_allosteric" / "ligands" / "frag_CID7032.pdbqt",
    "ibuprofen": REPO / "14_inhibitor_design" / "04_allosteric" / "ligands" / "frag_CID3672.pdbqt",
}

HUMAN_TOP1 = {"indazole": -7.523, "ibuprofen": -7.276}


def run(cmd, **kw):
    print(f"  ▶ {' '.join(str(c) for c in cmd)}")
    return subprocess.run(cmd, check=True, capture_output=True, text=True, **kw)


def step1_super_align_pf():
    """Super-align 1J3I chain C onto 1HVY chain A and save the aligned Pf chain."""
    aligned = OUT / "pf_ts_aligned.pdb"
    pml = f"""
reinitialize
load {HUMAN_PDB}, hu
load {PF_PDB}, pf
remove hu and not chain A
remove pf and not (chain C and resi 281-608)
super pf, hu
remove hu
save {aligned}, pf
"""
    pml_path = OUT / "_super.pml"
    pml_path.write_text(pml)
    run([PYMOL, "-cq", str(pml_path)])
    n = sum(1 for ln in aligned.read_text().splitlines() if ln.startswith("ATOM"))
    print(f"    aligned Pf chain → {aligned}: {n} ATOM lines")
    return aligned


def step2_make_pdbqt(pdb_path: Path) -> Path:
    """obabel → PDBQT receptor (no autodock-tools needed)."""
    pdbqt = pdb_path.with_suffix(".pdbqt")
    run([OBABEL, str(pdb_path), "-O", str(pdbqt), "-xr"])
    print(f"    PDBQT → {pdbqt}  ({pdbqt.stat().st_size} bytes)")
    return pdbqt


def step3_vina_dock(receptor: Path, ligand: Path, label: str) -> dict:
    """Run Vina docking; return top1 score + pose."""
    out_pdbqt = OUT / f"{label}_pf.pdbqt"
    cmd = [
        VINA,
        "--receptor", str(receptor),
        "--ligand",   str(ligand),
        "--center_x", str(BOX_CENTRE[0]),
        "--center_y", str(BOX_CENTRE[1]),
        "--center_z", str(BOX_CENTRE[2]),
        "--size_x",   str(BOX_SIZE[0]),
        "--size_y",   str(BOX_SIZE[1]),
        "--size_z",   str(BOX_SIZE[2]),
        "--out",      str(out_pdbqt),
        "--exhaustiveness", "16",
        "--cpu",      "4",
        "--seed",     "42",
    ]
    p = run(cmd)
    # Parse Vina's top1 score
    top1 = None
    for line in p.stdout.splitlines():
        if line.strip().startswith("1 ") or line.strip().startswith("1\t"):
            parts = line.split()
            try:
                top1 = float(parts[1])
                break
            except (ValueError, IndexError): pass
    if top1 is None:
        # fallback: scan REMARK lines in output PDBQT
        with out_pdbqt.open() as f:
            for ln in f:
                if "REMARK VINA RESULT" in ln:
                    top1 = float(ln.split()[3]); break
    return {"label": label, "vina_top1": top1, "pose_pdbqt": str(out_pdbqt)}


def step4_contact_residues(complex_pdb: Path, ligand_atoms: list) -> list:
    """Stub: extract chain-C residues within 4 Å of ligand atoms."""
    contacts = set()
    cutoff2 = 4.0 ** 2
    for ln in complex_pdb.read_text().splitlines():
        if not ln.startswith("ATOM") or ln[21] != "C": continue
        try:
            x = float(ln[30:38]); y = float(ln[38:46]); z = float(ln[46:54])
        except ValueError: continue
        for lx, ly, lz in ligand_atoms:
            if (x-lx)**2 + (y-ly)**2 + (z-lz)**2 < cutoff2:
                resi = int(ln[22:26]); resn = ln[17:20].strip()
                contacts.add((resi, resn)); break
    return sorted(contacts)


def step4_extract_ligand_coords(pdbqt_path: Path) -> list:
    """Read top1 model from a Vina output PDBQT, return atom (x,y,z) list."""
    atoms = []
    in_model1 = False
    for ln in pdbqt_path.read_text().splitlines():
        if ln.startswith("MODEL 1"): in_model1 = True; continue
        if ln.startswith("ENDMDL") and in_model1: break
        if in_model1 and ln.startswith(("ATOM", "HETATM")):
            try:
                atoms.append((float(ln[30:38]), float(ln[38:46]), float(ln[46:54])))
            except ValueError: pass
    return atoms


def main():
    print("=== Phase 14m — Pf cavity-18 docking + specificity ===")
    aligned = step1_super_align_pf()
    receptor = step2_make_pdbqt(aligned)

    results = {"box_centre": BOX_CENTRE, "box_size": BOX_SIZE,
               "receptor": str(receptor), "ligands": {}}
    for label, lig in LIGANDS.items():
        if not lig.exists():
            print(f"  ! missing ligand: {lig}"); continue
        print(f"\n--- docking {label} into Pf cavity 18 ---")
        r = step3_vina_dock(receptor, lig, label)
        atoms = step4_extract_ligand_coords(Path(r["pose_pdbqt"]))
        r["contact_residues_pf"] = step4_contact_residues(aligned, atoms)
        r["human_top1"]    = HUMAN_TOP1[label]
        r["delta_pf_minus_human"] = r["vina_top1"] - HUMAN_TOP1[label] if r["vina_top1"] is not None else None
        print(f"  Pf top1   = {r['vina_top1']:.2f}  kcal/mol")
        print(f"  Hs top1   = {r['human_top1']:.2f}  kcal/mol")
        if r["delta_pf_minus_human"] is not None:
            print(f"  Δ (Pf − Hs) = {r['delta_pf_minus_human']:+.2f}")
        print(f"  Pf contact residues ({len(r['contact_residues_pf'])}): "
              + ", ".join(f"{rn}{ri}" for ri, rn in r['contact_residues_pf']))
        results["ligands"][label] = r

    out_json = OUT / "pf_specificity_results.json"
    out_json.write_text(json.dumps(results, indent=2, default=str))
    print(f"\n  → {out_json}")


if __name__ == "__main__":
    main()
