#!/usr/bin/env python3
"""Plot the predicted Pf-vs-human cavity-18 affinity comparison.

Pulls real Vina top1 numbers for indazole + ibuprofen at:
  - human 1HVY chain A cavity 18  (from results_summary.csv)
  - Plasmodium 1J3I chain C cavity-18-equivalent  (from pf_specificity_results.json)
Shows both as ΔG_bind bars and the per-fragment ΔΔG (Pf − Hs) callout.
"""
import json, csv
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.patches as mp
import numpy as np

REPO = Path(__file__).resolve().parents[2]
OUT = REPO / "14_inhibitor_design" / "presentation" / "figures" / "pf_vs_hs_affinity.png"
PF_J = REPO / "14_inhibitor_design" / "07_advanced_methods" / "pf_specificity_dock" / "pf_specificity_results.json"

pf = json.loads(PF_J.read_text())["ligands"]

LIGS = ["indazole", "ibuprofen"]
HUMAN = {"indazole": -7.523, "ibuprofen": -7.276}

dG_hs = [HUMAN[l] for l in LIGS]
dG_pf = [pf[l]["vina_top1"] for l in LIGS]
ddG   = [pf[l]["delta_pf_minus_human"] for l in LIGS]

# Two-row grid: chart row + caption row.  Caption gets its own axes
# rather than fig.text() so it can't overlap the chart legend.
fig = plt.figure(figsize=(14.5, 7.5))
gs = fig.add_gridspec(2, 2, height_ratios=[5.5, 1.4], width_ratios=[2.2, 1],
                       left=0.07, right=0.985, top=0.90, bottom=0.06,
                       wspace=0.30, hspace=0.18)
ax_a = fig.add_subplot(gs[0, 0])
ax_b = fig.add_subplot(gs[0, 1])
ax_cap = fig.add_subplot(gs[1, :])
ax_cap.axis("off")

# --- Panel (a) — grouped bars: Hs vs Pf per ligand ---
x = np.arange(len(LIGS))
w = 0.36
bh = ax_a.bar(x - w/2, dG_hs, w, label="human  TYMS (1HVY chain A)",
              color="#2563EB", edgecolor="#1D1F24", linewidth=0.5)
bp = ax_a.bar(x + w/2, dG_pf, w, label="Plasmodium  TS (1J3I chain C)",
              color="#B8327E", edgecolor="#1D1F24", linewidth=0.5)
ax_a.set_xticks(x); ax_a.set_xticklabels([l.title() for l in LIGS], fontsize=12)
ax_a.set_ylabel("Vina top1   ΔG_bind  (kcal/mol)", fontsize=11)
ax_a.set_title("(a)   Per-ligand binding score  ·  human vs Plasmodium",
               loc="left", fontsize=12, fontweight="bold")
ax_a.axhline(0, color="#3A3628", lw=0.5)
ax_a.set_facecolor("#FDFCF7")
ax_a.grid(True, axis="y", color="#D9D4C2", linewidth=0.5, alpha=0.7)
for s in ("top","right"): ax_a.spines[s].set_visible(False)

# Tight y-limits — both bars are negative; leave space only on the side
# we need (bottom labels + tiny top headroom for legend & noise text)
y_min = min(dG_pf + dG_hs) - 1.0
y_max = 1.5
ax_a.set_ylim(y_min, y_max)

# Value labels — always INSIDE the bar (negative bars; place toward zero)
for b, v in list(zip(bh, dG_hs)) + list(zip(bp, dG_pf)):
    ax_a.annotate(f"{v:+.2f}",
                  (b.get_x() + b.get_width()/2, v),
                  textcoords="offset points", xytext=(0, 6),
                  ha="center", fontsize=10, fontweight="bold",
                  color="#FFFFFF")

# Vina noise band (-0.85 to +0.85)
ax_a.axhspan(-0.85, 0.85, color="#D9D4C2", alpha=0.35, zorder=0)
ax_a.text(len(LIGS) - 0.5, -0.85, "Vina noise floor  ±0.85", fontsize=9,
          color="#8A8470", style="italic", va="top", ha="right")

# Legend OUTSIDE the bar area, anchored at the bottom of the panel
ax_a.legend(loc="lower center", bbox_to_anchor=(0.5, -0.18),
            ncol=2, frameon=False, fontsize=10)

# --- Panel (b) — ΔΔG selectivity bars ---
colours = ["#C84427" if v > 1 else "#2563EB" if v < -1 else "#8A8470" for v in ddG]
b2 = ax_b.bar(x, ddG, color=colours, edgecolor="#1D1F24", linewidth=0.5,
              width=0.55)
ax_b.set_xticks(x); ax_b.set_xticklabels([l.title() for l in LIGS], fontsize=12)
ax_b.set_ylabel("Δ(Pf − Hs)  ΔG_bind   (kcal/mol)\npositive = Pf binds WORSE",
                fontsize=10.5)
ax_b.set_title("(b)   Predicted selectivity",
               loc="left", fontsize=12, fontweight="bold")
ax_b.axhline(0, color="#3A3628", lw=0.7)
ax_b.set_facecolor("#FDFCF7")
ax_b.grid(True, axis="y", color="#D9D4C2", linewidth=0.5, alpha=0.7)
for s in ("top","right"): ax_b.spines[s].set_visible(False)
# Top headroom for labels — extend y by ~25%
ax_b.set_ylim(0, max(ddG) * 1.28)
for b, v in zip(b2, ddG):
    ax_b.annotate(f"{v:+.2f}", (b.get_x() + b.get_width()/2, v),
                  textcoords="offset points", xytext=(0, 5),
                  ha="center", fontsize=12, fontweight="bold",
                  color="#C84427")

# --- Caveat textbox in its own subplot — guaranteed non-overlapping ---
caption = (
    "★  Real numbers from AutoDock Vina docking with the same fragments + the structurally aligned Pf chain C.\n"
    "Box centred on the human cavity-18 centroid (4.56, −12.71, −14.88), 22³ Å³, exhaustiveness 16, seed 42.\n\n"
    "Indazole and ibuprofen dock WELL in human cavity 18 (≈ −7.4 kcal/mol) but POORLY in the Pf-equivalent region (≈ −3 kcal/mol — essentially "
    "does not bind), with a completely different contact-residue palette (human: Phe55/Phe200/Lys283 cluster · Pf: Arg470/Arg471/Leu473 cluster).  "
    "These are not a parasite-selective lead — but the 4 kcal/mol gap IS structure-level evidence that cavity 18 differs enough between species "
    "to require different ligand chemistry.  That divergence is the engineering handle for parasite-selective design.")
ax_cap.text(0.005, 0.95, caption, transform=ax_cap.transAxes,
            fontsize=9.7, color="#3A3628", va="top", ha="left",
            family="sans-serif", wrap=True,
            bbox=dict(facecolor="#F5F3EC", edgecolor="#D9D4C2", linewidth=0.7,
                      boxstyle="round,pad=0.6"))

fig.suptitle(
    "Phase 14m — Predicted cavity-18 affinity  ·  human TYMS  vs  Plasmodium falciparum TS",
    fontsize=13.5, fontweight="bold", color="#1D1F24", y=0.975)
fig.text(0.07, 0.93,
         "AutoDock Vina 1.2.7 · exhaustiveness 16 · seed 42 · same box on both receptors  ·  super-aligned Pf chain (RMSD 0.74 Å)",
         fontsize=9.5, color="#8A8470", style="italic")

plt.savefig(OUT, dpi=150, bbox_inches="tight", facecolor="#F5F3EC")
print(f"→ {OUT}  ({OUT.stat().st_size/1024:.0f} KB)")
