refactoring

2026-05-24 12:24:30 +02:00
parent 1422cc5fb2
commit 3c3f2a7705
8 changed files with 2272 additions and 742 deletions
--- a/backtest_optimizer/report.py
+++ b/backtest_optimizer/report.py
@@ -0,0 +1,195 @@
+# -*- coding: utf-8 -*-
+"""
+report.py
+=========
+Generazione di report (XLSX + PNG) dei risultati della grid search.
+
+- write_full_report(df_results, agg, output_dir)
+    → genera 4 file:
+       1) grid_search_full.xlsx        (tutti i fold, una riga per (ISIN, combo, fold))
+       2) grid_search_aggregate.xlsx   (aggregato per combo, ordinato per Sharpe medio)
+       3) heatmap_*.png                (heatmap delle metriche su coppie di parametri)
+       4) top_combos.xlsx              (top-K combinazioni con confronto con baseline)
+
+L'idea è che il PM possa aprire SOLO grid_search_aggregate.xlsx e capire
+in 30 secondi quale set di parametri usare in produzione.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+
+def write_full_report(
+    df_results: pd.DataFrame,
+    agg: pd.DataFrame,
+    output_dir: Path,
+    *,
+    primary_metric: str = "Sharpe",
+    top_k: int = 25,
+    baseline_params: Optional[dict] = None,
+) -> dict:
+    """
+    Scrive tutti i file di output e ritorna un dict con i path.
+
+    Parameters
+    ----------
+    df_results : DataFrame "lungo" prodotto da run_grid_search
+    agg : DataFrame aggregato prodotto da aggregate_results
+    output_dir : Path
+    primary_metric : str
+    top_k : int
+        Numero di top combinazioni da analizzare in dettaglio
+    baseline_params : dict, opzionale
+        Parametri della config attuale di produzione, per confronto. Esempio:
+        {"Wp":60, "Ha":10, "k":25, "theta_entry":0.005, "sl_bps":300, "tp_bps":800,
+         "trail_bps":300, "time_stop_bars":20, "decision_every":1, "min_holding_bars":0}
+    """
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    paths = {}
+
+    # === 1. Dataset completo ===
+    path_full = output_dir / "grid_search_full.xlsx"
+    df_results.to_excel(path_full, index=False)
+    paths["full"] = path_full
+    print(f"[REPORT] Salvato {path_full} ({len(df_results):,} righe)")
+
+    # === 2. Aggregato per combinazione ===
+    path_agg = output_dir / "grid_search_aggregate.xlsx"
+    with pd.ExcelWriter(path_agg, engine="openpyxl") as xw:
+        agg.to_excel(xw, sheet_name="ByCombo", index=False)
+        if baseline_params is not None:
+            baseline_row = _find_baseline_row(agg, baseline_params)
+            if baseline_row is not None:
+                pd.DataFrame([baseline_row]).to_excel(xw, sheet_name="Baseline", index=False)
+    paths["aggregate"] = path_agg
+    print(f"[REPORT] Salvato {path_agg}")
+
+    # === 3. Top-K combinazioni ===
+    top = agg.head(top_k).copy()
+    if baseline_params is not None:
+        baseline_row = _find_baseline_row(agg, baseline_params)
+        if baseline_row is not None:
+            metric_mean = f"{primary_metric}_mean"
+            base_metric = baseline_row.get(metric_mean, np.nan)
+            top["delta_vs_baseline"] = top[metric_mean] - base_metric
+            top["pct_improvement"] = (top[metric_mean] / base_metric - 1) * 100 if base_metric and base_metric != 0 else np.nan
+
+    path_top = output_dir / "top_combos.xlsx"
+    top.to_excel(path_top, index=False)
+    paths["top"] = path_top
+    print(f"[REPORT] Salvato {path_top}")
+
+    # === 4. Heatmaps ===
+    heatmap_paths = _generate_heatmaps(agg, output_dir, primary_metric=primary_metric)
+    paths.update(heatmap_paths)
+
+    # === 5. Top combos equity comparison ===
+    # (Skipped if no per-fold equity data — left as hook for future enhancement)
+
+    return paths
+
+
+def _find_baseline_row(agg: pd.DataFrame, baseline_params: dict) -> Optional[dict]:
+    """Trova la riga corrispondente ai parametri baseline (se presente)."""
+    mask = pd.Series(True, index=agg.index)
+    for k, v in baseline_params.items():
+        if k not in agg.columns:
+            continue
+        if v is None:
+            mask &= agg[k].isna()
+        else:
+            mask &= (agg[k] == v)
+    if mask.sum() == 0:
+        return None
+    return agg[mask].iloc[0].to_dict()
+
+
+def _generate_heatmaps(
+    agg: pd.DataFrame,
+    output_dir: Path,
+    *,
+    primary_metric: str = "Sharpe",
+) -> dict:
+    """
+    Per ogni coppia "interessante" di parametri, genera una heatmap
+    del primary_metric (mediato sugli altri parametri).
+    """
+    paths = {}
+    metric_col = f"{primary_metric}_mean"
+    if metric_col not in agg.columns:
+        return paths
+
+    pairs_to_plot = [
+        ("decision_every", "tp_bps", "decision_vs_tp"),
+        ("decision_every", "sl_bps", "decision_vs_sl"),
+        ("tp_bps", "sl_bps", "tp_vs_sl"),
+        ("Wp", "Ha", "wp_vs_ha"),
+        ("decision_every", "min_holding_bars", "decision_vs_holding"),
+        ("k", "theta_entry", "k_vs_theta"),
+    ]
+
+    for px, py, label in pairs_to_plot:
+        if px not in agg.columns or py not in agg.columns:
+            continue
+        if agg[px].nunique() <= 1 or agg[py].nunique() <= 1:
+            continue
+        try:
+            pivot = agg.pivot_table(index=py, columns=px, values=metric_col, aggfunc="mean")
+            n_pivot = agg.pivot_table(index=py, columns=px, values="N_Trades_avg", aggfunc="mean")
+            fig, ax = plt.subplots(figsize=(8, 5.5), dpi=120)
+            im = ax.imshow(pivot.values, cmap="RdYlGn", aspect="auto")
+            ax.set_xticks(range(len(pivot.columns)))
+            ax.set_xticklabels(pivot.columns, fontsize=9)
+            ax.set_yticks(range(len(pivot.index)))
+            ax.set_yticklabels(pivot.index, fontsize=9)
+            ax.set_xlabel(px, fontsize=10)
+            ax.set_ylabel(py, fontsize=10)
+            ax.set_title(f"{primary_metric} medio per {px} × {py} (annotazione: N_Trades_avg)", fontsize=10)
+            # Annota celle: metrica sopra, n_trades sotto
+            for i in range(pivot.shape[0]):
+                for j in range(pivot.shape[1]):
+                    val = pivot.iloc[i, j]
+                    if not np.isfinite(val):
+                        continue
+                    n_val = n_pivot.iloc[i, j] if not n_pivot.empty else np.nan
+                    text = f"{val:.2f}"
+                    if np.isfinite(n_val):
+                        text += f"\nn={int(n_val)}"
+                    ax.text(j, i, text, ha="center", va="center", fontsize=7,
+                            color="black" if abs(val) < 0.8 else "white")
+            fig.colorbar(im, ax=ax, label=primary_metric)
+            plt.tight_layout()
+            p = output_dir / f"heatmap_{label}.png"
+            fig.savefig(p, dpi=120, bbox_inches="tight")
+            plt.close(fig)
+            paths[f"heatmap_{label}"] = p
+            print(f"[REPORT] Salvata heatmap {p}")
+        except Exception as e:
+            print(f"[REPORT] Heatmap {label} fallita: {e}")
+
+    return paths
+
+
+def summary_text_table(agg: pd.DataFrame, top_k: int = 10, primary_metric: str = "Sharpe") -> str:
+    """
+    Restituisce una tabella testuale dei top-K combinazioni, formattata per la console.
+    """
+    if agg is None or agg.empty:
+        return "(nessun risultato)"
+    cols_to_show = [
+        "Wp", "Ha", "k", "decision_every", "tp_bps", "sl_bps", "trail_bps",
+        "min_holding_bars",
+        f"{primary_metric}_mean", "Stability", "N_Trades_avg",
+        "CAGR_avg", "MaxDD_avg", "Calmar_avg",
+    ]
+    cols_to_show = [c for c in cols_to_show if c in agg.columns]
+    return agg[cols_to_show].head(top_k).to_string(index=False)
+
+
+__all__ = ["write_full_report", "summary_text_table"]