# -*- coding: utf-8 -*-
"""
Trading Pattern Recognition + kNN Walk-Forward Backtest (ETF)
==============================================================

End-to-end script that:
  - loads the universe (Excel) and the data (DB) ONLY ONCE (refactor v2.1)
  - computes Hurst + pattern signals (INFORMATIONAL only in v2.1, not used
    for decisions)
  - runs the walk-forward k-NN (long only) with SL/TP/TRAIL/TIME/FLIP exit
    rules
  - builds dynamic portfolios (Equal Weight + Risk Parity + v2 Config B
    variants)
  - produces final metrics, equity curves, monthly heatmaps, trade report

Changes in v2.1 (refactor):
  - HURST REMOVED from the decision logic: theta_entry = global THETA
    (PATTERN_CONFIG.theta). hurst_rs_returns / hurst_dfa_returns remain
    available and still produce the hurst_by_isin.xlsx report as
    qualitative information.
  - UNIFIED DB DATA CACHE: a single read per ISIN (it used to be read twice:
    lines 813 and 1177 of the original). Saves ~50% of the DB time.
  - BULK OPEN/CLOSE PRICE FETCH: one call for the whole universe before the
    backtest loop, instead of one per ISIN inside the loop.
  - Redundant INLINE IMPORTS removed (5 pandas + 6 numpy + 4 matplotlib).
  - MAIN() WRAPPER for idempotent execution.
  - The math of the kNN and of the portfolios is 100% UNCHANGED: same
    formulas, same parameters, same outputs. Only the code organisation
    was improved.

Outputs (written to OUTPUT_DIR, i.e. `paths.output_dir` from the config,
default `out_etf`):
  - hurst_by_isin.xlsx           (informational, regime classification)
  - pattern_signals.xlsx         (signals + quality scores)
  - forward_bt_signals.xlsx      (walk-forward signals per ISIN)
  - forward_bt_summary.xlsx      (metrics per ISIN)
  - trades_report.xlsx           (trade-by-trade)
  - portfolio_metrics.xlsx       (metrics for EW, RP, EW_v2, RP_v2)
  - daily_from_trades.csv        (reconstructed daily PnL)
  - weights_daily.xlsx           (daily weights per strategy)
  - final_metrics.xlsx           (metrics for N=6..20)
  - performance_attribution.xlsx

Pipeline:
  1) Excel universe -> meta_df
  2) DB connection
  3) Load the data series for ALL ISINs (once) -> assets_data
  4) For each ISIN: Hurst + Pattern (informational) -> hurst_rows, pattern_rows
  5) Bulk fetch of open/close prices for t+1 execution -> open_returns_map
  6) For each ISIN: walk-forward kNN backtest -> bt_signals, bt_summary
  7) Dynamic portfolio construction (EW, RP, EW_v2, RP_v2) -> equity, weights
  8) Trade report + daily PnL reconstruction + final metrics
"""

from __future__ import annotations

import json
import re
import ssl
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlalchemy as sa
from scipy import linalg
from sqlalchemy import text

from shared_utils import (
    build_pattern_library,
    characterize_window,
    detect_column,
    load_config,
    predict_from_library,
    read_connection_txt,
    require_section,
    require_value,
    z_norm,
)


# =============================================================================
# PLOT SAVING HELPER (no recursion, avoids import conflicts)
# =============================================================================
def savefig_safe(path, dpi: int = 150, bbox_inches: str = "tight") -> None:
    """
    Save the current matplotlib figure, creating the target folder if needed.

    Not recursive: it calls ``plt.savefig`` directly, so it does not conflict
    with code that overrides ``plt.savefig``.

    Args:
        path: Destination file (string or path-like).
        dpi: Resolution forwarded to ``plt.savefig``.
        bbox_inches: Bounding-box mode forwarded to ``plt.savefig``.

    If the folder cannot be created, a warning is printed and the figure is
    saved under the bare file name in the current working directory.
    """
    target = Path(str(path))
    folder = target.parent  # Path(".") when ``path`` has no directory part
    try:
        folder.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        print(f"[WARN] impossibile creare la cartella '{folder}': {exc}. "
              f"Salvo nella directory corrente.")
        target = Path(target.name)
    plt.savefig(str(target), dpi=dpi, bbox_inches=bbox_inches)


# =============================================================================
# CONFIG LOADING
# =============================================================================
CONFIG = load_config()

# Mandatory sections (require_section is a project helper; presumably it
# fails when the section is missing -- confirm in shared_utils).
DB_CONFIG = require_section(CONFIG, "db")
PATTERN_CONFIG = require_section(CONFIG, "pattern")
TAGGING_CONFIG = require_section(CONFIG, "tagging")
RANKING_CONFIG = require_section(CONFIG, "ranking")
PATHS_CONFIG = require_section(CONFIG, "paths")

# Optional sections: default to an empty mapping.
HURST_CONFIG = CONFIG.get("hurst", {})
RUN_CONFIG = CONFIG.get("run", {})
SIGNALS_CONFIG = CONFIG.get("signals", {})
PRICES_CONFIG = CONFIG.get("prices", {})
STRATEGIES_CONFIG: Dict[str, Any] = CONFIG.get("strategies", {})

# =============================================================================
# PATHS
# =============================================================================
OUTPUT_DIR = Path(PATHS_CONFIG.get("output_dir", "out_etf"))
PLOT_DIR = Path(PATHS_CONFIG.get("plot_dir", "plot_etf"))
# Import-time side effect: both folders are created as soon as the module loads.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
PLOT_DIR.mkdir(parents=True, exist_ok=True)

UNIVERSO_XLSX = PATHS_CONFIG.get("input_universe", "Input/Universo per Trading System.xlsx")
OUTPUT_HURST_XLSX = OUTPUT_DIR / "hurst_by_isin.xlsx"
OUTPUT_PATTERN_XLSX = OUTPUT_DIR / "pattern_signals.xlsx"
ERROR_LOG_CSV = OUTPUT_DIR / "errori_isin.csv"
FORWARD_BT_SIGNALS_XLSX = OUTPUT_DIR / "forward_bt_signals.xlsx"
FORWARD_BT_SUMMARY_XLSX = OUTPUT_DIR / "forward_bt_summary.xlsx"
TRADES_REPORT_XLSX = OUTPUT_DIR / "trades_report.xlsx"
PERF_ATTRIB_XLSX = OUTPUT_DIR / "performance_attribution.xlsx"
DAILY_FROM_TRADES_CSV = OUTPUT_DIR / "daily_from_trades.csv"
DAILY_FROM_TRADES_XLSX = OUTPUT_DIR / "daily_from_trades.xlsx"
WEIGHTS_DAILY_XLSX = OUTPUT_DIR / "weights_daily.xlsx"
FINAL_METRICS_XLSX = OUTPUT_DIR / "final_metrics.xlsx"

# =============================================================================
# GLOBAL PARAMETERS
# =============================================================================
# Stored procedure and DB parameters
STORED_PROC = str(require_value(DB_CONFIG, "stored_proc", "db"))
N_BARS = int(require_value(DB_CONFIG, "n_bars", "db"))
PTF_CURR = str(require_value(DB_CONFIG, "ptf_curr", "db"))
RANKING_WINDOW_BARS = int(RANKING_CONFIG.get("rolling_window_bars", N_BARS))
RP_LOOKBACK = int(SIGNALS_CONFIG.get("risk_parity_lookback", 60))

# Open/close prices (euronext API)
OPEN_PRICE_BASE_URL = str(PRICES_CONFIG.get(
    "base_url", "https://fin.scorer.app/finance/etf-inv/history"
))
OPEN_MAX_RETRY = int(PRICES_CONFIG.get("max_retry", 3))
OPEN_SLEEP_SEC = float(PRICES_CONFIG.get("sleep_sec", 0.1))
OPEN_TIMEOUT = float(PRICES_CONFIG.get("timeout", 10))
OPEN_CACHE_DIR = Path(PRICES_CONFIG.get("cache_dir", OUTPUT_DIR / "price_cache"))
# NOTE(review): bool() of a non-empty string such as "false" is True; this
# assumes the config loader yields a real boolean -- confirm.
RECOMPUTE_PORTF_FROM_OPEN = bool(PRICES_CONFIG.get("recompute_portfolio_open", False))

# Pattern matching (kNN hyper-parameters)
WP = int(require_value(PATTERN_CONFIG, "wp", "pattern"))
HA = int(require_value(PATTERN_CONFIG, "ha", "pattern"))
KNN_K = int(require_value(PATTERN_CONFIG, "knn_k", "pattern"))
THETA = float(require_value(PATTERN_CONFIG, "theta", "pattern"))
EMBARGO_RAW = require_value(PATTERN_CONFIG, "embargo", "pattern")
# Falls back to WP + HA only when the configured value is None.
EMBARGO = int(EMBARGO_RAW) if EMBARGO_RAW is not None else (WP + HA)

# Rule-based tagging (informational thresholds)
Z_REV = float(require_value(TAGGING_CONFIG, "z_rev", "tagging"))
Z_VOL = float(require_value(TAGGING_CONFIG, "z_vol", "tagging"))
STD_COMP_PCT = float(require_value(TAGGING_CONFIG, "std_comp_pct", "tagging"))

# Ranking and selection
TOP_N_MAX = int(require_value(RANKING_CONFIG, "top_n_max", "ranking"))
RP_MAX_WEIGHT_RAW = RANKING_CONFIG.get("rp_max_weight")
# Default cap: twice the equal-weight share of a TOP_N_MAX portfolio.
RP_MAX_WEIGHT = (
    float(RP_MAX_WEIGHT_RAW)
    if RP_MAX_WEIGHT_RAW is not None
    else 2.0 / max(TOP_N_MAX, 1)
)
SCORE_VERBOSE = bool(RANKING_CONFIG.get("score_verbose", False))
SCORE_WEIGHTS = RANKING_CONFIG.get("score_weights")

# Hurst (computation parameters, kept for informational use)
HURST_MIN_LENGTH = int(HURST_CONFIG.get("min_length", 200))
HURST_WIN_GRID = HURST_CONFIG.get("win_grid")
HURST_MIN_SEGMENTS = int(HURST_CONFIG.get("min_segments", 1))

DAYS_PER_YEAR = int(RUN_CONFIG.get("days_per_year", 252))
TOP_N = int(RUN_CONFIG.get("top_n_default", TOP_N_MAX))


# =============================================================================
# GENERAL UTILITIES (numeric, formatting, ETA)
# =============================================================================
def clamp01(x):
    """
    Clamp a value to [0, 1].

    Returns NaN when ``x`` is not finite, and also when it cannot be
    converted to a float (e.g. ``None``), instead of raising.
    """
    value = _to_float_safe(x)
    if not np.isfinite(value):
        return np.nan
    return max(0.0, min(1.0, value))


def format_eta(seconds: float) -> str:
    """
    Format a duration as ``H:MM:SS`` (one hour or more) or ``MM:SS``.

    Returns ``"--:--"`` for None, non-finite or negative input.
    """
    if seconds is None or not np.isfinite(seconds) or seconds < 0:
        return "--:--"
    seconds = int(round(seconds))
    h, rem = divmod(seconds, 3600)
    m, s = divmod(rem, 60)
    if h > 0:
        return f"{h:d}:{m:02d}:{s:02d}"
    return f"{m:02d}:{s:02d}"


def _to_float_safe(x) -> float:
    """Convert to float; return NaN when the conversion fails."""
    try:
        return float(x)
    except (TypeError, ValueError):
        return np.nan


def _coerce_num(s: pd.Series) -> pd.Series:
    """
    Coerce a Series to numeric, tolerating percent signs and decimal commas.

    Numeric Series are returned unchanged. Otherwise values are stringified,
    stripped, "%" is removed, a decimal comma becomes a dot, and anything
    that still fails to parse becomes NaN. Thousands separators are not
    handled: "1.234,56" becomes "1.234.56" and therefore NaN.
    """
    if pd.api.types.is_numeric_dtype(s):
        return s
    txt = s.astype(str).str.strip().str.replace("%", "", regex=False)
    txt = txt.replace({"": np.nan, "nan": np.nan, "None": np.nan})
    return pd.to_numeric(txt.str.replace(",", ".", regex=False), errors="coerce")


# =============================================================================
# POST-EXECUTION TIMER (for the phases after the backtest)
# =============================================================================
# Mutable module-level state shared by the two timer functions below.
_post_timer_state: Dict[str, float] = {}


def start_post_timer(total_steps: int = 1) -> None:
    """Start (or restart) the post-execution timer for ``total_steps`` steps."""
    _post_timer_state["t0"] = time.perf_counter()
    _post_timer_state["total_steps"] = max(1, total_steps)
    _post_timer_state["done"] = 0


def checkpoint_post_timer(label: str) -> None:
    """
    Record one completed step and print elapsed time plus a linear ETA.

    Does nothing if ``start_post_timer`` has not been called.
    """
    if "t0" not in _post_timer_state:
        return
    elapsed = time.perf_counter() - _post_timer_state["t0"]
    _post_timer_state["done"] += 1
    done = _post_timer_state["done"]
    tot = _post_timer_state["total_steps"]
    # ETA = average time per completed step * remaining steps.
    avg = elapsed / max(done, 1)
    eta = avg * max(0, tot - done)
    print(f"[POST-TIMER] {label}: elapsed {format_eta(elapsed)} "
          f"({done}/{tot}, ETA {format_eta(eta)})")


# =============================================================================
# HURST (on returns) - KEPT FOR INFORMATIONAL USE
# =============================================================================
# NOTE v2.1: these functions remain available to generate the
# hurst_by_isin.xlsx file (regime classification: mean_reversion / breakout /
# neutral). The Hurst value is NO LONGER used as the theta_entry threshold.
# All ISINs use the same theta = global THETA from the config.
def hurst_rs_returns(r, win_grid=None, min_seg=None) -> float:
    """
    Estimate the Hurst exponent with Rescaled Range (R/S) analysis on returns.

    Args:
        r: Return series (anything ``pd.Series`` accepts); NaNs are dropped.
        win_grid: Window sizes to use. When None, HURST_WIN_GRID (or a built-in
            grid) is filtered to sizes <= n // 2, with a dyadic fallback when
            fewer than 4 sizes survive.
        min_seg: Minimum number of non-overlapping segments required for a
            window size; defaults to HURST_MIN_SEGMENTS.

    Returns:
        Slope of log(R/S) against log(window), clamped to [0, 1]; NaN when the
        series is shorter than HURST_MIN_LENGTH or fewer than 3 window sizes
        yield a valid R/S value.
    """
    r = pd.Series(r).dropna().astype("float64").values
    n = len(r)
    seg_min = HURST_MIN_SEGMENTS if min_seg is None else int(min_seg)
    if n < HURST_MIN_LENGTH:
        return np.nan
    if win_grid is None:
        base = HURST_WIN_GRID or [16, 24, 32, 48, 64, 96, 128, 192, 256, 384]
        base = np.array(base, dtype=int)
        win_grid = [w for w in base if w <= n // 2]
        if len(win_grid) < 4:
            # Too few usable sizes: fall back to n/8 .. n/128 (dyadic grid).
            max_w = max(16, n // 4)
            g = sorted({int(max(8, round((n / (2 ** k))))) for k in range(3, 8)})
            win_grid = [w for w in g if 8 <= w <= max_w]
    RS_vals, sizes = [], []
    for w in win_grid:
        if w < 8 or w > n:
            continue
        m = n // w  # number of non-overlapping segments of length w
        if m < seg_min:
            continue
        rs_list = []
        for i in range(m):
            seg = r[i * w:(i + 1) * w]
            seg = seg - np.mean(seg)
            sd = seg.std(ddof=1)
            if sd == 0 or not np.isfinite(sd):
                continue
            y = np.cumsum(seg)
            rs = (np.max(y) - np.min(y)) / sd
            if np.isfinite(rs) and rs > 0:
                rs_list.append(rs)
        if rs_list:
            RS_vals.append(np.mean(rs_list))
            sizes.append(w)
    if len(RS_vals) < 3:
        return np.nan
    sizes = np.array(sizes, float)
    RS_vals = np.array(RS_vals, float)
    mask = np.isfinite(RS_vals) & (RS_vals > 0)
    sizes, RS_vals = sizes[mask], RS_vals[mask]
    if sizes.size < 3:
        return np.nan
    slope, _ = np.polyfit(np.log(sizes), np.log(RS_vals), 1)
    return clamp01(slope)


def hurst_dfa_returns(r, win_grid=None) -> float:
    """
    Estimate the Hurst exponent with Detrended Fluctuation Analysis.

    The demeaned returns are integrated, split into non-overlapping segments
    for each window size, linearly detrended, and the mean RMS residual per
    size is regressed on the size in log-log space.

    Args:
        r: Return series; NaNs are dropped.
        win_grid: Window sizes; when None the same default/fallback grid logic
            as ``hurst_rs_returns`` is applied (default grid stops at 256).

    Returns:
        The log-log slope clamped to [0, 1], or NaN when the series is shorter
        than HURST_MIN_LENGTH or fewer than 3 sizes give a valid fluctuation.
    """
    r = pd.Series(r).dropna().astype("float64").values
    n = len(r)
    if n < HURST_MIN_LENGTH:
        return np.nan
    r_dm = r - np.mean(r)
    y = np.cumsum(r_dm)
    if win_grid is None:
        base = HURST_WIN_GRID or [16, 24, 32, 48, 64, 96, 128, 192, 256]
        base = np.array(base, dtype=int)
        win_grid = [w for w in base if w <= n // 2]
        if len(win_grid) < 4:
            max_w = max(16, n // 4)
            g = sorted({int(max(8, round((n / (2 ** k))))) for k in range(3, 8)})
            win_grid = [w for w in g if 8 <= w <= max_w]
    F_vals, sizes = [], []
    for s in win_grid:
        if s < 8:
            continue
        m = n // s
        if m < 2:
            continue
        # The design matrix depends only on s, so build it once per size.
        t = np.arange(s, dtype=float)
        A = np.vstack([t, np.ones(s)]).T
        rms_list = []
        for i in range(m):
            seg = y[i * s:(i + 1) * s]
            coeff, *_ = np.linalg.lstsq(A, seg, rcond=None)
            detr = seg - A @ coeff
            rms = np.sqrt(np.mean(detr ** 2))
            if np.isfinite(rms) and rms > 0:
                rms_list.append(rms)
        if rms_list:
            F_vals.append(np.mean(rms_list))
            sizes.append(s)
    if len(F_vals) < 3:
        return np.nan
    sizes = np.array(sizes, float)
    F_vals = np.array(F_vals, float)
    mask = np.isfinite(F_vals) & (F_vals > 0)
    sizes, F_vals = sizes[mask], F_vals[mask]
    if sizes.size < 3:
        return np.nan
    slope, _ = np.polyfit(np.log(sizes), np.log(F_vals), 1)
    return clamp01(slope)


# =============================================================================
# EQUITY METRICS (R^2, drawdown, heal index, h_min_100)
# =============================================================================
def r2_equity_line(returns: pd.Series) -> float:
    """
    R^2 of a straight-line fit to the log equity curve (smoothness measure).

    Non-numeric/NaN returns count as 0. Returns NaN with fewer than 5 usable
    points or when the log equity has no variance.
    """
    r = pd.to_numeric(returns, errors="coerce").fillna(0.0)
    eq = (1.0 + r).cumprod().replace(0, np.nan)
    y = np.log(eq.dropna())
    if len(y) < 5:
        return np.nan
    x = np.arange(len(y), dtype=float)
    a, b = np.polyfit(x, y, 1)
    y_hat = a * x + b
    ss_res = float(np.sum((y - y_hat) ** 2))
    ss_tot = float(np.sum((y - y.mean()) ** 2))
    if ss_tot <= 0:
        return np.nan
    return float(1.0 - ss_res / ss_tot)


def drawdown_metrics(returns: pd.Series) -> Dict[str, float]:
    """
    Compute drawdown depth and length statistics from a return series.

    Returns:
        Dict with MaxDD_%, AvgDD_%, AvgDD_len, MaxDD_len, RecoverDays_avg.
        All values are NaN when fewer than 2 observations are given.

    NOTE(review): RecoverDays_avg is computed exactly like AvgDD_len (mean
    length of the under-water periods, including one still open at the end of
    the series) -- confirm this is the intended definition.
    """
    r = pd.to_numeric(returns, errors="coerce").fillna(0.0)
    if len(r) < 2:
        return {
            "MaxDD_%": np.nan,
            "AvgDD_%": np.nan,
            "AvgDD_len": np.nan,
            "MaxDD_len": np.nan,
            "RecoverDays_avg": np.nan,
        }
    eq = (1.0 + r).cumprod()
    peak = eq.cummax()
    dd = eq / peak - 1.0
    max_dd = float(dd.min())
    in_dd = dd < 0
    if not in_dd.any():
        return {
            "MaxDD_%": round(max_dd * 100, 2),
            "AvgDD_%": 0.0,
            "AvgDD_len": 0.0,
            "MaxDD_len": 0.0,
            "RecoverDays_avg": 0.0,
        }
    # Identify contiguous under-water periods as (start, end, min depth).
    dd_periods = []
    in_period = False
    start_idx = None
    for i, flag in enumerate(in_dd):
        if flag and not in_period:
            in_period = True
            start_idx = i
        elif not flag and in_period:
            in_period = False
            dd_periods.append((start_idx, i - 1, dd.iloc[start_idx:i].min()))
    if in_period:
        # Series ends while still under water.
        dd_periods.append((start_idx, len(in_dd) - 1, dd.iloc[start_idx:].min()))
    lengths = [p[1] - p[0] + 1 for p in dd_periods]
    depths = [p[2] for p in dd_periods]
    return {
        "MaxDD_%": round(max_dd * 100, 2),
        "AvgDD_%": round(float(np.mean(depths)) * 100, 2),
        "AvgDD_len": round(float(np.mean(lengths)), 1),
        "MaxDD_len": int(np.max(lengths)),
        "RecoverDays_avg": round(float(np.mean(lengths)), 1),
    }


def heal_index_metrics(returns: pd.Series) -> Dict[str, float]:
    """
    Heal Index: share of the path that is not lost to drawdowns.

    Computed as AreaAboveWater / n = 1 - AreaUnderWater / n, where
    AreaUnderWater is the sum of the absolute drawdowns and n is the number of
    observations. Values near 1 mean shallow/short drawdowns, lower values
    mean deep or prolonged ones.

    Returns:
        Dict with HealIndex, AreaAboveWater, AreaUnderWater (all NaN when
        fewer than 2 observations are given).
    """
    r = pd.to_numeric(returns, errors="coerce").fillna(0.0)
    if len(r) < 2:
        return {"HealIndex": np.nan, "AreaAboveWater": np.nan, "AreaUnderWater": np.nan}
    eq = (1.0 + r).cumprod()
    peak = eq.cummax()
    dd = eq / peak - 1.0  # <= 0
    area_under = float(-dd.sum())  # >= 0
    area_total = float(len(r))
    area_above = area_total - area_under
    heal = area_above / area_total if area_total > 0 else np.nan
    return {
        "HealIndex": round(heal, 4) if np.isfinite(heal) else np.nan,
        "AreaAboveWater": round(area_above, 2),
        "AreaUnderWater": round(area_under, 2),
    }


def h_min_100(returns: pd.Series) -> float:
    """
    Worst 100-bar window, measured as (window min equity / window max equity - 1).

    Local tail-risk indicator. Returns NaN with fewer than 100 observations.
    The ratio ignores whether the minimum comes before or after the maximum
    inside the window.
    """
    r = pd.to_numeric(returns, errors="coerce").fillna(0.0)
    if len(r) < 100:
        return np.nan
    eq = (1.0 + r).cumprod()
    roll_low = eq.rolling(100).min() / eq.rolling(100).max() - 1.0
    return float(roll_low.min()) if roll_low.notna().any() else np.nan


def drawdown_stats_simple(ret_series: pd.Series) -> Dict[str, float]:
    """
    Minimal CAGR/Vol/Sharpe/MDD/Calmar statistics (used for per-ISIN stats).

    The equity is the cumulative SUM of returns, so the drawdown ("MaxDD_%eq")
    is an absolute difference from the running peak, and CAGR is
    exp(mean * DAYS_PER_YEAR) - 1.
    """
    eq = (ret_series.fillna(0)).cumsum()
    rolling_max = eq.cummax()
    dd = eq - rolling_max
    maxdd = float(dd.min()) if len(dd) else 0.0
    cagr = np.exp(ret_series.mean() * DAYS_PER_YEAR) - 1
    annvol = ret_series.std() * np.sqrt(DAYS_PER_YEAR)
    # 1e-12 avoids a division by zero on a flat series.
    sharpe = (ret_series.mean() / (ret_series.std() + 1e-12)) * np.sqrt(DAYS_PER_YEAR)
    calmar = (cagr / abs(maxdd)) if maxdd < 0 else np.nan
    return {
        "CAGR_%": round(cagr * 100, 2),
        "AnnVol_%": round(annvol * 100, 2),
        "Sharpe": round(float(sharpe), 2),
        "MaxDD_%eq": round(float(maxdd * 100), 2),
        "Calmar": round(float(calmar), 2) if np.isfinite(calmar) else np.nan,
    }


def portfolio_metric_row(label: str, returns: pd.Series) -> Dict[str, Any]:
    """
    Build one row of aggregate metrics for portfolio_metrics.xlsx.

    Returns only ``{"Portfolio": label}`` for an empty series.

    NOTE(review): CAGR uses eq[-1] / eq[0] and eq[0] already contains the
    first return, so that first return is left out of the growth factor --
    confirm whether this is intended.
    """
    r = pd.to_numeric(returns, errors="coerce").fillna(0.0)
    if r.empty:
        return {"Portfolio": label}
    eq = (1.0 + r).cumprod()
    cagr = (eq.iloc[-1] / max(eq.iloc[0], 1e-12)) ** (DAYS_PER_YEAR / max(1, len(r))) - 1.0
    vol = r.std() * np.sqrt(DAYS_PER_YEAR)
    sharpe = r.mean() / r.std() * np.sqrt(DAYS_PER_YEAR) if r.std() > 0 else np.nan
    downside = r[r < 0]
    sortino = r.mean() / downside.std() * np.sqrt(DAYS_PER_YEAR) if (
        len(downside) > 1 and downside.std() > 0
    ) else np.nan
    dd = eq / eq.cummax() - 1.0
    mdd = float(dd.min())
    calmar = cagr / abs(mdd) if mdd < 0 else np.nan
    heal = heal_index_metrics(r)
    r2 = r2_equity_line(r)  # computed once and reused below
    return {
        "Portfolio": label,
        "CAGR_%": round(cagr * 100, 2),
        "AnnVol_%": round(vol * 100, 2),
        "Sharpe": round(float(sharpe), 2) if np.isfinite(sharpe) else np.nan,
        "Sortino": round(float(sortino), 2) if np.isfinite(sortino) else np.nan,
        "MaxDD_%": round(mdd * 100, 2),
        "Calmar": round(float(calmar), 2) if np.isfinite(calmar) else np.nan,
        "HealIndex": heal.get("HealIndex", np.nan),
        "R2_equity": round(r2, 4) if np.isfinite(r2) else np.nan,
    }


# =============================================================================
# PRICE FETCH (OPEN/CLOSE) - historical prices for t+1 open execution
# =============================================================================
def _cell_text(row: pd.Series, key: str) -> str:
    """Return ``row[key]`` as stripped text; missing, None or NaN become ""."""
    value = row.get(key, "")
    if value is None or (not isinstance(value, str) and pd.isna(value)):
        return ""
    return str(value).strip()


def _build_symbol_euronext(row: pd.Series) -> Tuple[str, str]:
    """
    Build ``(base_url, symbol)`` for the price API query.

    Priority: an explicit ``TickerOpen`` of the form ``<ISIN>-<something>``
    whose prefix matches the row's ISIN (case-insensitive); otherwise
    ``<ISIN>-<Mercato>``; otherwise the bare ISIN. Empty/NaN cells are treated
    as missing, so a blank venue no longer produces a symbol like "ISIN-nan".
    """
    isin = _cell_text(row, "ISIN")
    venue = _cell_text(row, "Mercato")
    tok = _cell_text(row, "TickerOpen")
    base = OPEN_PRICE_BASE_URL
    if tok and "-" in tok and tok.split("-")[0].upper() == isin.upper():
        return base, tok
    if isin and venue:
        return base, f"{isin}-{venue}"
    return base, isin


def fetch_price_history(
    isins: List[str],
    universe: pd.DataFrame,
    start_date: str,
    end_date: str,
) -> pd.DataFrame:
    """
    Download open/close price history for the given ISINs.

    Args:
        isins: ISIN codes to fetch.
        universe: Universe table; must have an ``ISIN`` column and may have
            ``Mercato`` / ``TickerOpen`` (used to build the API symbol).
        start_date: Value sent as the ``from`` query parameter.
        end_date: Value sent as the ``to`` query parameter.

    Returns:
        Long DataFrame with columns Date | ISIN | Open | Close, sorted by
        ISIN and Date; empty (with those columns) when nothing was retrieved.

    Responses are cached on disk in OPEN_CACHE_DIR, one JSON file per symbol.
    The cache key is the symbol only: an existing cache file is reused
    regardless of ``start_date`` / ``end_date``.

    A failing ISIN never aborts the bulk fetch: network errors, timeouts and
    malformed payloads are retried up to OPEN_MAX_RETRY times and then
    reported; malformed records inside a payload are skipped.
    """
    # Local import: the name is only needed for the broadened error handling.
    from http.client import HTTPException

    OPEN_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    frames: List[pd.DataFrame] = []
    for isin in isins:
        try:
            row = universe.loc[universe["ISIN"] == str(isin)].iloc[0]
        except (KeyError, IndexError):
            print(f"[WARN] ISIN {isin} non trovato nell'universo")
            continue
        base, symbol = _build_symbol_euronext(row)
        cache_file = OPEN_CACHE_DIR / f"{symbol}.json"
        data = None

        # Cache check: an unreadable or corrupt file falls through to a fetch.
        if cache_file.exists():
            try:
                data = json.loads(cache_file.read_text())
            except (OSError, ValueError):
                data = None

        if data is None:
            url = f"{base}/{symbol}?from={start_date}&to={end_date}"
            for attempt in range(1, OPEN_MAX_RETRY + 1):
                try:
                    with urlopen(url, timeout=OPEN_TIMEOUT,
                                 context=ssl.create_default_context()) as resp:
                        data = json.loads(resp.read().decode("utf-8"))
                except (OSError, ValueError, HTTPException) as exc:
                    # OSError covers HTTPError/URLError/SSLError and socket
                    # timeouts; ValueError covers bad JSON / bad UTF-8.
                    if attempt < OPEN_MAX_RETRY:
                        time.sleep(OPEN_SLEEP_SEC)
                    else:
                        print(f"[ERROR] fetch {symbol}: {exc}")
                    continue
                # Best-effort cache write: a failure must not lose the data.
                try:
                    cache_file.write_text(json.dumps(data))
                except (OSError, TypeError, ValueError) as exc:
                    print(f"[WARN] cache non scritta per {symbol}: {exc}")
                break

        if not data or not isinstance(data, list):
            continue

        rows = []
        for item in data:
            if not isinstance(item, dict):
                continue  # skip null / malformed records instead of crashing
            d = item.get("data") or {}
            if not isinstance(d, dict):
                d = {}
            dt_str = item.get("date") or item.get("dt")
            if not dt_str:
                continue
            rows.append({
                "Date": pd.to_datetime(dt_str, errors="coerce"),
                "ISIN": isin,
                "Open": _to_float_safe(d.get("open")),
                "Close": _to_float_safe(d.get("close")),
            })
        if rows:
            frames.append(pd.DataFrame(rows))

    if not frames:
        return pd.DataFrame(columns=["Date", "ISIN", "Open", "Close"])
    out = pd.concat(frames, ignore_index=True).dropna(subset=["Date"])
    return out.sort_values(["ISIN", "Date"]).reset_index(drop=True)


def save_price_cache_summary(cache_dir: Path, out_file: Path) -> None:
    """
    Write an Excel summary of the price cache (debug aid).

    One row per ``*.json`` file: symbol, number of records, size in KB.
    Unreadable files are listed with 0 records. Nothing is written when the
    folder does not exist or contains no JSON file.
    """
    if not cache_dir.exists():
        return
    files = sorted(cache_dir.glob("*.json"))
    rows = []
    for f in files:
        try:
            data = json.loads(f.read_text())
            n = len(data) if isinstance(data, list) else 0
            rows.append({"symbol": f.stem, "n_records": n,
                         "size_kb": f.stat().st_size / 1024})
        except (OSError, ValueError):
            rows.append({"symbol": f.stem, "n_records": 0, "size_kb": np.nan})
    if rows:
        pd.DataFrame(rows).to_excel(out_file, index=False)


# =============================================================================
# DATA LOADING - a single DB read per ISIN, cached in memory
# =============================================================================
def detect_cols(df0: pd.DataFrame) -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """Identify the Date / Ret / Price columns of the DataFrame (via detect_column)."""
    col_date = detect_column(df0, ["Date", "Data", "Datetime", "Timestamp", "Time"])
    col_ret = detect_column(df0, [
        "Ret", "Return", "Rendimento", "Rend", "LogRet", "r_log", "r", "pct_chg"
    ])
    col_px = detect_column(df0, [
        "Close", "AdjClose", "Price", "Px", "Last", "Prezzo", "Chiusura"
    ])
    return col_date, col_ret, col_px


def load_all_assets(
    isins: List[str],
    engine: "sa.Engine",
    sql_sp,
    n_bars: int,
    ptf_curr: str,
) -> Tuple[Dict[str, pd.DataFrame], List[Dict[str, str]]]:
    """
    Load the historical series of ALL ISINs from the DB, once.

    Args:
        isins: ISIN codes to load.
        engine: SQLAlchemy engine used by ``pd.read_sql_query``.
        sql_sp: Query/statement executed with params ``isin``, ``n``, ``ptf``.
        n_bars: Value bound to ``n``.
        ptf_curr: Value bound to ``ptf``.

    Returns:
        ``(assets_data, errors)``. ``assets_data`` maps ISIN -> DataFrame as
        returned by the query (sorted by date when a date column is found),
        with the detected column names stored in ``df.attrs`` under
        ``col_date`` / ``col_ret`` / ``col_px``. ``errors`` lists
        ``{"ISIN", "Errore"}`` for skipped ISINs (empty result, no usable
        return/price column, fewer than max(200, WP + HA + 10) valid points,
        or any exception raised while loading).

    Replaces the two separate loops of the original (lines 813 and 1177) that
    read the DB TWICE. Estimated saving: ~50% of the DB time.
    """
    assets_data: Dict[str, pd.DataFrame] = {}
    errors: List[Dict[str, str]] = []
    for i, isin in enumerate(isins, 1):
        try:
            df_isin = pd.read_sql_query(
                sql_sp, engine,
                params={"isin": isin, "n": n_bars, "ptf": ptf_curr},
            )
            if df_isin.empty:
                errors.append({"ISIN": isin, "Errore": "SP vuota"})
                continue
            col_date, col_ret, col_px = detect_cols(df_isin)
            if col_date:
                df_isin[col_date] = pd.to_datetime(df_isin[col_date], errors="coerce")
                df_isin = df_isin.sort_values(col_date)

            # Minimum coverage check (returns take precedence over prices).
            if col_ret and col_ret in df_isin.columns:
                n_valid = pd.to_numeric(df_isin[col_ret], errors="coerce").dropna().shape[0]
            elif col_px and col_px in df_isin.columns:
                n_valid = pd.to_numeric(df_isin[col_px], errors="coerce").dropna().shape[0]
            else:
                errors.append({"ISIN": isin,
                               "Errore": "Ne rendimenti ne prezzi utilizzabili"})
                continue
            if n_valid < max(200, WP + HA + 10):
                errors.append({
                    "ISIN": isin,
                    "Errore": f"Serie troppo corta ({n_valid} punti)",
                })
                continue

            # Store the detected column names to avoid re-detecting them later.
            df_isin.attrs["col_date"] = col_date
            df_isin.attrs["col_ret"] = col_ret
            df_isin.attrs["col_px"] = col_px
            assets_data[isin] = df_isin
            if i % 10 == 0:
                print(f"[DATA] {i}/{len(isins)} ISIN caricati")
        except Exception as exc:
            # Per-ISIN boundary: record the failure and keep loading the rest.
            errors.append({"ISIN": isin, "Errore": str(exc)})
    print(f"[DATA] Caricati {len(assets_data)}/{len(isins)} ISIN dal DB")
    return assets_data, errors


def compute_returns_decimal(df_isin: pd.DataFrame) -> pd.Series:
    """
    Extract the return series in DECIMAL units from ``df_isin``.

    Uses the return column named in ``df_isin.attrs["col_ret"]`` when present;
    if the median absolute value exceeds 1.5 the series is assumed to be in
    percent and divided by 100. Otherwise log returns are computed from the
    price column in ``attrs["col_px"]`` (zero prices treated as missing).
    Returns an empty float Series when neither column is available.
    """
    col_ret = df_isin.attrs.get("col_ret")
    col_px = df_isin.attrs.get("col_px")
    if col_ret and col_ret in df_isin.columns:
        r = pd.to_numeric(df_isin[col_ret], errors="coerce").astype(float).dropna()
        # Auto-scale: median(|r|) > 1.5 means percent units, convert to decimal.
        med = r.abs().median()
        if pd.notnull(med) and med > 1.5:
            r = r / 100.0
        return r
    if col_px and col_px in df_isin.columns:
        px = pd.to_numeric(df_isin[col_px], errors="coerce").astype(float).replace(0, np.nan)
        return np.log(px / px.shift(1)).dropna()
    return pd.Series(dtype=float)


def compute_returns_percent(df_isin: pd.DataFrame) -> Tuple[pd.DataFrame, str]:
    """
    Return ``(copy of df_isin, name of the percent-return column)``.

    Used by ``knn_forward_backtest_one_asset``, which expects raw percent
    values. With a return column, it is only coerced to float (no rescaling);
    with a price column, ``_RetPct_`` = 100 * log return is added. The column
    name is "" when neither is available.

    NOTE(review): unlike ``compute_returns_decimal`` there is no percent vs
    decimal auto-detection here, so a DB return column already in decimal
    units would be divided by 100 again by the back-test -- confirm the units
    delivered by the stored procedure.
    """
    df = df_isin.copy()
    col_ret = df.attrs.get("col_ret")
    col_px = df.attrs.get("col_px")
    if col_ret and col_ret in df.columns:
        df[col_ret] = pd.to_numeric(df[col_ret], errors="coerce").astype(float)
        return df, col_ret
    if col_px and col_px in df.columns:
        px = pd.to_numeric(df[col_px], errors="coerce").astype(float).replace(0, np.nan)
        df["_RetPct_"] = np.log(px / px.shift(1)) * 100.0
        return df, "_RetPct_"
    return df, ""


# =============================================================================
# kNN WALK-FORWARD BACKTEST - MATH LOGIC UNCHANGED
# =============================================================================
def knn_forward_backtest_one_asset(
    df_isin: pd.DataFrame,
    col_date: str,
    col_ret: str,
    Wp: int,
    Ha: int,
    k: int,
    theta_entry: float,
    exec_ret: Optional[pd.Series] = None,
    fee_bps: float = 10,
    sl_bps: Optional[float] = 300.0,
    tp_bps: Optional[float] = 800.0,
    trail_bps: Optional[float] = 300.0,
    time_stop_bars: Optional[int] = 20,
    theta_exit: Optional[float] = 0.0,
    weak_days_exit: Optional[int] = None,
) -> Tuple[pd.DataFrame, Dict]:
    """
    LONG-ONLY walk-forward back-test with EXIT rules (SL/TP/TS/time/flip).

    Args:
        df_isin: Asset data; ``col_ret`` holds returns in percent.
        col_date: Name of the date column.
        col_ret: Name of the percent-return column.
        Wp: Pattern window length (bars).
        Ha: Outcome horizon passed to ``build_pattern_library``.
        k: Number of neighbours passed to ``predict_from_library``.
        theta_entry: A flat position is opened when the estimated outcome
            exceeds this value.
        exec_ret: Optional execution returns (decimal) indexed by date, used
            for PnL and stop checks instead of ``col_ret``.
        fee_bps: Cost in basis points charged on every signal change.
        sl_bps / tp_bps / trail_bps: Stop-loss, take-profit and trailing-stop
            thresholds in basis points; None disables the rule.
        time_stop_bars: Maximum holding period in bars; None disables it.
        theta_exit: Exit ("FLIP") when the estimate is <= this value; None
            disables it.
        weak_days_exit: When set, the flip exit requires that many consecutive
            weak estimates ("FLIP_STREAK") instead of a single one.

    Returns:
        ``(signals_df, summary_metrics_dict)``. ``signals_df`` has columns
        Date, Signal, EstOutcome, AvgDist, Ret+1, PnL.

    Note: only daily data is used -> thresholds are evaluated at end of day
    and the exit happens on the next bar (stated as a prudent model).
    LOGIC UNCHANGED with respect to the original v2.0.

    NOTE(review): the exit test is evaluated on r_exec[t+1] and, when it
    fires, Signal at t is set to 0, so PnL at t (Signal * Ret+1) excludes that
    same next-bar return. This looks like look-ahead on the exit bar --
    confirm against the intended execution model.
    NOTE(review): on the entry bar trade_pnl stays 0.0 although the position
    earns Ret+1 of that bar, so the stop levels appear to ignore the first
    bar's return -- confirm.
    """
    r = pd.to_numeric(df_isin[col_ret], errors="coerce").astype(float) / 100.0
    # NOTE(review): the RangeIndex fallback would fail on ``.dt`` below; in
    # practice ``col_date`` must be a column of ``df_isin``.
    idx = df_isin[col_date] if col_date in df_isin.columns else pd.RangeIndex(len(r))
    idx = pd.to_datetime(idx).dt.normalize()
    if exec_ret is not None:
        r_exec = pd.to_numeric(exec_ret, errors="coerce").astype(float)
        r_exec.index = pd.to_datetime(r_exec.index).normalize()
        r_exec = r_exec.reindex(idx)
        # NOTE(review): after reindex(idx) the lengths always match, so this
        # branch looks unreachable.
        if len(r_exec) != len(r):
            r_exec = pd.Series(r_exec.values, index=idx).reindex(idx)
    else:
        r_exec = r
    fee = fee_bps / 10000.0

    def _lib_predict(past_returns: pd.Series, win_last: np.ndarray):
        # Build the library from past data only, then predict for the
        # z-normalised current window; (NaN, NaN) when either step fails.
        lib_wins, lib_out = build_pattern_library(past_returns, Wp, Ha)
        if lib_wins is None:
            return np.nan, np.nan
        curr_zn = z_norm(win_last)
        if curr_zn is None:
            return np.nan, np.nan
        est_out, avg_dist, _ = predict_from_library(curr_zn, lib_wins, lib_out, k=k)
        return float(est_out), float(avg_dist)

    in_pos = False
    entry_t = None
    trade_pnl = 0.0   # compounded PnL of the open trade
    trade_peak = 0.0  # running maximum of trade_pnl (for the trailing stop)
    weak_streak = 0   # consecutive bars with est_out <= theta_exit
    rows = []
    for t in range(Wp, len(r) - 1):
        past = r.iloc[:t]
        if past.dropna().shape[0] < (Wp + Ha):
            # Not enough history to build a library: stay flat.
            sig_out, est_out, avg_dist = 0, np.nan, np.nan
            rows.append((
                idx.iloc[t], sig_out, est_out, avg_dist,
                r_exec.iloc[t + 1] if t + 1 < len(r_exec) else np.nan,
            ))
            continue
        win_last = r.iloc[t - Wp:t].values
        est_out, avg_dist = _lib_predict(past, win_last)
        sig_out = 1 if in_pos else 0

        # === ENTRY ===
        if (not in_pos) and (est_out > theta_entry):
            sig_out = 1
            in_pos = True
            entry_t = t
            trade_pnl = 0.0
            trade_peak = 0.0
            weak_streak = 0
        # === EXIT ===
        elif in_pos:
            next_ret = r_exec.iloc[t + 1] if t + 1 < len(r_exec) else np.nan
            pnl_if_stay = (1.0 + trade_pnl) * (1.0 + next_ret) - 1.0
            peak_if_stay = max(trade_peak, pnl_if_stay)
            exit_reasons = []
            if (sl_bps is not None) and (pnl_if_stay <= -sl_bps / 10000.0):
                exit_reasons.append("SL")
            if (tp_bps is not None) and (pnl_if_stay >= tp_bps / 10000.0):
                exit_reasons.append("TP")
            if (trail_bps is not None) and (peak_if_stay - pnl_if_stay >= trail_bps / 10000.0):
                exit_reasons.append("TRAIL")
            if (time_stop_bars is not None) and (t - entry_t + 1 >= time_stop_bars):
                exit_reasons.append("TIME")
            if theta_exit is not None:
                if est_out <= theta_exit:
                    weak_streak = weak_streak + 1 if weak_days_exit else weak_streak
                    if weak_days_exit is None:
                        exit_reasons.append("FLIP")
                    elif weak_streak >= weak_days_exit:
                        exit_reasons.append("FLIP_STREAK")
                else:
                    weak_streak = 0
            if exit_reasons:
                sig_out = 0
                in_pos = False
                entry_t = None
                trade_pnl = 0.0
                trade_peak = 0.0
                weak_streak = 0
            else:
                trade_pnl = pnl_if_stay
                trade_peak = peak_if_stay
        rows.append((
            idx.iloc[t], sig_out, est_out, avg_dist,
            r_exec.iloc[t + 1] if t + 1 < len(r_exec) else np.nan,
        ))

    sig_df = pd.DataFrame(rows, columns=["Date", "Signal", "EstOutcome", "AvgDist", "Ret+1"])
    sig_df["Signal_prev"] = sig_df["Signal"].shift(1).fillna(0)
    # A fee is charged on every change of the signal (entry and exit).
    trade_chg = (sig_df["Signal"] - sig_df["Signal_prev"]).abs()
    cost = trade_chg * fee
    sig_df["PnL"] = sig_df["Signal"] * sig_df["Ret+1"] - cost
    sig_df.drop(columns=["Signal_prev"], inplace=True)

    stats = drawdown_stats_simple(sig_df["PnL"])
    stats.update({
        "HitRate_%": round(
            100 * ((sig_df["PnL"] > 0).sum() / max(1, sig_df["PnL"].notna().sum())), 2
        ),
        "AvgTradeRet_bps": round(sig_df["PnL"].mean() * 10000, 2),
        "Turnover_%/step": round(100 * trade_chg.mean(), 2),
        "N_Steps": int(sig_df.shape[0]),
        "theta_entry": theta_entry,
        "theta_exit": (None if theta_exit is None else float(theta_exit)),
        "sl_bps": (None if sl_bps is None else float(sl_bps)),
        "tp_bps": (None if tp_bps is None else float(tp_bps)),
        "trail_bps": (None if trail_bps is None else float(trail_bps)),
        "time_stop_bars": (None if time_stop_bars is None else int(time_stop_bars)),
        "weak_days_exit": (None if weak_days_exit is None else int(weak_days_exit)),
    })
    return sig_df, stats


# =============================================================================
# QUALITY SCORING (for
pattern_signals.xlsx) # ============================================================================= def _add_quality_scores(df: pd.DataFrame) -> pd.DataFrame: """Aggiunge OutcomeScore, SimilarityScore, QualityScore al DataFrame.""" out = df.copy() conf = pd.to_numeric(out.get("Confidence", np.nan), errors="coerce") est = pd.to_numeric(out.get("EstOutcome", np.nan), errors="coerce") dist = pd.to_numeric(out.get("AvgDist", np.nan), errors="coerce") max_abs_est = np.nanmax(np.abs(est)) if ( np.isfinite(np.nanmax(np.abs(est))) and (np.nanmax(np.abs(est)) > 0) ) else np.nan outcome_score = np.where( np.isnan(max_abs_est) | (max_abs_est == 0), np.nan, np.abs(est) / max_abs_est, ) similarity_score = 1.0 / (1.0 + dist.astype(float)) confidence_score = conf.astype(float) quality = confidence_score * similarity_score * outcome_score out["OutcomeScore"] = np.round(outcome_score, 4) out["SimilarityScore"] = np.round(similarity_score, 4) out["QualityScore"] = np.round(quality, 4) return out # ============================================================================= # SCORING / RANKING (utility per scelta ISIN) # ============================================================================= def _safe_rank(s: pd.Series) -> pd.Series: """Rank robusto su Series con possibili NaN.""" s = pd.to_numeric(s, errors="coerce") if s.notna().sum() == 0: return pd.Series(np.zeros(len(s)), index=s.index) s_filled = s.fillna(s.median()) return s_filled.rank(method="average") def _apply_score(df: pd.DataFrame, weights: Optional[Dict[str, float]] = None) -> pd.DataFrame: """ Calcola il punteggio aggregato per ranking degli ISIN. Combina Sharpe, CAGR, Calmar, HitRate, QualityScore con pesi configurabili. 
""" out = df.copy() if weights is None: weights = SCORE_WEIGHTS or { "Sharpe": 0.30, "CAGR_%": 0.20, "Calmar": 0.20, "HitRate_%": 0.15, "QualityScore": 0.15, } ranks = {} for col, w in weights.items(): if col not in out.columns or w == 0: continue s = _coerce_num(out[col]) if s.notna().sum() == 0: continue ranks[col] = _safe_rank(s) * float(w) if not ranks: out["Score"] = np.nan return out rank_df = pd.DataFrame(ranks) out["Score"] = rank_df.sum(axis=1) return out # ============================================================================= # PORTFOLIO BUILDING - EW, RP, EW_v2, RP_v2 (Config B) # ============================================================================= def equity_from_returns(r: pd.Series) -> pd.Series: """Equity curve a base 100 da serie di rendimenti.""" r = pd.to_numeric(r, errors="coerce").fillna(0.0) return (1 + r).cumprod() * 100 def monthly_returns(r: pd.Series) -> pd.Series: """Rendimenti mensili composti.""" r = pd.to_numeric(r, errors="coerce").fillna(0.0) if not isinstance(r.index, (pd.DatetimeIndex, pd.PeriodIndex, pd.TimedeltaIndex)): try: r.index = pd.to_datetime(r.index) except Exception: return pd.Series(dtype=float) return (1 + r).resample("M").prod() - 1 def plot_heatmap_monthly(r: pd.Series, title: str, save_path: Optional[str] = None) -> None: """Heatmap mensile dei rendimenti (anni x mesi).""" r = pd.to_numeric(r, errors="coerce").fillna(0.0) m = monthly_returns(r) if m.empty: return df = m.to_frame("ret") df["Year"], df["Month"] = df.index.year, df.index.month pv = df.pivot(index="Year", columns="Month", values="ret") fig, ax = plt.subplots(figsize=(10, 6)) im = ax.imshow(pv.fillna(0) * 100, cmap="RdYlGn", vmin=-3, vmax=5, aspect="auto") for i in range(pv.shape[0]): for j in range(pv.shape[1]): val = pv.iloc[i, j] if not np.isnan(val): ax.text(j, i, f"{val * 100:.1f}", ha="center", va="center", fontsize=8) ax.set_title(title) ax.set_xlabel("Mese") ax.set_ylabel("Anno") ax.set_xticks(range(12)) ax.set_xticklabels(range(1, 
13)) fig.colorbar(im, ax=ax, label="%") plt.tight_layout() if save_path: savefig_safe(save_path, dpi=150) plt.close(fig) def inverse_vol_weights( df: pd.DataFrame, window: int = 60, max_weight: Optional[float] = None ) -> pd.DataFrame: """Pesi inverse-volatility con cap opzionale per singolo asset.""" vol = df.rolling(window).std() inv = 1 / vol.replace(0, np.nan) w = inv.div(inv.sum(axis=1), axis=0) w = w.ffill().fillna(1 / max(1, df.shape[1])) if max_weight is not None: w = w.clip(upper=max_weight) return w def make_active_weights( w_base: pd.DataFrame, sig: pd.DataFrame, renorm_to_1: bool = False, add_cash: bool = True, cash_label: str = "Cash", ) -> pd.DataFrame: """ Filtra i pesi sui soli ISIN attivi (signal=1). - renorm_to_1=False: lascia la quota inattiva come Cash - renorm_to_1=True: rialloca interamente sugli attivi """ if w_base is None or w_base.empty: return pd.DataFrame(index=sig.index, columns=[]) W = w_base.copy().apply(pd.to_numeric, errors="coerce").fillna(0.0) S = sig.reindex_like(W).fillna(0).astype(int) W_active = W * (S > 0) row_sum = W_active.sum(axis=1) if renorm_to_1: W_active = W_active.div(row_sum.replace(0, np.nan), axis=0).fillna(0.0) if add_cash: W_active[cash_label] = 0.0 else: if add_cash: cash = (1.0 - row_sum).clip(lower=0.0, upper=1.0) W_active[cash_label] = cash keep = [c for c in W_active.columns if W_active[c].abs().sum() > 0] return W_active[keep] def _sanitize_weights(w: pd.DataFrame) -> pd.DataFrame: """Sanifica una matrice di pesi (rimuove NaN/Inf, normalizza per riga).""" if w is None or w.empty: return w out = w.copy().apply(pd.to_numeric, errors="coerce").fillna(0.0) out = out.replace([np.inf, -np.inf], 0.0).clip(lower=0.0) row_sum = out.sum(axis=1) out = out.div(row_sum.replace(0, np.nan), axis=0).fillna(0.0) return out def _export_weights_daily( weights_dict: Dict[str, pd.DataFrame], out_xlsx: Path, ) -> None: """Esporta i pesi giornalieri in un Excel multi-foglio.""" if not weights_dict: return try: with 
pd.ExcelWriter(out_xlsx) as xw: for name, w in weights_dict.items(): if w is None or w.empty: continue sheet = str(name)[:31] w.to_excel(xw, sheet_name=sheet) print(f"[INFO] Salvato: {out_xlsx}") except Exception as exc: print(f"[WARN] Export weights_daily fallito: {exc}") # ============================================================================= # DYNAMIC PORTFOLIO BUILDER (ranking rolling) # ============================================================================= _dynamic_portfolio_cache: Dict[int, Dict] = {} def _build_dynamic_portfolio_returns( wide_pnl: pd.DataFrame, wide_sig: pd.DataFrame, wide_est: pd.DataFrame, top_n: int, window_bars: int = None, rp_lookback: int = None, ) -> Dict[str, Any]: """ Costruisce i portafogli dinamici EW e RP con ranking rolling. Per ogni data sceglie i top_n ISIN per EstOutcome medio nella window. REGOLA DI SIZING (corretta v2.1.1): - Equal Weight: ogni ISIN nel target riceve peso 1/top_n, INDIPENDENTEMENTE dal numero di ISIN effettivamente selezionati. Se ci sono meno di top_n candidati con segnale, la quota non investita resta in CASH. Questo evita la concentrazione 100% su un singolo ISIN nei periodi iniziali con pochi dati. - Risk Parity: pesi inverse-volatility con cap rp_max_weight, NORMALIZZATI a sum=1/top_n*N (non sum=1). Cosi' la somma dei pesi attivi non supera top_n*rp_max_weight e il deficit resta in cash. Evita la violazione del cap che avveniva con la rinormalizzazione a 1. 
""" window_bars = window_bars or RANKING_WINDOW_BARS rp_lookback = rp_lookback or RP_LOOKBACK if wide_pnl is None or wide_pnl.empty: idx = pd.Index([]) empty_w = pd.DataFrame(index=idx, columns=[]) return { "ret_eq": pd.Series(dtype=float), "ret_rp": pd.Series(dtype=float), "w_eq": empty_w, "w_rp": empty_w, "w_eq_act": empty_w, "w_rp_act": empty_w, "selection": {}, } dates = wide_pnl.index.sort_values() all_cols = wide_pnl.columns.tolist() w_eq = pd.DataFrame(0.0, index=dates, columns=all_cols) w_rp = pd.DataFrame(0.0, index=dates, columns=all_cols) selection = {} # Peso EW fisso per slot: 1/top_n. Cosi' anche con 1 solo segnale, # quell'ISIN prende 1/top_n e il resto va in cash. ew_per_slot = 1.0 / max(1, top_n) for dt in dates: sig_row = wide_sig.loc[dt] if dt in wide_sig.index else pd.Series(dtype=float) on_cols = [c for c in all_cols if sig_row.get(c, 0) == 1] if not on_cols: selection[dt] = [] continue window_est = wide_est.loc[:dt].tail(window_bars) if not wide_est.empty else pd.DataFrame() scores = [] for c in on_cols: s = pd.to_numeric(window_est[c], errors="coerce") if c in window_est.columns else pd.Series(dtype=float) est_score = s.mean(skipna=True) if pd.isna(est_score): continue scores.append((c, est_score)) if not scores: selection[dt] = [] continue scores_sorted = sorted(scores, key=lambda x: x[1], reverse=True) base_isins_dt = [c for c, _ in scores_sorted[:top_n]] selection[dt] = base_isins_dt if not base_isins_dt: continue # --- Equal Weight --- # FIX v2.1.1: peso = 1/top_n per ciascun candidato, NON 1/len(candidates). # Cosi' il peso massimo per asset e' sempre <= 1/top_n e non si genera # concentrazione 100% quando i candidati sono pochi. w_eq.loc[dt, base_isins_dt] = ew_per_slot # --- Risk Parity --- # Pesi inv-vol con cap RP_MAX_WEIGHT, somma target = N_candidati/top_n # (cioe' la stessa esposizione totale di EW: 100% con top_n candidati, # frazione minore con meno candidati). 
window_pnl = wide_pnl.loc[:dt].tail(window_bars) rp_hist = window_pnl[base_isins_dt] rp_w = inverse_vol_weights(rp_hist, window=rp_lookback, max_weight=RP_MAX_WEIGHT) if not rp_w.empty: last = rp_w.iloc[-1].fillna(0.0) last_sum = float(last.sum()) if last_sum > 0: # Target esposizione totale: N_candidati/top_n (= 1 se candidati=top_n) target_total = len(base_isins_dt) / max(1, top_n) # Normalizziamo a target_total, MA mantenendo il cap scaled = last * (target_total / last_sum) # Riapplica il cap dopo lo scaling scaled = scaled.clip(upper=RP_MAX_WEIGHT) w_rp.loc[dt, scaled.index] = scaled.values w_eq_act = make_active_weights(w_eq, wide_sig, renorm_to_1=False, add_cash=True, cash_label="Cash") w_rp_act = make_active_weights(w_rp, wide_sig, renorm_to_1=False, add_cash=True, cash_label="Cash") ret_eq = (wide_pnl * w_eq_act.drop(columns=["Cash"], errors="ignore")).sum(axis=1) ret_rp = (wide_pnl * w_rp_act.drop(columns=["Cash"], errors="ignore")).sum(axis=1) return { "ret_eq": ret_eq, "ret_rp": ret_rp, "w_eq": w_eq, "w_rp": w_rp, "w_eq_act": w_eq_act, "w_rp_act": w_rp_act, "selection": selection, } def _get_dynamic_portfolio( top_n: int, wide_pnl: pd.DataFrame, wide_sig: pd.DataFrame, wide_est: pd.DataFrame, ) -> Dict[str, Any]: """Cache-aware getter del portafoglio dinamico per un dato TopN.""" cache_key = top_n if cache_key not in _dynamic_portfolio_cache: _dynamic_portfolio_cache[cache_key] = _build_dynamic_portfolio_returns( wide_pnl=wide_pnl, wide_sig=wide_sig, wide_est=wide_est, top_n=top_n, window_bars=RANKING_WINDOW_BARS, rp_lookback=RP_LOOKBACK, ) return _dynamic_portfolio_cache[cache_key] # ============================================================================= # COMPOSITION PLOTS # ============================================================================= def plot_portfolio_composition( weights: pd.DataFrame, title: str, save_path: Optional[str] = None, max_legend: int = 12, ) -> None: """Stacked area dei pesi per ISIN nel tempo.""" import os if 
weights is None or weights.empty: print(f"[SKIP] Nessun peso disponibile per: {title}") return W = weights.copy() if W.index.has_duplicates: W = W[~W.index.duplicated(keep="last")] W = W.sort_index() W = W.apply(pd.to_numeric, errors="coerce").fillna(0.0) keep_cols = [c for c in W.columns if float(np.abs(W[c]).sum()) > 0.0] if not keep_cols: print(f"[SKIP] Tutte le colonne hanno peso zero per: {title}") return W = W[keep_cols] row_sum = W.sum(axis=1) with np.errstate(invalid="ignore", divide="ignore"): W = W.div(row_sum.replace(0.0, np.nan), axis=0).fillna(0.0).clip(lower=0.0) if len(W.index) < 2 or W.shape[1] == 0: print(f"[SKIP] Serie troppo corta per: {title}") return avg_w = W.mean(axis=0).sort_values(ascending=False) ordered = avg_w.index.tolist() if len(ordered) > max_legend: head, tail = ordered[:max_legend], ordered[max_legend:] W_show = W[head].copy() W_show["Altri"] = W[tail].sum(axis=1) ordered = head + ["Altri"] else: W_show = W[ordered].copy() cmap = plt.colormaps.get_cmap("tab20") palette = [cmap(i % cmap.N) for i in range(len(ordered))] fig, ax = plt.subplots(figsize=(11, 6)) ax.stackplot(W_show.index, [W_show[c].values for c in ordered], labels=ordered, colors=palette) ax.set_title(f"Composizione portafoglio nel tempo - {title}") ax.set_ylim(0, 1) ax.grid(True, alpha=0.3) ax.set_ylabel("Peso") yticks = ax.get_yticks() ax.set_yticklabels([f"{y * 100:.0f}%" for y in yticks]) ncol = 2 if len(ordered) > 10 else 1 ax.legend(loc="upper left", bbox_to_anchor=(1.01, 1), frameon=False, ncol=ncol, title="ISIN") fig.tight_layout() if save_path: savefig_safe(save_path, dpi=150) plt.close(fig) def plot_portfolio_composition_fixed( weights: pd.DataFrame, title: str, save_path: Optional[str] = None, ) -> None: """ Variante: stack ATTIVI vs CASH (somma % invested vs cash %). Utile per vedere quando il sistema e' "sotto-investito". 
""" if weights is None or weights.empty: return W = weights.copy().apply(pd.to_numeric, errors="coerce").fillna(0.0) if "Cash" not in W.columns: W["Cash"] = (1.0 - W.sum(axis=1)).clip(lower=0.0, upper=1.0) invested = 1.0 - W["Cash"] cash = W["Cash"] fig, ax = plt.subplots(figsize=(11, 4)) ax.fill_between(W.index, 0, invested, color="#2A4D7A", alpha=0.7, label="Investito") ax.fill_between(W.index, invested, 1.0, color="#D3D1C7", alpha=0.7, label="Cash") ax.set_ylim(0, 1) ax.set_title(f"Composizione Attivi vs Cash - {title}") ax.set_ylabel("Quota portafoglio") ax.legend(loc="upper right") ax.grid(True, alpha=0.3) fig.tight_layout() if save_path: savefig_safe(save_path, dpi=150) plt.close(fig) # ============================================================================= # TRADES REPORT (round-trip OPEN/CLOSE) # ============================================================================= def make_trades_report( bt_signals_df: pd.DataFrame, meta_df: pd.DataFrame, weights_dict: Dict[str, pd.DataFrame], ) -> Dict[str, pd.DataFrame]: """ Genera il trade report per ogni strategia: ogni riga rappresenta un trade round-trip OPEN -> CLOSE con date, durata, PnL. LOGICA INVARIATA rispetto all'originale. 
""" out: Dict[str, pd.DataFrame] = {} if bt_signals_df is None or bt_signals_df.empty: return out bt = bt_signals_df.copy() bt["Date"] = pd.to_datetime(bt["Date"]) bt["ISIN"] = bt["ISIN"].astype(str).str.strip() bt["Signal"] = pd.to_numeric(bt["Signal"], errors="coerce").fillna(0).astype(int) bt = bt.sort_values(["ISIN", "Date"]) isin_names = meta_df.set_index("ISIN")[["Nome", "Asset Class", "Categoria"]].to_dict("index") for strat_name, w_df in weights_dict.items(): if w_df is None or w_df.empty: continue w_clean = w_df.drop(columns=["Cash"], errors="ignore") trades = [] for isin, g in bt.groupby("ISIN"): g = g.sort_values("Date") sig = g["Signal"].values dates = g["Date"].values ret_next = g["Ret+1"].fillna(0.0).values in_pos = False entry_d = None entry_idx = None for i in range(len(sig)): if sig[i] == 1 and not in_pos: in_pos = True entry_d = dates[i] entry_idx = i elif sig[i] == 0 and in_pos: in_pos = False exit_d = dates[i] # PnL del trade come prodotto dei (1+r) escluso il giorno di chiusura trade_rets = ret_next[entry_idx:i] pnl = float(np.prod(1 + trade_rets) - 1) if len(trade_rets) > 0 else 0.0 duration = (pd.Timestamp(exit_d) - pd.Timestamp(entry_d)).days # Peso medio durante il trade (nella strategia) avg_w = np.nan if isin in w_clean.columns: try: mask = (w_clean.index >= entry_d) & (w_clean.index <= exit_d) avg_w = float(w_clean.loc[mask, isin].mean()) except Exception: pass info = isin_names.get(isin, {}) trades.append({ "ISIN": isin, "Nome": info.get("Nome"), "Asset Class": info.get("Asset Class"), "Categoria": info.get("Categoria"), "EntryDate": pd.Timestamp(entry_d).date(), "ExitDate": pd.Timestamp(exit_d).date(), "Duration_days": int(duration), "PnL_%": round(pnl * 100, 4), "AvgWeight": round(avg_w, 4) if np.isfinite(avg_w) else np.nan, "Contrib_%": round(pnl * avg_w * 100, 4) if np.isfinite(avg_w) else np.nan, }) # Eventuale trade ancora aperto a fine periodo if in_pos and entry_idx is not None: trade_rets = ret_next[entry_idx:] pnl = 
float(np.prod(1 + trade_rets) - 1) if len(trade_rets) > 0 else 0.0 info = isin_names.get(isin, {}) trades.append({ "ISIN": isin, "Nome": info.get("Nome"), "Asset Class": info.get("Asset Class"), "Categoria": info.get("Categoria"), "EntryDate": pd.Timestamp(entry_d).date(), "ExitDate": None, "Duration_days": np.nan, "PnL_%": round(pnl * 100, 4), "AvgWeight": np.nan, "Contrib_%": np.nan, }) if trades: out[strat_name] = pd.DataFrame(trades) return out def _build_performance_attribution( trades_dict: Dict[str, pd.DataFrame], ) -> pd.DataFrame: """Tabella di attribution: contribuzione per asset class e per strategia.""" rows = [] for strat, df in trades_dict.items(): if df is None or df.empty: continue for ac, g in df.groupby("Asset Class"): rows.append({ "Strategy": strat, "Asset Class": ac, "N_Trades": len(g), "AvgPnL_%": round(g["PnL_%"].mean(), 3), "SumPnL_%": round(g["PnL_%"].sum(), 2), "Contrib_%": round(g["Contrib_%"].sum(), 3) if "Contrib_%" in g.columns else np.nan, }) return pd.DataFrame(rows) def rebuild_daily_from_trades_dict( trades_dict: Dict[str, pd.DataFrame], date_index: pd.DatetimeIndex, ) -> pd.DataFrame: """ Ricostruisce il PnL giornaliero per strategia partendo dai trade. PnL del trade attribuito proporzionalmente sui giorni di holding. 
""" if not trades_dict: return pd.DataFrame(index=date_index) daily = pd.DataFrame(0.0, index=date_index, columns=list(trades_dict.keys())) for strat, df in trades_dict.items(): if df is None or df.empty: continue for _, t in df.iterrows(): entry = pd.Timestamp(t["EntryDate"]) if pd.notnull(t.get("EntryDate")) else None exit_d = pd.Timestamp(t["ExitDate"]) if pd.notnull(t.get("ExitDate")) else date_index[-1] if entry is None: continue mask = (date_index > entry) & (date_index <= exit_d) n_days = int(mask.sum()) if n_days == 0: continue contrib = float(t.get("Contrib_%", t.get("PnL_%", 0.0)) or 0.0) / 100.0 daily.loc[mask, strat] += contrib / n_days return daily # ============================================================================= # METRICHE FINALI PER TOP-N SWEEP (sezione 6 originale) # ============================================================================= def _calc_all_metrics_from_returns(returns: pd.Series, label: str = "") -> Dict[str, Any]: """Bundle completo di metriche per una singola serie di rendimenti.""" r = pd.to_numeric(returns, errors="coerce").fillna(0.0) if r.empty: return {"Portfolio": label} base = portfolio_metric_row(label, r) dd_info = drawdown_metrics(r) heal = heal_index_metrics(r) h_min = h_min_100(r) base.update({ "AvgDD_%": dd_info.get("AvgDD_%"), "MaxDD_len": dd_info.get("MaxDD_len"), "RecoverDays_avg": dd_info.get("RecoverDays_avg"), "AreaAboveWater": heal.get("AreaAboveWater"), "AreaUnderWater": heal.get("AreaUnderWater"), "h_min_100": round(h_min * 100, 2) if np.isfinite(h_min) else np.nan, }) return base def _select_isins_for_topN(df_sum: pd.DataFrame, top_n: int) -> List[str]: """Selezione full-sample dei top_n ISIN per Score (legacy).""" if df_sum is None or df_sum.empty or "Score" not in df_sum.columns: return [] return ( df_sum.sort_values("Score", ascending=False) .head(top_n)["ISIN"].astype(str).str.strip().tolist() ) def _build_portfolio_returns_for_isins( isins: List[str], wide_pnl: pd.DataFrame, wide_sig: 
pd.DataFrame, sizing: str = "equal_weight", ) -> pd.Series: """Costruisce i rendimenti di portafoglio per una lista di ISIN.""" if not isins: return pd.Series(dtype=float, index=wide_pnl.index if wide_pnl is not None else None) cols = [c for c in isins if c in wide_pnl.columns] if not cols: return pd.Series(dtype=float, index=wide_pnl.index) sub_pnl = wide_pnl[cols] sub_sig = wide_sig[cols] if all(c in wide_sig.columns for c in cols) else pd.DataFrame( 1, index=sub_pnl.index, columns=cols, ) if sizing == "risk_parity": w = inverse_vol_weights(sub_pnl, window=RP_LOOKBACK, max_weight=RP_MAX_WEIGHT) else: w = pd.DataFrame(1.0 / len(cols), index=sub_pnl.index, columns=cols) w_act = make_active_weights(w, sub_sig, renorm_to_1=False, add_cash=True, cash_label="Cash") return (sub_pnl * w_act.drop(columns=["Cash"], errors="ignore")).sum(axis=1) # ============================================================================= # STRATEGY CONFIGURATIONS (4 strategie operative: EW, RP, EW_v2, RP_v2) # ============================================================================= STRATEGY_PARAMS: Dict[str, Dict[str, Any]] = { "Equal_Weight": { "sizing": "equal_weight", "sl_bps": SIGNALS_CONFIG.get("sl_bps", 300.0), "tp_bps": SIGNALS_CONFIG.get("tp_bps", 800.0), "trail_bps": SIGNALS_CONFIG.get("trail_bps", 300.0), "time_stop_bars": SIGNALS_CONFIG.get("time_stop_bars", 20), }, "Risk_Parity": { "sizing": "risk_parity", "sl_bps": SIGNALS_CONFIG.get("sl_bps", 300.0), "tp_bps": SIGNALS_CONFIG.get("tp_bps", 800.0), "trail_bps": SIGNALS_CONFIG.get("trail_bps", 300.0), "time_stop_bars": SIGNALS_CONFIG.get("time_stop_bars", 20), }, # Le strategie _v2 implementano la Config B (grid search out-of-sample). # Note: nel backtest, min_holding_bars NON e' implementato a livello di # knn_forward_backtest_one_asset (la logica e' compatibile solo con il # motore di produzione). Le _v2 nel backtest usano TP/TRAIL piu' larghi # ma stesso schema di entry/exit del legacy. 
"Equal_Weight_v2": { "sizing": "equal_weight", "sl_bps": 300.0, "tp_bps": 1200.0, "trail_bps": 200.0, "time_stop_bars": 20, }, "Risk_Parity_v2": { "sizing": "risk_parity", "sl_bps": 300.0, "tp_bps": 1200.0, "trail_bps": 200.0, "time_stop_bars": 20, }, } def _override_strategy_params_from_config() -> None: """Applica eventuali override dalla sezione 'strategies' del config.""" if not STRATEGIES_CONFIG: return for name, spec in STRATEGIES_CONFIG.items(): if name.startswith("_") or not isinstance(spec, dict): continue if name not in STRATEGY_PARAMS: # Strategia definita solo in config: aggiungi STRATEGY_PARAMS[name] = {"sizing": spec.get("sizing", "equal_weight")} params = spec.get("params", {}) or {} for k, v in params.items(): STRATEGY_PARAMS[name][k] = v if "sizing" in spec: STRATEGY_PARAMS[name]["sizing"] = spec["sizing"] _override_strategy_params_from_config() # ============================================================================= # MAIN PIPELINE # ============================================================================= def main() -> None: """ Esegue la pipeline end-to-end. 
Wrapped in main() per: - poter essere importato senza side effect - permettere try/finally puliti sull'engine DB - facilitare il testing """ start_all = time.perf_counter() # ---- 1) UNIVERSO ---- print(f"[INFO] Caricamento universo da {UNIVERSO_XLSX}") universe = pd.read_excel(UNIVERSO_XLSX) col_isin_uni = detect_column(universe, ["ISIN", "isin", "codice isin"]) if col_isin_uni is None: raise ValueError("Nel file universo non trovo una colonna ISIN.") col_name_uni = detect_column(universe, [ "Nome", "Name", "Descrizione", "Description", "Security Name", "Instrument Name", ]) col_cat_uni = detect_column(universe, ["Categoria", "Category", "Classe", "Linea", "Tipo"]) col_ac_uni = detect_column(universe, [ "Asset Class", "AssetClass", "Classe di Attivo", "Classe Attivo", "Class", ]) isins = ( universe[col_isin_uni].astype(str).str.strip() .replace("", pd.NA).dropna().drop_duplicates().tolist() ) print(f"[INFO] ISIN totali in universo: {len(isins)}") meta_df = pd.DataFrame({"ISIN": universe[col_isin_uni].astype(str).str.strip()}) meta_df["Nome"] = universe[col_name_uni] if col_name_uni else None meta_df["Categoria"] = universe[col_cat_uni] if col_cat_uni else None meta_df["Asset Class"] = universe[col_ac_uni] if col_ac_uni else None meta_df = meta_df.drop_duplicates(subset=["ISIN"]).reset_index(drop=True) # ---- 2) CONNESSIONE DB ---- conn_str = read_connection_txt("connection.txt") engine = sa.create_engine(conn_str, fast_executemany=True) print("[INFO] Connessione pronta (SQLAlchemy + pyodbc).") sql_sp = text(f"EXEC {STORED_PROC} @ISIN = :isin, @n = :n, @PtfCurr = :ptf") # ---- 3) CARICAMENTO DATI (una sola volta per ISIN) ---- print("[INFO] Caricamento dati per tutti gli ISIN (singola passata)...") t_data = time.perf_counter() assets_data, load_errors = load_all_assets(isins, engine, sql_sp, N_BARS, PTF_CURR) errors: List[Dict[str, str]] = list(load_errors) print(f"[TIMER] Caricamento DB: {format_eta(time.perf_counter() - t_data)}") # ---- 4) HURST + PATTERN 
ANALYSIS (informativo) ---- print("[INFO] Calcolo Hurst + Pattern recognition (informativi)...") t_hp = time.perf_counter() hurst_rows: List[Dict[str, Any]] = [] pattern_rows: List[Dict[str, Any]] = [] last_dates: List[pd.Timestamp] = [] for i, (isin, df_isin) in enumerate(assets_data.items(), 1): try: col_date = df_isin.attrs.get("col_date") r = compute_returns_decimal(df_isin) if r.empty or len(r) < max(200, WP + HA + 10): continue # Hurst (solo informativo - regime classification) h_rs = hurst_rs_returns(r) h_dfa = hurst_dfa_returns(r) H = np.nanmedian([h_rs, h_dfa]) H = clamp01(H) if np.isfinite(H) else np.nan if pd.isna(H): regime = None elif H < 0.45: regime = "mean_reversion" elif H > 0.55: regime = "breakout" else: regime = "neutral" # Pattern characterization (informativo) lib_wins, lib_out = build_pattern_library(r, WP, HA, embargo=EMBARGO) date_last = df_isin[col_date].iloc[-1] if col_date else None if date_last is not None: last_dates.append(pd.to_datetime(date_last)) if lib_wins is None or len(r) < WP + HA: ptype, pconf = characterize_window( r, WP, z_rev=Z_REV, z_vol=Z_VOL, std_comp_pct=STD_COMP_PCT ) signal, est_out, avg_dist = 0, np.nan, np.nan else: curr_zn = z_norm(r.values[-WP:]) if curr_zn is None: ptype, pconf = characterize_window( r, WP, z_rev=Z_REV, z_vol=Z_VOL, std_comp_pct=STD_COMP_PCT ) signal, est_out, avg_dist = 0, np.nan, np.nan else: est_out, avg_dist, _ = predict_from_library( curr_zn, lib_wins, lib_out, k=KNN_K ) # NOTA v2.1: usa THETA globale, NON H/100 signal = 1 if est_out > THETA else 0 ptype, pconf = characterize_window( r, WP, z_rev=Z_REV, z_vol=Z_VOL, std_comp_pct=STD_COMP_PCT ) hurst_rows.append({ "ISIN": isin, "Hurst": None if pd.isna(H) else round(float(H), 4), "Regime": regime, }) pattern_rows.append({ "ISIN": isin, "DateLast": date_last, "PatternType": ptype, "Signal": {1: "long", -1: "short", 0: "flat"}.get(int(signal), "flat"), "Confidence": None if pconf is None else round(float(min(1.0, max(0.0, pconf))), 3), 
"EstOutcome": None if pd.isna(est_out) else float(est_out), "AvgDist": None if pd.isna(avg_dist) else float(avg_dist), "Wp": WP, "Ha": HA, "k": KNN_K, }) if i % 10 == 0: print(f" [HP] {i}/{len(assets_data)} ISIN analizzati") except Exception as exc: errors.append({"ISIN": isin, "Errore": f"Hurst/Pattern: {exc}"}) print(f"[TIMER] Hurst+Pattern: {format_eta(time.perf_counter() - t_hp)}") # ---- 4A) EXPORT HURST + PATTERN ---- hurst_df = pd.DataFrame(hurst_rows) if hurst_rows else pd.DataFrame( {"ISIN": [], "Hurst": [], "Regime": []} ) hurst_df["ISIN"] = hurst_df["ISIN"].astype(str).str.strip() meta_df["ISIN"] = meta_df["ISIN"].astype(str).str.strip() summary_hurst = meta_df.merge(hurst_df, on="ISIN", how="left") cols_hurst = ["ISIN", "Nome", "Categoria", "Asset Class", "Hurst", "Regime"] summary_hurst = summary_hurst[[c for c in cols_hurst if c in summary_hurst.columns]] summary_hurst = summary_hurst.sort_values( ["Hurst", "ISIN"], na_position="last" ).reset_index(drop=True) summary_hurst.to_excel(OUTPUT_HURST_XLSX, index=False) pat_df = pd.DataFrame(pattern_rows) if pattern_rows else pd.DataFrame() if not pat_df.empty: pat_df["ISIN"] = pat_df["ISIN"].astype(str).str.strip() summary_pattern = ( meta_df.merge(hurst_df, on="ISIN", how="left").merge(pat_df, on="ISIN", how="left") ) wanted = ["ISIN", "Nome", "Categoria", "Asset Class", "Hurst", "Regime", "DateLast", "PatternType", "Signal", "Confidence", "EstOutcome", "AvgDist", "Wp", "Ha", "k"] summary_pattern = summary_pattern[[c for c in wanted if c in summary_pattern.columns]] summary_pattern = _add_quality_scores(summary_pattern) sort_cols = [c for c in ["QualityScore", "Confidence", "OutcomeScore"] if c in summary_pattern.columns] if sort_cols: summary_pattern = summary_pattern.sort_values( sort_cols, ascending=[False] * len(sort_cols), na_position="last", ).reset_index(drop=True) summary_pattern.to_excel(OUTPUT_PATTERN_XLSX, index=False) print(f"[INFO] Salvato: {OUTPUT_HURST_XLSX} ({len(summary_hurst)} righe)") 
print(f"[INFO] Salvato: {OUTPUT_PATTERN_XLSX} ({len(summary_pattern)} righe)") # ---- 4B) BULK FETCH PREZZI OPEN PER ESECUZIONE t+1 ---- print("[INFO] Bulk fetch prezzi open/close per esecuzione t+1...") t_px = time.perf_counter() exec_rets_map: Dict[str, pd.Series] = {} if assets_data: # Trova range di date globale all_dates = [] for df_isin in assets_data.values(): cd = df_isin.attrs.get("col_date") if cd and cd in df_isin.columns: d_min = pd.to_datetime(df_isin[cd]).min() d_max = pd.to_datetime(df_isin[cd]).max() if pd.notnull(d_min) and pd.notnull(d_max): all_dates.append((d_min, d_max)) if all_dates: date_min = min(d[0] for d in all_dates).date().isoformat() date_max = max(d[1] for d in all_dates).date().isoformat() print(f" range fetch: {date_min} -> {date_max}, {len(assets_data)} ISIN") try: px_hist_all = fetch_price_history( isins=list(assets_data.keys()), universe=meta_df, start_date=date_min, end_date=date_max, ) if not px_hist_all.empty: for isin, g in px_hist_all.groupby("ISIN"): g = g.sort_values("Date") open_series = g[["Date", "Open"]].dropna() open_series["Date"] = pd.to_datetime(open_series["Date"]).dt.normalize() open_series = ( open_series.drop_duplicates(subset=["Date"]) .set_index("Date")["Open"] ) exec_rets_map[isin] = open_series.pct_change() except Exception as exc: print(f"[WARN] Bulk fetch prezzi fallito: {exc}") print(f"[TIMER] Bulk fetch: {format_eta(time.perf_counter() - t_px)}") # ---- 4C) FORWARD-BACKTEST (riusa assets_data) ---- print("[INFO] Walk-forward kNN backtest per tutte le strategie...") t_bt = time.perf_counter() # Per ogni strategia, mantieni i risultati separati bt_signals_by_strat: Dict[str, List[pd.DataFrame]] = {s: [] for s in STRATEGY_PARAMS} bt_summary_by_strat: Dict[str, List[Dict]] = {s: [] for s in STRATEGY_PARAMS} total_t = 0.0 for i, (isin, df_isin) in enumerate(assets_data.items(), 1): t0 = time.perf_counter() try: df_pct, col_ret_pct = compute_returns_percent(df_isin) col_date = df_isin.attrs.get("col_date") if 
not col_ret_pct or df_pct[col_ret_pct].dropna().shape[0] < max(200, WP + HA + 10): errors.append({"ISIN": isin, "Errore": "Serie troppo corta (BT)"}) continue exec_ret = exec_rets_map.get(isin) if exec_ret is not None and col_date and col_date in df_pct.columns: idx_dates = pd.to_datetime(df_pct[col_date]).dt.normalize() exec_ret = exec_ret.reindex(idx_dates) exec_ret.index = idx_dates # theta_entry = THETA globale (NO HURST v2.1) theta_entry = THETA # Esegui backtest per ogni strategia for strat_name, strat_cfg in STRATEGY_PARAMS.items(): sig_df, stats = knn_forward_backtest_one_asset( df_isin=df_pct, col_date=col_date if col_date else "Date", col_ret=col_ret_pct, Wp=WP, Ha=HA, k=KNN_K, theta_entry=theta_entry, exec_ret=exec_ret, fee_bps=10, sl_bps=strat_cfg.get("sl_bps"), tp_bps=strat_cfg.get("tp_bps"), trail_bps=strat_cfg.get("trail_bps"), time_stop_bars=strat_cfg.get("time_stop_bars"), ) name = meta_df.loc[meta_df["ISIN"] == isin, "Nome"].iloc[0] if ( meta_df["ISIN"] == isin ).any() else None cat = meta_df.loc[meta_df["ISIN"] == isin, "Categoria"].iloc[0] if ( meta_df["ISIN"] == isin ).any() else None ac = meta_df.loc[meta_df["ISIN"] == isin, "Asset Class"].iloc[0] if ( meta_df["ISIN"] == isin ).any() else None tmp = sig_df.copy() tmp.insert(0, "ISIN", isin) tmp.insert(1, "Nome", name) tmp.insert(2, "Categoria", cat) tmp.insert(3, "Asset Class", ac) tmp["Wp"] = WP tmp["Ha"] = HA tmp["k"] = KNN_K tmp["Theta"] = theta_entry bt_signals_by_strat[strat_name].append(tmp) stats_row = {"ISIN": isin, "Nome": name, "Categoria": cat, "Asset Class": ac} stats_row.update(stats) bt_summary_by_strat[strat_name].append(stats_row) except Exception as exc: errors.append({"ISIN": isin, "Errore": f"Backtest: {exc}"}) dt = time.perf_counter() - t0 total_t += dt if i % 10 == 0 or i == len(assets_data): avg_t = total_t / i eta = avg_t * (len(assets_data) - i) print(f" [BT] {i}/{len(assets_data)} ISIN " f"(avg {avg_t:.2f}s, ETA {format_eta(eta)})") print(f"[TIMER] Backtest totale: 
{format_eta(time.perf_counter() - t_bt)}") # Aggrega risultati per strategia (usa la prima come "base" per coerenza) # con il flusso originale che lavorava su EW only) base_strat = "Equal_Weight" bt_signals_df = ( pd.concat(bt_signals_by_strat[base_strat], ignore_index=True) if bt_signals_by_strat[base_strat] else pd.DataFrame() ) bt_summary_df = ( pd.DataFrame(bt_summary_by_strat[base_strat]) if bt_summary_by_strat[base_strat] else pd.DataFrame() ) if not bt_signals_df.empty: bt_signals_df.to_excel(FORWARD_BT_SIGNALS_XLSX, index=False) bt_summary_df.to_excel(FORWARD_BT_SUMMARY_XLSX, index=False) print(f"[INFO] Salvato: {FORWARD_BT_SIGNALS_XLSX} ({len(bt_signals_df):,} righe)") print(f"[INFO] Salvato: {FORWARD_BT_SUMMARY_XLSX} ({len(bt_summary_df):,} righe)") if errors: pd.DataFrame(errors).to_csv(ERROR_LOG_CSV, index=False) print(f"[INFO] Log errori: {ERROR_LOG_CSV} (tot: {len(errors)})") try: save_price_cache_summary(OPEN_CACHE_DIR, OPEN_CACHE_DIR / "prezzi_summary.xlsx") except Exception as exc: print(f"[WARN] Riepilogo prezzi non creato: {exc}") # Timer per fasi post-backtest start_post_timer(total_steps=4) # ---- 5) STRATEGIE PORTAFOGLIO DINAMICHE ---- print("[INFO] Costruzione portafogli dinamici per le strategie attive...") all_portfolios: Dict[str, Dict[str, Any]] = {} all_returns: Dict[str, pd.Series] = {} all_weights: Dict[str, pd.DataFrame] = {} for strat_name, signals_list in bt_signals_by_strat.items(): if not signals_list: continue sigs = pd.concat(signals_list, ignore_index=True) if sigs.empty: continue sigs["Date"] = pd.to_datetime(sigs["Date"]) sigs["ISIN"] = sigs["ISIN"].astype(str).str.strip() sigs["Signal"] = pd.to_numeric(sigs["Signal"], errors="coerce").fillna(0).astype(int) sigs["PnL"] = pd.to_numeric(sigs["PnL"], errors="coerce").fillna(0.0) wide_pnl = sigs.pivot_table( index="Date", columns="ISIN", values="PnL", aggfunc="sum" ).fillna(0.0) wide_sig = sigs.pivot_table( index="Date", columns="ISIN", values="Signal", aggfunc="last" 
).fillna(0).astype(int) wide_est = sigs.pivot_table( index="Date", columns="ISIN", values="EstOutcome", aggfunc="last" ).sort_index() port = _build_dynamic_portfolio_returns( wide_pnl=wide_pnl, wide_sig=wide_sig, wide_est=wide_est, top_n=TOP_N, window_bars=RANKING_WINDOW_BARS, rp_lookback=RP_LOOKBACK, ) sizing = STRATEGY_PARAMS[strat_name].get("sizing", "equal_weight") ret_key = "ret_rp" if sizing == "risk_parity" else "ret_eq" w_key = "w_rp_act" if sizing == "risk_parity" else "w_eq_act" all_portfolios[strat_name] = port all_returns[strat_name] = port[ret_key] all_weights[strat_name] = port[w_key] checkpoint_post_timer("Portafogli dinamici") # ---- 5.4 EQUITY + HEATMAP per ogni strategia ---- print("[INFO] Equity curves e heatmap mensili...") if all_returns: plt.figure(figsize=(11, 6)) for name, r in all_returns.items(): eq = equity_from_returns(r) plt.plot(eq.index, eq.values, label=name, linewidth=1.5) plt.title(f"Equity Curve - Selezione dinamica Top{TOP_N}") plt.legend() plt.grid(True, alpha=0.3) plt.tight_layout() savefig_safe(str(PLOT_DIR / "equity_line_portafogli.png"), dpi=150) plt.close() for name, r in all_returns.items(): plot_heatmap_monthly( r, f"Heatmap mensile - {name}", save_path=str(PLOT_DIR / f"heatmap_{name}.png"), ) # Salva metriche portafogli port_metrics_rows = [portfolio_metric_row(name, r) for name, r in all_returns.items()] if port_metrics_rows: pd.DataFrame(port_metrics_rows).to_excel( OUTPUT_DIR / "portfolio_metrics.xlsx", index=False ) print(f"[INFO] Salvato: {OUTPUT_DIR / 'portfolio_metrics.xlsx'}") checkpoint_post_timer("Equity + heatmap") # Composition plots for name, w in all_weights.items(): plot_portfolio_composition( w, f"{name} (Top{TOP_N})", save_path=str(PLOT_DIR / f"composition_{name}.png"), ) plot_portfolio_composition_fixed( w, f"{name} (Top{TOP_N})", save_path=str(PLOT_DIR / f"composition_cash_{name}.png"), ) # Export pesi _export_weights_daily(all_weights, WEIGHTS_DAILY_XLSX) checkpoint_post_timer("Composition + weights") 
# ---- 5.5 TRADE REPORT ---- print("[INFO] Trade report e performance attribution...") if bt_signals_by_strat[base_strat]: sigs_all = pd.concat(bt_signals_by_strat[base_strat], ignore_index=True) trades_dict = make_trades_report(sigs_all, meta_df, all_weights) if trades_dict: try: with pd.ExcelWriter(TRADES_REPORT_XLSX) as xw: for name, df in trades_dict.items(): df.to_excel(xw, sheet_name=name[:31], index=False) print(f"[INFO] Salvato: {TRADES_REPORT_XLSX}") except Exception as exc: print(f"[WARN] Export trades_report fallito: {exc}") attr_df = _build_performance_attribution(trades_dict) if not attr_df.empty: attr_df.to_excel(PERF_ATTRIB_XLSX, index=False) print(f"[INFO] Salvato: {PERF_ATTRIB_XLSX}") # Ricostruzione daily da trades if all_returns: date_idx = next(iter(all_returns.values())).index daily_from_trades = rebuild_daily_from_trades_dict(trades_dict, date_idx) if not daily_from_trades.empty: daily_from_trades.to_csv(DAILY_FROM_TRADES_CSV, index_label="Date") daily_from_trades.to_excel(DAILY_FROM_TRADES_XLSX, index_label="Date") print(f"[INFO] Salvato: {DAILY_FROM_TRADES_CSV}") checkpoint_post_timer("Trade report") # ---- 6) LOOP TOP-N SWEEP (metriche per N=6..20) ---- print("[INFO] Calcolo metriche finali per N=6..20...") final_rows = [] if bt_signals_by_strat[base_strat]: sigs_base = pd.concat(bt_signals_by_strat[base_strat], ignore_index=True) sigs_base["Date"] = pd.to_datetime(sigs_base["Date"]) sigs_base["ISIN"] = sigs_base["ISIN"].astype(str).str.strip() sigs_base["Signal"] = pd.to_numeric(sigs_base["Signal"], errors="coerce").fillna(0).astype(int) sigs_base["PnL"] = pd.to_numeric(sigs_base["PnL"], errors="coerce").fillna(0.0) wide_pnl_b = sigs_base.pivot_table( index="Date", columns="ISIN", values="PnL", aggfunc="sum" ).fillna(0.0) wide_sig_b = sigs_base.pivot_table( index="Date", columns="ISIN", values="Signal", aggfunc="last" ).fillna(0).astype(int) wide_est_b = sigs_base.pivot_table( index="Date", columns="ISIN", values="EstOutcome", 
aggfunc="last" ).sort_index() for n in range(6, 21): port_n = _build_dynamic_portfolio_returns( wide_pnl=wide_pnl_b, wide_sig=wide_sig_b, wide_est=wide_est_b, top_n=n, ) r_eq = port_n["ret_eq"] r_rp = port_n["ret_rp"] row_eq = _calc_all_metrics_from_returns(r_eq, f"EW_Top{n}") row_rp = _calc_all_metrics_from_returns(r_rp, f"RP_Top{n}") row_eq["TopN"] = n row_rp["TopN"] = n final_rows.extend([row_eq, row_rp]) if final_rows: pd.DataFrame(final_rows).to_excel(FINAL_METRICS_XLSX, index=False) print(f"[INFO] Salvato: {FINAL_METRICS_XLSX}") # ---- WRAP UP ---- elapsed = time.perf_counter() - start_all print(f"\n[DONE] Pipeline completata in {format_eta(elapsed)}") print(f" Output in: {OUTPUT_DIR}") print(f" Plot in: {PLOT_DIR}") # ============================================================================= # ENTRY POINT # ============================================================================= if __name__ == "__main__": main()