# -*- coding: utf-8 -*- """ Equity / Reconciliation Builder from Audit Log ================================================ Legge il trades_audit_log.csv prodotto dalla pipeline di produzione e ricostruisce: - daily_returns_by_strategy.csv: rendimenti giornalieri per strategia (media ponderata sui trade attivi) - equity_by_strategy.csv: curve di equity composte (base 100) - debug_daily_by_strategy.csv: scomposizione num/den/ret per debug - plot/equity_by_strategy.png - plot/drawdown_by_strategy.png Modifiche v2.1 (questa versione): - Supporto MULTI-STRATEGIA: legge dinamicamente la whitelist da pattern_knn_config.json -> equity_log.strategy_whitelist. Le strategie attese sono ora 4 (Equal_Weight, Risk_Parity, Equal_Weight_v2, Risk_Parity_v2). - Parser audit log tollerante a formato date misto (ISO + DD/MM/YYYY) per retrocompatibilita' con il log esistente. - Rimossa duplicazione codice / pulizia delle import ridondanti. - Esecuzione idempotente: tutti i file output vengono rigenerati ad ogni run. 
Esecuzione: python equity_from_log.py """ from __future__ import annotations import shutil from pathlib import Path from typing import List, Optional import numpy as np import pandas as pd import sqlalchemy as sa from sqlalchemy import text as sql_text from shared_utils import ( detect_column, load_config, read_connection_txt, require_section, ) # ============================================================================= # CONFIG & PATHS # ============================================================================= BASE_DIR = Path(__file__).resolve().parent try: CONFIG = load_config() PATHS_CONFIG = require_section(CONFIG, "paths") except Exception as exc: # pragma: no cover - best effort print(f"[WARN] Config non disponibile ({exc}); uso i percorsi di default.") CONFIG = None PATHS_CONFIG = {} OUTPUT_DIR = BASE_DIR / PATHS_CONFIG.get("output_dir", "output") PLOT_DIR = BASE_DIR / PATHS_CONFIG.get("plot_dir", "plot") AUDIT_LOG_CSV = BASE_DIR / PATHS_CONFIG.get( "audit_log_csv", OUTPUT_DIR / "trades_audit_log.csv" ) CONNECTION_TXT = BASE_DIR / PATHS_CONFIG.get("connection_txt", "connection.txt") OUT_DAILY_CSV = OUTPUT_DIR / "daily_returns_by_strategy.csv" OUT_EQUITY_CSV = OUTPUT_DIR / "equity_by_strategy.csv" OUT_DEBUG_CSV = OUTPUT_DIR / "debug_daily_by_strategy.csv" PLOT_EQUITY = PLOT_DIR / "equity_by_strategy.png" PLOT_DD = PLOT_DIR / "drawdown_by_strategy.png" DROPBOX_EXPORT_DIR = Path( r"C:\Users\Admin\Dropbox\Condivisa Lavoro\Segnali di trading su ETF" ) # Stored procedure defaults (sovrascritti se presenti in config) SP_NAME_DEFAULT = "opt_RendimentoGiornaliero1_ALL" SP_N_DEFAULT = 1260 PTF_CURR_DEFAULT = "EUR" if CONFIG is not None: try: DB_CONFIG = require_section(CONFIG, "db") SP_NAME_DEFAULT = str(DB_CONFIG.get("stored_proc", SP_NAME_DEFAULT)) SP_N_DEFAULT = int(DB_CONFIG.get("n_bars", SP_N_DEFAULT)) PTF_CURR_DEFAULT = str(DB_CONFIG.get("ptf_curr", PTF_CURR_DEFAULT)) except KeyError: pass # Whitelist strategie (con fallback alle 4 attese in v2.1) 
DEFAULT_STRATEGIES = ["Equal_Weight", "Risk_Parity", "Equal_Weight_v2", "Risk_Parity_v2"]
EQUITY_CFG = CONFIG.get("equity_log", {}) if CONFIG else {}
raw_whitelist = (
    EQUITY_CFG.get("strategy_whitelist") if isinstance(EQUITY_CFG, dict) else None
)
if isinstance(raw_whitelist, str):
    # A bare string would otherwise be iterated character by character below;
    # accept it as a comma-separated list instead.
    raw_whitelist = raw_whitelist.split(",")
if raw_whitelist:
    cleaned = [str(x).strip() for x in raw_whitelist if str(x).strip()]
    VALID_STRATEGIES = cleaned if cleaned else DEFAULT_STRATEGIES
else:
    VALID_STRATEGIES = DEFAULT_STRATEGIES


# =============================================================================
# DROPBOX EXPORT
# =============================================================================
def copy_to_dropbox(src: Path, dst_dir: Path = DROPBOX_EXPORT_DIR) -> bool:
    """Copy ``src`` into ``dst_dir`` on a best-effort basis.

    Args:
        src: File to copy. A falsy value or a non-existent path is rejected.
        dst_dir: Destination directory; created (with parents) when missing.

    Returns:
        True when the file was copied, False otherwise. Failures are reported
        with a printed warning and never raised.
    """
    if not src or not dst_dir or not src.exists():
        if src and not src.exists():
            print(f"[WARN] file non trovato per copia Dropbox: {src}")
        return False
    try:
        dst_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst_dir / src.name)
        print(f"[DROPBOX] Copiato {src.name} in {dst_dir}")
        return True
    except Exception as exc:
        # Deliberately broad: the export must never abort the run.
        print(f"[WARN] impossibile copiare {src} su {dst_dir}: {exc}")
        return False


# =============================================================================
# AUDIT LOG LOADER (ROBUST PARSER)
# =============================================================================
REQUIRED_AUDIT_COLS = ["Strategy", "ISIN", "Action", "TradeDate"]
CANONICAL_AUDIT_COLS = [
    "Strategy", "ISIN", "Action", "TradeDate", "EntryIndex", "EntryAmount",
    "SizeWeight", "Price", "PnL_%", "ExitReason", "LinkedOpenDate",
    "Duration_bars", "Notes",
]
NUMERIC_COLS = ["EntryIndex", "EntryAmount", "SizeWeight", "Price", "PnL_%", "Duration_bars"]

# Date layouts tried in order by _parse_mixed_dates (first match wins).
_AUDIT_DATE_FORMATS = (
    "%Y-%m-%d",
    "%Y-%m-%d %H:%M:%S",
    "%d/%m/%Y",
    "%d/%m/%Y %H:%M:%S",
)


def _clean_numeric_series(s: pd.Series) -> pd.Series:
    """Convert a text series to numeric, tolerating European separators.

    Numeric input is returned untouched. For text input, ``%`` signs are
    removed and each value is normalized before ``pd.to_numeric``:

    * several dots and no comma   -> dots are thousands separators
    * several commas and no dot   -> commas are thousands separators
    * both present                -> the right-most one is the decimal mark
    * a single comma, no dot      -> comma is the decimal mark

    Values that still cannot be parsed become NaN.
    """
    if pd.api.types.is_numeric_dtype(s):
        return s

    txt = s.astype(str).str.strip().str.replace("%", "", regex=False)
    txt = txt.replace({"": np.nan, "nan": np.nan, "None": np.nan})

    def _fix_one(val):
        if val is None or (isinstance(val, float) and np.isnan(val)):
            return val
        v = str(val).strip()
        if not v:
            return v
        dot_n, comma_n = v.count("."), v.count(",")
        if dot_n > 1 and comma_n == 0:
            return v.replace(".", "")  # dots are thousands separators
        if comma_n > 1 and dot_n == 0:
            # Mirror of the case above: "1,234,567" used to become NaN.
            return v.replace(",", "")
        if dot_n > 0 and comma_n > 0:
            if v.rfind(",") > v.rfind("."):
                return v.replace(".", "").replace(",", ".")
            return v.replace(",", "")
        if comma_n > 0 and dot_n == 0:
            return v.replace(",", ".")  # single comma is the decimal mark
        return v

    return pd.to_numeric(txt.map(_fix_one), errors="coerce")


def _parse_mixed_dates(series: pd.Series) -> pd.Series:
    """Parse dates stored in a mix of ISO and European (DD/MM/YYYY) layouts.

    Each format in ``_AUDIT_DATE_FORMATS`` is tried in order; a value keeps
    the result of the first format that parses it. Unparseable values are NaT.
    """
    s = series.astype(str).str.strip().replace(
        {"": np.nan, "nan": np.nan, "None": np.nan}
    )
    parsed = pd.to_datetime(s, format=_AUDIT_DATE_FORMATS[0], errors="coerce")
    for fmt in _AUDIT_DATE_FORMATS[1:]:
        parsed = parsed.fillna(pd.to_datetime(s, format=fmt, errors="coerce"))
    return parsed


def load_audit_log(path: Path) -> pd.DataFrame:
    """Load and normalize the trades audit log.

    The file is split by hand rather than with ``pd.read_csv`` because it can
    mix ``;``-separated rows with comma-separated ones. Rows are padded or
    folded to the header width, dates and numeric columns are converted, and
    rows with an invalid ``TradeDate`` or an ``Action`` other than
    OPEN/CLOSE are dropped (with a printed warning).

    Args:
        path: Location of trades_audit_log.csv.

    Returns:
        The cleaned audit log.

    Raises:
        FileNotFoundError: ``path`` does not exist.
        SystemExit: the file is empty or lacks one of REQUIRED_AUDIT_COLS.
    """
    if not path.exists():
        raise FileNotFoundError(f"Missing trades_audit_log.csv at {path}")

    # "utf-8-sig" strips a leading BOM, which would otherwise be glued to the
    # first header name and make the required-column check fail.
    raw = path.read_text(encoding="utf-8-sig", errors="ignore")
    if not raw.strip():
        raise SystemExit("Audit log vuoto.")

    lines = raw.splitlines()
    first_line = lines[0]

    # NOTE(review): the original indentation of this parser was lost; the
    # nesting of the "leading empty field" strip and of the legacy-row counter
    # inside the comma branch is reconstructed - confirm against the pipeline.
    if ";" in first_line:
        # Keep only what precedes the first comma, then split on ';'.
        header = [c.strip() for c in first_line.split(",", 1)[0].split(";")]
    else:
        header = [c.strip() for c in first_line.split(",")]
        if header and header[0] == "":
            header = header[1:]
    if not header or "TradeDate" not in header:
        header = CANONICAL_AUDIT_COLS.copy()

    width = len(header)
    rows, mixed_rows = [], 0
    for line in lines[1:]:
        if not line or not line.strip():
            continue
        if ";" in line and line.count(";") >= 5:
            parts = line.split(";")
        else:
            parts = line.split(",")
            if parts and parts[0] == "":
                parts = parts[1:]
            mixed_rows += 1

        if len(parts) > width:
            # Surplus fields are folded back into the last column.
            parts = parts[: width - 1] + [",".join(parts[width - 1:])]
        elif len(parts) < width:
            parts = parts + [""] * (width - len(parts))
        rows.append(parts)

    df = pd.DataFrame(rows, columns=header)

    if mixed_rows > 0:
        print(f"[WARN] Audit log con {mixed_rows} righe in formato legacy: normalizzate in lettura.")

    missing = [c for c in REQUIRED_AUDIT_COLS if c not in df.columns]
    if missing:
        raise SystemExit(
            f"Formato audit log non valido. Colonne mancanti: {missing}. "
            f"Colonne trovate: {list(df.columns)}"
        )

    # Normalization
    df["Action"] = df["Action"].astype(str).str.upper().str.strip()
    df["Strategy"] = df["Strategy"].astype(str).str.strip()
    df["ISIN"] = df["ISIN"].astype(str).str.strip()
    df["TradeDate"] = _parse_mixed_dates(df["TradeDate"])
    if "LinkedOpenDate" in df.columns:
        df["LinkedOpenDate"] = _parse_mixed_dates(df["LinkedOpenDate"])

    # Row cleanup
    before = len(df)
    df = df.dropna(subset=["TradeDate"])
    if (dropped := before - len(df)) > 0:
        print(f"[WARN] Rimosse {dropped} righe con TradeDate non valido.")

    before = len(df)
    # .copy() so the column assignments below act on an independent frame.
    df = df[df["Action"].isin(["OPEN", "CLOSE"])].copy()
    if (dropped := before - len(df)) > 0:
        print(f"[WARN] Rimosse {dropped} righe con Action non valida.")

    # Numeric conversion
    for col in NUMERIC_COLS:
        if col in df.columns:
            df[col] = _clean_numeric_series(df[col])

    return df


# =============================================================================
# DB FETCH RETURNS
# =============================================================================
def _returns_frame_for_isin(df: pd.DataFrame, isin: str) -> Optional[pd.DataFrame]:
    """Turn one stored-procedure result into a long Date/ISIN/Ret frame.

    Returns None when no date column, or neither a return nor a price column,
    can be detected. When only prices are available, simple returns are
    derived from consecutive prices (zero prices are treated as missing).
    """
    col_date = detect_column(df, ["Date", "Data", "Datetime", "Timestamp", "Time"])
    col_ret = detect_column(df, ["Ret", "Return", "Rendimento", "Rend", "Ret_%", "RET"])
    col_px = detect_column(
        df, ["Close", "AdjClose", "Price", "Px", "Last", "Prezzo", "Chiusura"]
    )
    if not col_date:
        return None

    df[col_date] = pd.to_datetime(df[col_date], errors="coerce")
    df = df.dropna(subset=[col_date]).sort_values(col_date)

    if col_ret:
        ret = pd.to_numeric(df[col_ret], errors="coerce")
    elif col_px:
        px = (
            pd.to_numeric(df[col_px], errors="coerce")
            .astype(float)
            .replace(0, np.nan)
        )
        ret = np.expm1(np.log(px / px.shift(1)))
    else:
        return None

    return pd.DataFrame({"Date": df[col_date], "ISIN": isin, "Ret": ret})


def fetch_returns_from_db(
    isins: List[str], start_date, end_date
) -> pd.DataFrame:
    """Download daily returns for the ISINs found in the audit log.

    The configured stored procedure is executed once per ISIN; an ISIN whose
    call fails or yields no usable columns is skipped (failures are printed).

    Args:
        isins: ISIN codes to query.
        start_date: First calendar date (``datetime.date``) to keep, inclusive.
        end_date: Last calendar date (``datetime.date``) to keep, inclusive.

    Returns:
        A wide frame indexed by Date with one column per ISIN, expressed as
        decimal returns. Empty (with a DatetimeIndex named "Date") when
        nothing could be retrieved.
    """
    conn_str = read_connection_txt(CONNECTION_TXT)
    engine = sa.create_engine(conn_str, fast_executemany=True)
    sql_sp = sql_text(
        f"EXEC {SP_NAME_DEFAULT} @ISIN = :isin, @n = :n, @PtfCurr = :ptf"
    )

    frames = []
    try:
        with engine.begin() as conn:
            for isin in isins:
                try:
                    df = pd.read_sql_query(
                        sql_sp,
                        conn,
                        params={"isin": isin, "n": SP_N_DEFAULT, "ptf": PTF_CURR_DEFAULT},
                    )
                except Exception as exc:
                    print(f"[ERROR] SP {SP_NAME_DEFAULT} fallita per {isin}: {exc}")
                    continue
                if df.empty:
                    continue
                out = _returns_frame_for_isin(df, isin)
                if out is not None:
                    frames.append(out)
    finally:
        # Release pooled connections; the engine is private to this call.
        engine.dispose()

    if not frames:
        return pd.DataFrame(index=pd.DatetimeIndex([], name="Date"))

    long = pd.concat(frames, ignore_index=True).dropna(subset=["Date"])
    in_window = (long["Date"].dt.date >= start_date) & (long["Date"].dt.date <= end_date)
    long = long.loc[in_window]
    # pivot() raises on duplicated (Date, ISIN) pairs; keep the last row seen.
    long = long.drop_duplicates(subset=["Date", "ISIN"], keep="last")
    wide = long.pivot(index="Date", columns="ISIN", values="Ret").sort_index()

    # Auto-detect percent vs decimal: any |value| above 0.5 is taken to mean
    # the whole table is expressed in percent.
    if not wide.empty:
        max_abs = np.nanmax(np.abs(wide.values))
        if np.isfinite(max_abs) and max_abs > 0.5:
            wide = wide / 100.0

    return wide


# =============================================================================
# REBUILD DAILY RETURNS PER STRATEGY
# =============================================================================
def _trade_keys(frame: pd.DataFrame, date_col: str) -> pd.Series:
    """Build ``Strategy|ISIN|YYYY-MM-DD`` keys used to pair OPEN and CLOSE rows."""
    return (
        frame["Strategy"].astype(str)
        + "|"
        + frame["ISIN"].astype(str)
        + "|"
        + pd.to_datetime(frame[date_col]).dt.strftime("%Y-%m-%d")
    )


def _finite_or_none(value) -> Optional[float]:
    """Return ``value`` as a finite float, or None when missing or unparseable."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return number if np.isfinite(number) else None


def _build_close_lookup(audit: pd.DataFrame) -> dict:
    """Map each trade key to the date of its latest CLOSE row.

    CLOSE rows are keyed by ``LinkedOpenDate`` when that column exists,
    otherwise by their own ``TradeDate``. Rows without a usable key are ignored.
    """
    closes = audit[audit["Action"] == "CLOSE"].copy()
    if closes.empty:
        return {}
    date_col = "LinkedOpenDate" if "LinkedOpenDate" in closes.columns else "TradeDate"
    closes["_key"] = _trade_keys(closes, date_col)
    closes["TradeDate"] = pd.to_datetime(closes["TradeDate"])
    latest = (
        closes.dropna(subset=["_key"])
        .sort_values("TradeDate")
        .groupby("_key")["TradeDate"]
        .last()
    )
    return latest.to_dict()


def rebuild_daily_from_log(
    audit: pd.DataFrame, returns_wide: pd.DataFrame
) -> pd.DataFrame:
    """Rebuild per-strategy daily returns from the audit log.

    For each strategy the daily return is the amount-weighted average over
    the trades active on that day:

        r_strat(d) = sum(amount_i * ret_isin_i(d)) / sum(amount_i)

    A trade is active from its entry bar (inclusive) to its close bar
    (exclusive); trades without a matching CLOSE stay active to the end of
    the series. Days with no active trade get a return of 0.

    OPEN and CLOSE rows are paired on strategy, ISIN and open date, so the
    same ISIN held by several strategies is closed independently in each.
    A missing or invalid ``EntryIndex`` falls back to the position of the
    trade date; a missing or non-positive ``EntryAmount`` skips the trade.
    A missing return for an active ISIN counts as a 0 return with its weight
    kept, so one gap does not blank the whole strategy for that day.

    Side effect: writes the num/den/ret breakdown to OUT_DEBUG_CSV and prints
    a summary of used/skipped OPEN rows.

    Args:
        audit: Cleaned audit log (see ``load_audit_log``).
        returns_wide: Date-indexed decimal returns, one column per ISIN.

    Returns:
        A frame indexed like ``returns_wide`` with one column per strategy.
    """
    strategies = sorted(audit["Strategy"].dropna().astype(str).unique())
    if not strategies:
        return pd.DataFrame(index=returns_wide.index, columns=[])

    idx = returns_wide.index
    n_bars = len(idx)
    num = np.zeros((n_bars, len(strategies)), dtype=float)
    den = np.zeros_like(num)

    close_lookup = _build_close_lookup(audit)

    # Counters for the debug summary
    total_opens = 0
    used_opens = 0
    skipped_missing_isin = 0
    skipped_bad_amount = 0
    skipped_bad_window = 0
    missing_returns = 0

    for col_pos, strat in enumerate(strategies):
        opens = audit[(audit["Strategy"] == strat) & (audit["Action"] == "OPEN")].copy()
        if opens.empty:
            continue
        opens["_key"] = _trade_keys(opens, "TradeDate")

        for _, op in opens.iterrows():
            total_opens += 1

            isin = op["ISIN"]
            if isin not in returns_wide.columns:
                skipped_missing_isin += 1
                continue

            entry_amount = _finite_or_none(op.get("EntryAmount"))
            if entry_amount is None or entry_amount <= 0:
                skipped_bad_amount += 1
                continue

            # An absent/NaN EntryIndex must not crash int() nor silently mean
            # "bar 0": force the date-based fallback instead.
            raw_entry_idx = _finite_or_none(op.get("EntryIndex"))
            entry_idx = int(raw_entry_idx) if raw_entry_idx is not None else -1
            if entry_idx < 0 or entry_idx >= n_bars:
                d_open = pd.Timestamp(op["TradeDate"]).normalize()
                entry_idx = int(idx.searchsorted(d_open, side="left"))

            close_val = close_lookup.get(op["_key"])
            if close_val is not None:
                exit_idx = int(
                    idx.searchsorted(pd.Timestamp(close_val).normalize(), side="left")
                )
            else:
                exit_idx = n_bars

            if exit_idx <= entry_idx:
                skipped_bad_window += 1
                continue

            vals_seg = returns_wide[isin].to_numpy(dtype=float)[entry_idx:exit_idx]
            gaps = np.isnan(vals_seg)
            missing_returns += int(gaps.sum())
            num[entry_idx:exit_idx, col_pos] += entry_amount * np.where(gaps, 0.0, vals_seg)
            den[entry_idx:exit_idx, col_pos] += entry_amount
            used_opens += 1

    daily_num = pd.DataFrame(num, index=idx, columns=strategies)
    daily_den = pd.DataFrame(den, index=idx, columns=strategies)
    # Days without any active trade (denominator 0) are reported as 0.
    daily = (daily_num / daily_den).where(daily_den > 0, 0.0)

    # Save the debug breakdown
    debug = pd.concat(
        {f"num_{c}": daily_num[c] for c in strategies}
        | {f"den_{c}": daily_den[c] for c in strategies}
        | {f"ret_{c}": daily[c] for c in strategies},
        axis=1,
    )
    OUT_DEBUG_CSV.parent.mkdir(parents=True, exist_ok=True)
    debug.to_csv(OUT_DEBUG_CSV, index_label="Date")

    print(
        f"[DEBUG] OPEN totali: {total_opens}, usati: {used_opens}, "
        f"mancano ISIN: {skipped_missing_isin}, "
        f"EntryAmount<=0: {skipped_bad_amount}, "
        f"finestra non valida: {skipped_bad_window}"
    )
    if missing_returns > 0:
        print(
            f"[WARN] {missing_returns} rendimenti mancanti trattati come 0 "
            f"nella media ponderata."
        )

    return daily


# =============================================================================
# PLOT
# =============================================================================
def _save_line_chart(frame: pd.DataFrame, title: str, out_path: Path, figsize) -> None:
    """Draw one line per column of ``frame`` and save the figure to ``out_path``."""
    # Imported lazily so the module can be imported without matplotlib.
    import matplotlib.pyplot as plt

    plt.figure(figsize=figsize)
    for col in frame.columns:
        plt.plot(frame.index, frame[col], label=col)
    plt.title(title)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.savefig(str(out_path), dpi=150)
    plt.close()


def _plot_equity(equity: pd.DataFrame, out_path: Path) -> None:
    """Save the per-strategy equity curves as a PNG."""
    _save_line_chart(equity, "Equity per Strategy", out_path, (10, 6))


def _plot_drawdown(equity: pd.DataFrame, out_path: Path) -> None:
    """Save the per-strategy drawdown (equity / running max - 1) as a PNG."""
    dd = equity / equity.cummax() - 1.0
    _save_line_chart(dd, "Drawdown per Strategy", out_path, (10, 5))


# =============================================================================
# MAIN
# =============================================================================
def main() -> None:
    """Run the full rebuild: audit log -> returns -> equity -> CSVs and plots.

    Raises:
        FileNotFoundError: the audit log is missing.
        SystemExit: the audit log is empty, malformed, or contains no row for
            a whitelisted strategy.
        RuntimeError: no returns could be fetched from the database.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    PLOT_DIR.mkdir(parents=True, exist_ok=True)

    if not AUDIT_LOG_CSV.exists():
        raise FileNotFoundError("Missing trades_audit_log.csv")

    audit = load_audit_log(AUDIT_LOG_CSV)
    if audit.empty:
        raise SystemExit("Audit log vuoto.")
    if "Strategy" not in audit.columns:
        raise SystemExit("Colonna 'Strategy' mancante nell'audit log.")

    # Whitelist filter
    audit["Strategy"] = audit["Strategy"].astype(str)
    before = len(audit)
    audit = audit[audit["Strategy"].isin(VALID_STRATEGIES)]
    if (removed := before - len(audit)) > 0:
        print(
            f"[INFO] Filtrate {removed} righe con strategie non incluse "
            f"in {VALID_STRATEGIES}."
        )
    if audit.empty:
        raise SystemExit(
            f"Nessuna riga con strategie in {VALID_STRATEGIES} nell'audit log."
        )

    # Pad the trade window by 10 calendar days on each side.
    start_date = (audit["TradeDate"].min() - pd.Timedelta(days=10)).date()
    end_date = (audit["TradeDate"].max() + pd.Timedelta(days=10)).date()
    isins = sorted(audit["ISIN"].dropna().astype(str).unique())

    ret_wide = fetch_returns_from_db(isins, start_date, end_date)
    if ret_wide.empty:
        raise RuntimeError("Nessun rendimento recuperato dal DB.")

    # Daily returns + equity (compounded, base 100)
    daily = rebuild_daily_from_log(audit, ret_wide).sort_index()
    daily.to_csv(OUT_DAILY_CSV, index_label="Date")

    equity = (1.0 + daily.fillna(0.0)).cumprod() * 100.0
    equity.to_csv(OUT_EQUITY_CSV, index_label="Date")

    # Plots
    _plot_equity(equity, PLOT_EQUITY)
    _plot_drawdown(equity, PLOT_DD)

    # Dropbox export: remember which copies actually succeeded so the summary
    # below does not list files that were never written.
    saved = [OUT_DAILY_CSV, OUT_EQUITY_CSV, OUT_DEBUG_CSV, PLOT_EQUITY, PLOT_DD]
    for plot_path in (PLOT_EQUITY, PLOT_DD):
        if copy_to_dropbox(plot_path):
            saved.append(DROPBOX_EXPORT_DIR / plot_path.name)

    print("Salvati:")
    for path in saved:
        print(" -", path)


if __name__ == "__main__":
    main()