# Files
# Trading/equity_from_log.py
# fredmaloggia 3c3f2a7705 refactoring
# 2026-05-24 12:24:30 +02:00
#
# 520 lines
# 19 KiB
# Python

# -*- coding: utf-8 -*-
"""
Equity / Reconciliation Builder from Audit Log
================================================
Legge il trades_audit_log.csv prodotto dalla pipeline di produzione e ricostruisce:
- daily_returns_by_strategy.csv: rendimenti giornalieri per strategia
(media ponderata sui trade attivi)
- equity_by_strategy.csv: curve di equity composte (base 100)
- debug_daily_by_strategy.csv: scomposizione num/den/ret per debug
- plot/equity_by_strategy.png
- plot/drawdown_by_strategy.png
Modifiche v2.1 (questa versione):
- Supporto MULTI-STRATEGIA: legge dinamicamente la whitelist da
pattern_knn_config.json -> equity_log.strategy_whitelist. Le strategie attese
sono ora 4 (Equal_Weight, Risk_Parity, Equal_Weight_v2, Risk_Parity_v2).
- Parser audit log tollerante a formato date misto (ISO + DD/MM/YYYY) per
retrocompatibilita' con il log esistente.
- Rimossa duplicazione codice / pulizia delle import ridondanti.
- Esecuzione idempotente: tutti i file output vengono rigenerati ad ogni run.
Esecuzione: python equity_from_log.py
"""
from __future__ import annotations
import shutil
from pathlib import Path
from typing import List, Optional
import numpy as np
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import text as sql_text
from shared_utils import (
detect_column,
load_config,
read_connection_txt,
require_section,
)
# =============================================================================
# CONFIG & PATHS
# =============================================================================
BASE_DIR = Path(__file__).resolve().parent

# Configuration is best effort: on any loading error the script warns and
# falls back to the default relative paths below instead of aborting.
try:
    CONFIG = load_config()
    PATHS_CONFIG = require_section(CONFIG, "paths")
except Exception as exc:  # pragma: no cover - best effort
    print(f"[WARN] Config non disponibile ({exc}); uso i percorsi di default.")
    CONFIG = None
    PATHS_CONFIG = {}

# Config entries are joined onto the script folder. pathlib's "/" keeps an
# absolute right-hand operand as is, so absolute config values are honoured.
OUTPUT_DIR = BASE_DIR / PATHS_CONFIG.get("output_dir", "output")
PLOT_DIR = BASE_DIR / PATHS_CONFIG.get("plot_dir", "plot")
AUDIT_LOG_CSV = BASE_DIR / PATHS_CONFIG.get(
    "audit_log_csv", OUTPUT_DIR / "trades_audit_log.csv"
)
CONNECTION_TXT = BASE_DIR / PATHS_CONFIG.get("connection_txt", "connection.txt")

# Generated artefacts.
OUT_DAILY_CSV = OUTPUT_DIR / "daily_returns_by_strategy.csv"
OUT_EQUITY_CSV = OUTPUT_DIR / "equity_by_strategy.csv"
OUT_DEBUG_CSV = OUTPUT_DIR / "debug_daily_by_strategy.csv"
PLOT_EQUITY = PLOT_DIR / "equity_by_strategy.png"
PLOT_DD = PLOT_DIR / "drawdown_by_strategy.png"

# Folder that receives a copy of the plots. The historical location stays the
# default; set "dropbox_export_dir" in the "paths" config section to run the
# script on a machine with a different user profile.
DROPBOX_EXPORT_DIR = Path(
    PATHS_CONFIG.get(
        "dropbox_export_dir",
        r"C:\Users\Admin\Dropbox\Condivisa Lavoro\Segnali di trading su ETF",
    )
)
# Stored-procedure defaults; each one is replaced by the matching entry of the
# "db" config section when that section can be loaded.
SP_NAME_DEFAULT = "opt_RendimentoGiornaliero1_ALL"
SP_N_DEFAULT = 1260
PTF_CURR_DEFAULT = "EUR"
if CONFIG is not None:
    try:
        DB_CONFIG = require_section(CONFIG, "db")
    except KeyError:
        # Presumably raised when the "db" section is absent: keep the defaults.
        pass
    else:
        SP_NAME_DEFAULT = str(DB_CONFIG.get("stored_proc", SP_NAME_DEFAULT))
        SP_N_DEFAULT = int(DB_CONFIG.get("n_bars", SP_N_DEFAULT))
        PTF_CURR_DEFAULT = str(DB_CONFIG.get("ptf_curr", PTF_CURR_DEFAULT))
# Strategy whitelist, read from equity_log.strategy_whitelist in the config.
# Falls back to the four strategies expected in v2.1 when the entry is
# missing or contains no usable name.
DEFAULT_STRATEGIES = ["Equal_Weight", "Risk_Parity", "Equal_Weight_v2", "Risk_Parity_v2"]
EQUITY_CFG = CONFIG.get("equity_log", {}) if CONFIG else {}
raw_whitelist = EQUITY_CFG.get("strategy_whitelist") if isinstance(EQUITY_CFG, dict) else None
# A bare string would otherwise be iterated character by character and yield
# single-letter "strategies"; treat it as a comma-separated list instead.
if isinstance(raw_whitelist, str):
    raw_whitelist = raw_whitelist.split(",")
if raw_whitelist:
    cleaned = [str(x).strip() for x in raw_whitelist if str(x).strip()]
    VALID_STRATEGIES = cleaned if cleaned else DEFAULT_STRATEGIES
else:
    VALID_STRATEGIES = DEFAULT_STRATEGIES
# =============================================================================
# DROPBOX EXPORT
# =============================================================================
def copy_to_dropbox(src: Path, dst_dir: Path = DROPBOX_EXPORT_DIR) -> bool:
    """Copy ``src`` into ``dst_dir`` keeping its file name.

    Returns True when the copy succeeded and False otherwise. Failures are
    reported on stdout and never raised, so the export stays best effort.
    """
    if not src:
        return False
    if not src.exists():
        print(f"[WARN] file non trovato per copia Dropbox: {src}")
        return False
    if not dst_dir:
        return False

    try:
        dst_dir.mkdir(parents=True, exist_ok=True)
        target = dst_dir / src.name
        shutil.copy2(src, target)
        print(f"[DROPBOX] Copiato {src.name} in {dst_dir}")
    except Exception as exc:
        print(f"[WARN] impossibile copiare {src} su {dst_dir}: {exc}")
        return False
    return True
# =============================================================================
# AUDIT LOG LOADER (PARSER ROBUSTO)
# =============================================================================
# Columns load_audit_log insists on; it aborts when any of them is missing.
REQUIRED_AUDIT_COLS = ["Strategy", "ISIN", "Action", "TradeDate"]
# Header assumed by load_audit_log when the first line of the file does not
# contain a "TradeDate" column name.
CANONICAL_AUDIT_COLS = [
"Strategy", "ISIN", "Action", "TradeDate",
"EntryIndex", "EntryAmount", "SizeWeight", "Price",
"PnL_%", "ExitReason", "LinkedOpenDate", "Duration_bars", "Notes",
]
# Columns load_audit_log converts with _clean_numeric_series when present.
NUMERIC_COLS = ["EntryIndex", "EntryAmount", "SizeWeight", "Price", "PnL_%", "Duration_bars"]
def _clean_numeric_series(s: pd.Series) -> pd.Series:
"""Conversione robusta a numerico, tollera separatori italiani/europei."""
if pd.api.types.is_numeric_dtype(s):
return s
txt = s.astype(str).str.strip().str.replace("%", "", regex=False)
txt = txt.replace({"": np.nan, "nan": np.nan, "None": np.nan})
def _fix_one(val):
if val is None or (isinstance(val, float) and np.isnan(val)):
return val
v = str(val).strip()
if not v:
return v
dot_n, comma_n = v.count("."), v.count(",")
if dot_n > 1 and comma_n == 0:
return v.replace(".", "") # dots are thousands sep
if dot_n > 0 and comma_n > 0:
return (v.replace(".", "").replace(",", ".")
if v.rfind(",") > v.rfind(".")
else v.replace(",", ""))
if comma_n > 0 and dot_n == 0:
return v.replace(",", ".") # comma is decimal
return v
return pd.to_numeric(txt.map(_fix_one), errors="coerce")
def _parse_mixed_dates(series: pd.Series) -> pd.Series:
"""Parser per date in formato misto ISO + europeo."""
s = series.astype(str).str.strip().replace({"": np.nan, "nan": np.nan, "None": np.nan})
return (pd.to_datetime(s, format="%Y-%m-%d", errors="coerce")
.fillna(pd.to_datetime(s, format="%Y-%m-%d %H:%M:%S", errors="coerce"))
.fillna(pd.to_datetime(s, format="%d/%m/%Y", errors="coerce"))
.fillna(pd.to_datetime(s, format="%d/%m/%Y %H:%M:%S", errors="coerce")))
def load_audit_log(path: Path) -> pd.DataFrame:
    """Read the trades audit log into a normalised DataFrame.

    The file is split by hand so that rows written with ";" and rows written
    with "," can coexist. Rows are padded or truncated to the header width,
    dates are parsed with _parse_mixed_dates and the NUMERIC_COLS columns are
    converted with _clean_numeric_series. Rows without a valid TradeDate, or
    whose Action is neither OPEN nor CLOSE, are dropped with a warning.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
        SystemExit: if the file is empty or a required column is missing.
    """
    if not path.exists():
        raise FileNotFoundError(f"Missing trades_audit_log.csv at {path}")
    # utf-8-sig strips a leading BOM (also reads BOM-less files); with plain
    # utf-8 the BOM sticks to the first header name and "Strategy" is then
    # reported as missing.
    raw = path.read_text(encoding="utf-8-sig", errors="ignore")
    if not raw.strip():
        raise SystemExit("Audit log vuoto.")
    lines = raw.splitlines()

    # NOTE(review): the first line is always consumed as the header, even when
    # it is replaced by the canonical one below - confirm that the log can
    # never start directly with a data row.
    first_line = lines[0]
    if ";" in first_line:
        header = [c.strip() for c in first_line.split(",", 1)[0].split(";")]
    else:
        header = [c.strip() for c in first_line.split(",")]
    if header and header[0] == "":
        header = header[1:]
    if not header or "TradeDate" not in header:
        header = CANONICAL_AUDIT_COLS.copy()
    width = len(header)

    rows, mixed_rows = [], 0
    for line in lines[1:]:
        if not line.strip():
            continue
        # A row is treated as semicolon-separated only with at least five ";".
        sep = ";" if line.count(";") >= 5 else ","
        parts = line.split(sep)
        if sep == "," and parts[0] == "":
            # Legacy rows start with an empty leading field.
            parts = parts[1:]
            mixed_rows += 1
        if len(parts) > width:
            # Fold the overflow back into the last column using the separator
            # the row was split on, so the original text is preserved.
            parts = parts[: width - 1] + [sep.join(parts[width - 1:])]
        elif len(parts) < width:
            parts = parts + [""] * (width - len(parts))
        rows.append(parts)

    df = pd.DataFrame(rows, columns=header)
    if mixed_rows > 0:
        print(f"[WARN] Audit log con {mixed_rows} righe in formato legacy: normalizzate in lettura.")
    missing = [c for c in REQUIRED_AUDIT_COLS if c not in df.columns]
    if missing:
        raise SystemExit(
            f"Formato audit log non valido. Colonne mancanti: {missing}. "
            f"Colonne trovate: {list(df.columns)}"
        )

    # Normalisation of the key columns.
    df["Action"] = df["Action"].astype(str).str.upper().str.strip()
    df["Strategy"] = df["Strategy"].astype(str).str.strip()
    df["ISIN"] = df["ISIN"].astype(str).str.strip()
    df["TradeDate"] = _parse_mixed_dates(df["TradeDate"])
    if "LinkedOpenDate" in df.columns:
        df["LinkedOpenDate"] = _parse_mixed_dates(df["LinkedOpenDate"])

    # Row clean-up.
    before = len(df)
    df = df.dropna(subset=["TradeDate"])
    if (dropped := before - len(df)) > 0:
        print(f"[WARN] Rimosse {dropped} righe con TradeDate non valido.")
    before = len(df)
    # Explicit copy: the numeric conversion below assigns columns and must not
    # operate on a view of the unfiltered frame.
    df = df[df["Action"].isin(["OPEN", "CLOSE"])].copy()
    if (dropped := before - len(df)) > 0:
        print(f"[WARN] Rimosse {dropped} righe con Action non valida.")

    # Numeric conversion.
    for col in NUMERIC_COLS:
        if col in df.columns:
            df[col] = _clean_numeric_series(df[col])
    return df
# =============================================================================
# DB FETCH RETURNS
# =============================================================================
def _returns_from_sp_frame(df: pd.DataFrame, isin: str) -> Optional[pd.DataFrame]:
    """Turn one stored-procedure result into a long Date/ISIN/Ret frame.

    Returns None when the result is empty or has no usable date column, or
    when it has neither a return column nor a price column.
    """
    if df.empty:
        return None
    # detect_column is a project helper; it is used here as returning the
    # matching column name or a falsy value - TODO confirm.
    col_date = detect_column(df, ["Date", "Data", "Datetime", "Timestamp", "Time"])
    col_ret = detect_column(df, ["Ret", "Return", "Rendimento", "Rend", "Ret_%", "RET"])
    col_px = detect_column(df, ["Close", "AdjClose", "Price", "Px", "Last", "Prezzo", "Chiusura"])
    if not col_date:
        return None
    df[col_date] = pd.to_datetime(df[col_date], errors="coerce")
    df = df.dropna(subset=[col_date]).sort_values(col_date)
    if col_ret:
        ret = pd.to_numeric(df[col_ret], errors="coerce")
    elif col_px:
        # Zero prices become NaN so they cannot produce infinite returns.
        px = pd.to_numeric(df[col_px], errors="coerce").astype(float).replace(0, np.nan)
        ret = np.expm1(np.log(px / px.shift(1)))
    else:
        return None
    return pd.DataFrame({"Date": df[col_date], "ISIN": isin, "Ret": ret})


def fetch_returns_from_db(
    isins: List[str], start_date, end_date
) -> pd.DataFrame:
    """Download daily returns for the ISINs found in the audit log.

    Runs the configured stored procedure once per ISIN and keeps the rows
    between ``start_date`` and ``end_date`` (compared as ``datetime.date``).
    The result is a wide frame indexed by Date with one column per ISIN.
    ISINs whose call fails or returns nothing usable are skipped. If the
    largest absolute value exceeds 0.5 the data is taken to be in percent
    and divided by 100.

    Returns an empty frame with a DatetimeIndex when nothing was retrieved.
    """
    conn_str = read_connection_txt(CONNECTION_TXT)
    engine = sa.create_engine(conn_str, fast_executemany=True)
    sql_sp = sql_text(
        f"EXEC {SP_NAME_DEFAULT} @ISIN = :isin, @n = :n, @PtfCurr = :ptf"
    )
    frames = []
    try:
        with engine.begin() as conn:
            for isin in isins:
                try:
                    df = pd.read_sql_query(
                        sql_sp, conn,
                        params={"isin": isin, "n": SP_N_DEFAULT, "ptf": PTF_CURR_DEFAULT},
                    )
                except Exception as exc:
                    print(f"[ERROR] SP {SP_NAME_DEFAULT} fallita per {isin}: {exc}")
                    continue
                out = _returns_from_sp_frame(df, isin)
                if out is not None:
                    frames.append(out)
    finally:
        # Release the pooled connections once the download is over.
        engine.dispose()

    if not frames:
        return pd.DataFrame(index=pd.DatetimeIndex([], name="Date"))
    long = pd.concat(frames, ignore_index=True).dropna(subset=["Date"])
    mask = (long["Date"].dt.date >= start_date) & (long["Date"].dt.date <= end_date)
    long = long.loc[mask]

    # DataFrame.pivot raises ValueError on repeated (Date, ISIN) pairs; keep
    # the last occurrence instead of aborting the whole run.
    dup_mask = long.duplicated(subset=["Date", "ISIN"], keep="last")
    if dup_mask.any():
        print(f"[WARN] Rimosse {int(dup_mask.sum())} righe duplicate (Date, ISIN) dai rendimenti.")
        long = long.loc[~dup_mask]
    wide = long.pivot(index="Date", columns="ISIN", values="Ret").sort_index()

    # Auto-detect percent vs decimal.
    if not wide.empty:
        max_abs = np.nanmax(np.abs(wide.values))
        if np.isfinite(max_abs) and max_abs > 0.5:
            wide = wide / 100.0
    return wide
# =============================================================================
# RICOSTRUZIONE DAILY RETURNS PER STRATEGIA
# =============================================================================
def _finite_or_none(value) -> Optional[float]:
    """Return ``value`` as a finite float, or None if it is missing or invalid."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return number if np.isfinite(number) else None


def rebuild_daily_from_log(
    audit: pd.DataFrame, returns_wide: pd.DataFrame
) -> pd.DataFrame:
    """Rebuild the daily return of every strategy from its logged trades.

    For each strategy the daily return is the amount-weighted mean over the
    trades open on that day:
    r_strat(d) = sum(amount_i * ret_isin_i(d)) / sum(amount_i).
    Days without any open trade get a return of 0.

    A trade runs from its entry row up to, but excluding, the row of its
    matching CLOSE; without a matching CLOSE it runs to the end of the data.
    A CLOSE matches an OPEN of the same strategy and ISIN whose TradeDate
    equals the CLOSE's LinkedOpenDate.

    Side effects: writes the num/den/ret breakdown to OUT_DEBUG_CSV and
    prints the counters of used and skipped OPEN rows.

    Returns a frame indexed like ``returns_wide`` with one column per
    strategy.
    """
    strategies = sorted(audit["Strategy"].dropna().astype(str).unique())
    if not strategies:
        return pd.DataFrame(index=returns_wide.index, columns=[])
    idx = returns_wide.index
    n_rows = len(idx)
    num = np.zeros((n_rows, len(strategies)), dtype=float)
    den = np.zeros((n_rows, len(strategies)), dtype=float)

    def _trade_keys(frame: pd.DataFrame, date_col: str) -> pd.Series:
        # The strategy is part of the key: with several strategies trading the
        # same ISIN from the same day, an ISIN+date key would give all of
        # them whichever close happens to be the latest.
        return (
            frame["Strategy"].astype(str) + "|"
            + frame["ISIN"].astype(str) + "|"
            + pd.to_datetime(frame[date_col]).dt.strftime("%Y-%m-%d")
        )

    # Map Strategy+ISIN+OpenDate -> CloseDate (latest close wins).
    close_dates = {}
    closes = audit[audit["Action"] == "CLOSE"].copy()
    if not closes.empty:
        # NOTE(review): without LinkedOpenDate the key falls back to the close
        # date itself, so it only matches trades opened on that same day -
        # confirm this legacy fallback is still wanted.
        link_col = "LinkedOpenDate" if "LinkedOpenDate" in closes.columns else "TradeDate"
        closes["_key"] = _trade_keys(closes, link_col)
        closes["TradeDate"] = pd.to_datetime(closes["TradeDate"])
        closes = closes.dropna(subset=["_key"]).sort_values("TradeDate")
        close_dates = closes.groupby("_key")["TradeDate"].last().to_dict()

    # Debug counters.
    total_opens = 0
    used_opens = 0
    skipped_missing_isin = 0
    skipped_bad_amount = 0
    skipped_bad_window = 0

    returns_cache = {}  # ISIN -> float array aligned with idx
    for col_pos, strat in enumerate(strategies):
        opens = audit[(audit["Strategy"] == strat) & (audit["Action"] == "OPEN")].copy()
        if opens.empty:
            continue
        opens["_key"] = _trade_keys(opens, "TradeDate")
        for _, op in opens.iterrows():
            total_opens += 1
            isin = op["ISIN"]
            if isin not in returns_wide.columns:
                skipped_missing_isin += 1
                continue
            if isin not in returns_cache:
                returns_cache[isin] = returns_wide[isin].to_numpy(dtype=float)
            vals = returns_cache[isin]

            # NaN is truthy, so "value or 0" does not guard against it; a NaN
            # amount would poison both accumulators for the whole window.
            entry_amount = _finite_or_none(op.get("EntryAmount"))
            if entry_amount is None or entry_amount <= 0:
                skipped_bad_amount += 1
                continue

            # A missing or out-of-range EntryIndex falls back to the position
            # of the trade date instead of defaulting to the first row.
            # NOTE(review): a logged EntryIndex is used as a row position in
            # returns_wide - confirm both refer to the same calendar.
            raw_idx = _finite_or_none(op.get("EntryIndex"))
            entry_idx = int(raw_idx) if raw_idx is not None else -1
            if entry_idx < 0 or entry_idx >= n_rows:
                d_open = pd.Timestamp(op["TradeDate"]).normalize()
                entry_idx = int(idx.searchsorted(d_open, side="left"))

            close_val = close_dates.get(op["_key"])
            if close_val is not None and not pd.isna(close_val):
                exit_idx = int(idx.searchsorted(
                    pd.Timestamp(close_val).normalize(), side="left"
                ))
            else:
                exit_idx = n_rows  # still open: runs to the end of the data
            if exit_idx <= entry_idx:
                skipped_bad_window += 1
                continue

            seg = vals[entry_idx:exit_idx]
            # A day without a quote for this ISIN counts as a flat day for the
            # position; it must not turn the whole strategy's day into NaN.
            seg = np.where(np.isfinite(seg), seg, 0.0)
            num[entry_idx:exit_idx, col_pos] += entry_amount * seg
            den[entry_idx:exit_idx, col_pos] += entry_amount
            used_opens += 1

    daily_num = pd.DataFrame(num, index=idx, columns=strategies)
    daily_den = pd.DataFrame(den, index=idx, columns=strategies)
    daily = (daily_num / daily_den).where(daily_den > 0, 0.0)

    # Save the debug breakdown.
    debug = pd.concat(
        {f"num_{c}": daily_num[c] for c in strategies}
        | {f"den_{c}": daily_den[c] for c in strategies}
        | {f"ret_{c}": daily[c] for c in strategies},
        axis=1,
    )
    debug.to_csv(OUT_DEBUG_CSV, index_label="Date")
    print(
        f"[DEBUG] OPEN totali: {total_opens}, usati: {used_opens}, "
        f"mancano ISIN: {skipped_missing_isin}, "
        f"EntryAmount<=0: {skipped_bad_amount}, "
        f"finestra non valida: {skipped_bad_window}"
    )
    return daily
# =============================================================================
# PLOT
# =============================================================================
def _plot_equity(equity: pd.DataFrame, out_path: Path) -> None:
    """Draw one equity line per column of ``equity`` and save it to ``out_path``."""
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(10, 6))
    for name in equity.columns:
        ax.plot(equity.index, equity[name], label=name)
    ax.set_title("Equity per Strategy")
    ax.grid(True)
    ax.legend()
    fig.tight_layout()
    fig.savefig(str(out_path), dpi=150)
    plt.close(fig)
def _plot_drawdown(equity: pd.DataFrame, out_path: Path) -> None:
    """Plot the drawdown of every equity column and save it to ``out_path``.

    The drawdown is the relative distance of each curve from its running
    maximum (0 at a new high, negative below it).
    """
    import matplotlib.pyplot as plt

    running_peak = equity.cummax()
    drawdown = equity / running_peak - 1.0

    fig, ax = plt.subplots(figsize=(10, 5))
    for name in drawdown.columns:
        ax.plot(drawdown.index, drawdown[name], label=name)
    ax.set_title("Drawdown per Strategy")
    ax.grid(True)
    ax.legend()
    fig.tight_layout()
    fig.savefig(str(out_path), dpi=150)
    plt.close(fig)
# =============================================================================
# MAIN
# =============================================================================
def main() -> None:
    """Rebuild the returns, equity curves and plots from the audit log.

    Steps: load the audit log, keep only the whitelisted strategies, download
    the returns of the traded ISINs for the traded period plus a 10-day
    margin on each side, rebuild the daily returns per strategy, compound
    them into base-100 equity curves, write the CSV files and plots, and copy
    the plots to the Dropbox folder.

    Raises:
        FileNotFoundError: if the audit log file does not exist.
        SystemExit: if the log is empty, lacks the Strategy column or has no
            row for the whitelisted strategies.
        RuntimeError: if no return could be retrieved from the database.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    PLOT_DIR.mkdir(parents=True, exist_ok=True)
    if not AUDIT_LOG_CSV.exists():
        raise FileNotFoundError("Missing trades_audit_log.csv")
    audit = load_audit_log(AUDIT_LOG_CSV)
    if audit.empty:
        raise SystemExit("Audit log vuoto.")
    if "Strategy" not in audit.columns:
        raise SystemExit("Colonna 'Strategy' mancante nell'audit log.")

    # Whitelist filter. assign() builds a new frame, so the column is never
    # written onto a filtered view of the loader's data.
    audit = audit.assign(Strategy=audit["Strategy"].astype(str))
    before = len(audit)
    audit = audit[audit["Strategy"].isin(VALID_STRATEGIES)].copy()
    if (removed := before - len(audit)) > 0:
        print(
            f"[INFO] Filtrate {removed} righe con strategie non incluse "
            f"in {VALID_STRATEGIES}."
        )
    if audit.empty:
        raise SystemExit(
            f"Nessuna riga con strategie in {VALID_STRATEGIES} nell'audit log."
        )

    start_date = (audit["TradeDate"].min() - pd.Timedelta(days=10)).date()
    end_date = (audit["TradeDate"].max() + pd.Timedelta(days=10)).date()
    isins = sorted(audit["ISIN"].dropna().astype(str).unique())
    ret_wide = fetch_returns_from_db(isins, start_date, end_date)
    if ret_wide.empty:
        raise RuntimeError("Nessun rendimento recuperato dal DB.")

    # Daily returns + equity (compounded, base 100).
    daily = rebuild_daily_from_log(audit, ret_wide).sort_index()
    daily.to_csv(OUT_DAILY_CSV, index_label="Date")
    equity = (1.0 + daily.fillna(0.0)).cumprod() * 100.0
    equity.to_csv(OUT_EQUITY_CSV, index_label="Date")

    # Plots.
    _plot_equity(equity, PLOT_EQUITY)
    _plot_drawdown(equity, PLOT_DD)

    # Dropbox export: a target is listed as saved only when its copy actually
    # succeeded (copy_to_dropbox prints its own warning on failure).
    saved = [OUT_DAILY_CSV, OUT_EQUITY_CSV, OUT_DEBUG_CSV, PLOT_EQUITY, PLOT_DD]
    for plot_path in (PLOT_EQUITY, PLOT_DD):
        if copy_to_dropbox(plot_path):
            saved.append(DROPBOX_EXPORT_DIR / plot_path.name)

    print("Salvati:")
    for path in saved:
        print(" -", path)


if __name__ == "__main__":
    main()