# =============================================================================
# AUDIT LOG LOADER (FORMAT CHECKS)
# =============================================================================
# Columns that must be present for the audit log to be usable.
REQUIRED_AUDIT_COLS = ["Strategy", "ISIN", "Action", "TradeDate"]
# Columns converted to numbers when they are present in the file.
NUMERIC_COLS = [
    "EntryIndex",
    "EntryAmount",
    "SizeWeight",
    "Price",
    "PnL_%",
    "Duration_bars",
]


def _normalize_number_text(val):
    """Rewrite one textual number so that ``pd.to_numeric`` can parse it.

    Separator heuristic:

    * both ``.`` and ``,`` present: the right-most one is the decimal mark,
      the other one is a thousands separator;
    * several dots only: dots are thousands separators;
    * several commas only: commas are thousands separators;
    * a single comma only: the comma is the decimal mark;
    * anything else is returned stripped but otherwise untouched.

    Non-string values (the NaN placeholders produced upstream) are passed
    through unchanged.
    """
    if not isinstance(val, str):
        return val
    v = val.strip()
    if not v:
        return v

    dots = v.count(".")
    commas = v.count(",")

    if dots and commas:
        if v.rfind(",") > v.rfind("."):
            # "1.234,56": comma is the decimal mark, dots group thousands.
            return v.replace(".", "").replace(",", ".")
        # "1,234.56": dot is the decimal mark, commas group thousands.
        return v.replace(",", "")
    if dots > 1:
        # "1.234.567"
        return v.replace(".", "")
    if commas > 1:
        # "1,234,567": a number cannot have two decimal marks, so these are
        # thousands separators (otherwise the value would be coerced to NaN).
        return v.replace(",", "")
    if commas == 1:
        # "12,5"
        return v.replace(",", ".")
    return v


def _clean_numeric_series(s: pd.Series) -> pd.Series:
    """Convert a textual column to numbers, tolerating locale formatting.

    A series that already has a numeric dtype is returned as is. Otherwise
    values are stripped, ``%`` signs are removed, the placeholders ``""``,
    ``"nan"`` and ``"None"`` become NaN, decimal/thousands separators are
    normalized and the result goes through ``pd.to_numeric`` with
    ``errors="coerce"`` (unparseable values become NaN).
    """
    if pd.api.types.is_numeric_dtype(s):
        return s

    txt = s.astype(str).str.strip()
    txt = txt.str.replace("%", "", regex=False)
    txt = txt.replace({"": np.nan, "nan": np.nan, "None": np.nan})
    return pd.to_numeric(txt.map(_normalize_number_text), errors="coerce")


def load_audit_log(path: Path) -> pd.DataFrame:
    """Load the trades audit log CSV and validate/normalize its format.

    Steps: pick the separator (``;`` or ``,``) from the header line, repair
    a header that contains both a semicolon and a comma version of itself,
    check the required columns, normalize ``Action``/``Strategy``/``ISIN``,
    parse dates day-first, drop rows with an unparseable ``TradeDate`` or an
    ``Action`` other than OPEN/CLOSE, and convert the known numeric columns.

    Args:
        path: Location of the audit log CSV.

    Returns:
        The cleaned audit log. The original row index is kept (rows that
        were dropped leave gaps).

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        SystemExit: If the file is blank or a required column is missing.
    """
    # Local import so the loader does not depend on the module import block.
    import io

    if not path.exists():
        raise FileNotFoundError(f"Missing trades_audit_log.csv at {path}")

    # utf-8-sig decodes plain UTF-8 too, and drops a leading BOM so it can
    # never end up glued to the first column name.
    raw = path.read_text(encoding="utf-8-sig", errors="ignore")
    if not raw.strip():
        raise SystemExit("Audit log vuoto.")

    lines = raw.splitlines()
    first_line = lines[0]
    semi = first_line.count(";")
    comma = first_line.count(",")

    # Fix duplicated header (semicolon header + comma header in same line)
    if semi > 0 and comma > 0 and ",Strategy," in first_line and "Strategy;" in first_line:
        fixed = first_line.split(",", 1)[0]
        print("[WARN] Header duplicato rilevato. Uso solo la parte prima della virgola.")
        raw = "\n".join([fixed] + lines[1:])
        sep = ";"  # force semicolon when duplicated header detected
    else:
        sep = ";" if semi >= comma else ","

    # Read everything as text: numeric parsing is done explicitly below.
    df = pd.read_csv(io.StringIO(raw), sep=sep, dtype=str)

    missing = [c for c in REQUIRED_AUDIT_COLS if c not in df.columns]
    if missing:
        raise SystemExit(
            f"Formato audit log non valido. Colonne mancanti: {missing}. "
            f"Colonne trovate: {list(df.columns)}"
        )

    # Normalize key columns
    df["Action"] = df["Action"].astype(str).str.upper().str.strip()
    df["Strategy"] = df["Strategy"].astype(str).str.strip()
    df["ISIN"] = df["ISIN"].astype(str).str.strip()

    # Dates
    df["TradeDate"] = pd.to_datetime(df["TradeDate"], errors="coerce", dayfirst=True)
    if "LinkedOpenDate" in df.columns:
        df["LinkedOpenDate"] = pd.to_datetime(
            df["LinkedOpenDate"], errors="coerce", dayfirst=True
        )

    # Drop rows with invalid dates
    before = len(df)
    df = df.dropna(subset=["TradeDate"])
    dropped = before - len(df)
    if dropped > 0:
        print(f"[WARN] Rimosse {dropped} righe con TradeDate non valido.")

    # Keep only OPEN/CLOSE. "Action" is guaranteed by the required-column
    # check above. The explicit copy makes the column assignments below
    # write to an independent frame instead of a filtered selection.
    before = len(df)
    df = df[df["Action"].isin(["OPEN", "CLOSE"])].copy()
    dropped = before - len(df)
    if dropped > 0:
        print(f"[WARN] Rimosse {dropped} righe con Action non valida.")

    # Numeric cleanup
    for col in NUMERIC_COLS:
        if col in df.columns:
            df[col] = _clean_numeric_series(df[col])

    return df
============================================================================= # FETCH RENDIMENTI DAL DB @@ -198,6 +313,13 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p else: close_map = pd.DataFrame().set_index(pd.Index([], name="_key")) + # debug counters + total_opens = 0 + used_opens = 0 + skipped_missing_isin = 0 + skipped_bad_amount = 0 + skipped_bad_window = 0 + for strat in strategies: aud_s = audit[audit["Strategy"] == strat] opens = aud_s[aud_s["Action"] == "OPEN"].copy() @@ -211,13 +333,16 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p ) for _, op in opens.iterrows(): + total_opens += 1 isin = op["ISIN"] if isin not in returns_wide.columns: + skipped_missing_isin += 1 continue ser = returns_wide[isin].astype(float) entry_amount = float(op.get("EntryAmount", 0.0) or 0.0) if entry_amount <= 0: + skipped_bad_amount += 1 continue entry_idx = int(op.get("EntryIndex", 0) or 0) @@ -236,6 +361,7 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p exit_idx = len(ser) if exit_idx <= entry_idx: + skipped_bad_window += 1 continue idx_seg = ser.index[entry_idx:exit_idx] @@ -243,6 +369,7 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p daily_num.loc[idx_seg, strat] += entry_amount * vals_seg daily_den.loc[idx_seg, strat] += entry_amount + used_opens += 1 daily = pd.DataFrame(0.0, index=idx, columns=strategies) mask = daily_den > 0 @@ -256,6 +383,13 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p ) debug.to_csv(OUT_DEBUG_CSV, index_label="Date") + print( + f"[DEBUG] OPEN totali: {total_opens}, usati: {used_opens}, " + f"mancano ISIN: {skipped_missing_isin}, " + f"EntryAmount<=0: {skipped_bad_amount}, " + f"finestra non valida: {skipped_bad_window}" + ) + return daily # ============================================================================= @@ -267,11 +401,8 @@ def main(): if not 
AUDIT_LOG_CSV.exists(): raise FileNotFoundError("Missing trades_audit_log.csv") - # parsing robusto (LinkedOpenDate può mancare) - try: - audit = pd.read_csv(AUDIT_LOG_CSV, parse_dates=["TradeDate", "LinkedOpenDate"]) - except ValueError: - audit = pd.read_csv(AUDIT_LOG_CSV, parse_dates=["TradeDate"]) + # parsing robusto con controllo formato + audit = load_audit_log(AUDIT_LOG_CSV) if audit.empty: raise SystemExit("Audit log vuoto.")