Fixato bug su generazione plot

This commit is contained in:
fredmaloggia
2026-03-09 15:31:53 +01:00
parent ec2acc72a1
commit 919e375e80
2 changed files with 918 additions and 15 deletions

View File

@@ -19,7 +19,6 @@ from pathlib import Path
import pandas as pd
import numpy as np
import shutil
import io
from shared_utils import (
detect_column,
@@ -99,6 +98,21 @@ if raw_whitelist:
# AUDIT LOG LOADER (FORMAT CHECKS)
# =============================================================================
# Columns that must exist in the audit log; loading aborts if any is missing.
REQUIRED_AUDIT_COLS = ["Strategy", "ISIN", "Action", "TradeDate"]
# Full canonical column order; used as a fallback header when the file's own
# header row is absent or unrecognizable (i.e. contains no "TradeDate").
CANONICAL_AUDIT_COLS = [
    "Strategy",
    "ISIN",
    "Action",
    "TradeDate",
    "EntryIndex",
    "EntryAmount",
    "SizeWeight",
    "Price",
    "PnL_%",
    "ExitReason",
    "LinkedOpenDate",
    "Duration_bars",
    "Notes",
]
NUMERIC_COLS = [
"EntryIndex",
"EntryAmount",
@@ -146,6 +160,15 @@ def _clean_numeric_series(s: pd.Series) -> pd.Series:
cleaned = txt.map(_fix_one)
return pd.to_numeric(cleaned, errors="coerce")
def _parse_mixed_dates(series: pd.Series) -> pd.Series:
s = series.astype(str).str.strip()
s = s.replace({"": np.nan, "nan": np.nan, "None": np.nan})
dt_iso = pd.to_datetime(s, format="%Y-%m-%d", errors="coerce")
dt_iso_ts = pd.to_datetime(s, format="%Y-%m-%d %H:%M:%S", errors="coerce")
dt_dmy = pd.to_datetime(s, format="%d/%m/%Y", errors="coerce")
dt_dmy_ts = pd.to_datetime(s, format="%d/%m/%Y %H:%M:%S", errors="coerce")
return dt_iso.fillna(dt_iso_ts).fillna(dt_dmy).fillna(dt_dmy_ts)
def load_audit_log(path: Path) -> pd.DataFrame:
if not path.exists():
@@ -155,20 +178,39 @@ def load_audit_log(path: Path) -> pd.DataFrame:
if not raw.strip():
raise SystemExit("Audit log vuoto.")
first_line = raw.splitlines()[0]
semi = first_line.count(";")
comma = first_line.count(",")
# Fix duplicated header (semicolon header + comma header in same line)
if semi > 0 and comma > 0 and ",Strategy," in first_line and "Strategy;" in first_line:
fixed = first_line.split(",", 1)[0]
print("[WARN] Header duplicato rilevato. Uso solo la parte prima della virgola.")
raw = "\n".join([fixed] + raw.splitlines()[1:])
sep = ";" # force semicolon when duplicated header detected
lines = raw.splitlines()
first_line = lines[0]
if ";" in first_line:
header = [c.strip() for c in first_line.split(",", 1)[0].split(";")]
else:
sep = ";" if semi >= comma else ","
header = [c.strip() for c in first_line.split(",")]
if header and header[0] == "":
header = header[1:]
if not header or "TradeDate" not in header:
header = CANONICAL_AUDIT_COLS.copy()
df = pd.read_csv(io.StringIO(raw), sep=sep, dtype=str)
rows = []
mixed_rows = 0
for line in lines[1:]:
if not line or not line.strip():
continue
if ";" in line and line.count(";") >= 5:
parts = line.split(";")
else:
parts = line.split(",")
if parts and parts[0] == "":
parts = parts[1:]
mixed_rows += 1
if len(parts) > len(header):
parts = parts[: len(header) - 1] + [",".join(parts[len(header) - 1 :])]
elif len(parts) < len(header):
parts = parts + [""] * (len(header) - len(parts))
rows.append(parts)
df = pd.DataFrame(rows, columns=header)
if mixed_rows > 0:
print(f"[WARN] Audit log con {mixed_rows} righe in formato legacy/misto: normalizzate in lettura.")
missing = [c for c in REQUIRED_AUDIT_COLS if c not in df.columns]
if missing:
@@ -183,9 +225,9 @@ def load_audit_log(path: Path) -> pd.DataFrame:
df["ISIN"] = df["ISIN"].astype(str).str.strip()
# Dates
df["TradeDate"] = pd.to_datetime(df["TradeDate"], errors="coerce", dayfirst=True)
df["TradeDate"] = _parse_mixed_dates(df["TradeDate"])
if "LinkedOpenDate" in df.columns:
df["LinkedOpenDate"] = pd.to_datetime(df["LinkedOpenDate"], errors="coerce", dayfirst=True)
df["LinkedOpenDate"] = _parse_mixed_dates(df["LinkedOpenDate"])
# Drop rows with invalid dates
before = len(df)