Fix bug
This commit is contained in:
@@ -19,6 +19,7 @@ from pathlib import Path
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import shutil
|
||||
import io
|
||||
|
||||
from shared_utils import (
|
||||
detect_column,
|
||||
@@ -94,6 +95,120 @@ if raw_whitelist:
|
||||
if whitelist:
|
||||
VALID_STRATEGIES = whitelist
|
||||
|
||||
# =============================================================================
# AUDIT LOG LOADER (FORMAT CHECKS)
# =============================================================================
# Columns that must exist in trades_audit_log.csv; the loader aborts
# (SystemExit) when any of these is missing.
REQUIRED_AUDIT_COLS = ["Strategy", "ISIN", "Action", "TradeDate"]
# Columns coerced to numeric by the loader when present (handles "%" signs
# and mixed comma/dot separator conventions).
NUMERIC_COLS = [
    "EntryIndex",
    "EntryAmount",
    "SizeWeight",
    "Price",
    "PnL_%",
    "Duration_bars",
]
|
||||
|
||||
|
||||
def _clean_numeric_series(s: pd.Series) -> pd.Series:
|
||||
if pd.api.types.is_numeric_dtype(s):
|
||||
return s
|
||||
txt = s.astype(str).str.strip()
|
||||
txt = txt.str.replace("%", "", regex=False)
|
||||
txt = txt.replace({"": np.nan, "nan": np.nan, "None": np.nan})
|
||||
|
||||
def _fix_one(val: str) -> str:
|
||||
if val is None or (isinstance(val, float) and np.isnan(val)):
|
||||
return val
|
||||
v = str(val).strip()
|
||||
if not v:
|
||||
return v
|
||||
dot_n = v.count(".")
|
||||
comma_n = v.count(",")
|
||||
|
||||
# Heuristic:
|
||||
# - multiple dots with no commas => dots are thousands separators
|
||||
if dot_n > 1 and comma_n == 0:
|
||||
return v.replace(".", "")
|
||||
# - both comma and dot present => decide decimal by last separator
|
||||
if dot_n > 0 and comma_n > 0:
|
||||
last_dot = v.rfind(".")
|
||||
last_comma = v.rfind(",")
|
||||
if last_comma > last_dot:
|
||||
# comma as decimal, dots as thousands
|
||||
return v.replace(".", "").replace(",", ".")
|
||||
# dot as decimal, commas as thousands
|
||||
return v.replace(",", "")
|
||||
# - only comma present => comma as decimal
|
||||
if comma_n > 0 and dot_n == 0:
|
||||
return v.replace(",", ".")
|
||||
return v
|
||||
|
||||
cleaned = txt.map(_fix_one)
|
||||
return pd.to_numeric(cleaned, errors="coerce")
|
||||
|
||||
|
||||
def load_audit_log(path: Path) -> pd.DataFrame:
    """Read and validate trades_audit_log.csv into a normalized DataFrame.

    Auto-detects the separator (";" vs ","), repairs a known duplicated
    header corruption, enforces REQUIRED_AUDIT_COLS, normalizes key string
    columns, parses dates (day-first), drops rows with invalid TradeDate or
    Action, and coerces NUMERIC_COLS to floats.

    Raises FileNotFoundError if the file is absent and SystemExit when the
    file is empty or its columns do not match the expected format.
    """
    if not path.exists():
        raise FileNotFoundError(f"Missing trades_audit_log.csv at {path}")

    text = path.read_text(encoding="utf-8", errors="ignore")
    if not text.strip():
        raise SystemExit("Audit log vuoto.")

    lines = text.splitlines()
    header = lines[0]
    n_semi = header.count(";")
    n_comma = header.count(",")

    # A known corruption: a semicolon header and a comma header glued onto
    # the same first line. Keep only the part before the first comma.
    duplicated_header = (
        n_semi > 0
        and n_comma > 0
        and ",Strategy," in header
        and "Strategy;" in header
    )
    if duplicated_header:
        print("[WARN] Header duplicato rilevato. Uso solo la parte prima della virgola.")
        text = "\n".join([header.split(",", 1)[0]] + lines[1:])
        sep = ";"  # force semicolon when duplicated header detected
    else:
        sep = ";" if n_semi >= n_comma else ","

    df = pd.read_csv(io.StringIO(text), sep=sep, dtype=str)

    absent = [c for c in REQUIRED_AUDIT_COLS if c not in df.columns]
    if absent:
        raise SystemExit(
            f"Formato audit log non valido. Colonne mancanti: {absent}. "
            f"Colonne trovate: {list(df.columns)}"
        )

    # Canonicalize the key identifying columns.
    df["Action"] = df["Action"].astype(str).str.upper().str.strip()
    df["Strategy"] = df["Strategy"].astype(str).str.strip()
    df["ISIN"] = df["ISIN"].astype(str).str.strip()

    # Parse dates day-first; LinkedOpenDate is optional.
    date_cols = ["TradeDate"]
    if "LinkedOpenDate" in df.columns:
        date_cols.append("LinkedOpenDate")
    for col in date_cols:
        df[col] = pd.to_datetime(df[col], errors="coerce", dayfirst=True)

    # Discard rows whose TradeDate failed to parse.
    n_before = len(df)
    df = df.dropna(subset=["TradeDate"])
    n_lost = n_before - len(df)
    if n_lost > 0:
        print(f"[WARN] Rimosse {n_lost} righe con TradeDate non valido.")

    # Retain only OPEN/CLOSE actions (guard kept for parity, though
    # "Action" is guaranteed present by the required-column check).
    if "Action" in df.columns:
        n_before = len(df)
        df = df[df["Action"].isin(["OPEN", "CLOSE"])]
        n_lost = n_before - len(df)
        if n_lost > 0:
            print(f"[WARN] Rimosse {n_lost} righe con Action non valida.")

    # Coerce known numeric columns through the shared cleaner.
    for col in (c for c in NUMERIC_COLS if c in df.columns):
        df[col] = _clean_numeric_series(df[col])

    return df
|
||||
|
||||
# =============================================================================
|
||||
|
||||
# FETCH RENDIMENTI DAL DB
|
||||
@@ -198,6 +313,13 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p
|
||||
else:
|
||||
close_map = pd.DataFrame().set_index(pd.Index([], name="_key"))
|
||||
|
||||
# debug counters
|
||||
total_opens = 0
|
||||
used_opens = 0
|
||||
skipped_missing_isin = 0
|
||||
skipped_bad_amount = 0
|
||||
skipped_bad_window = 0
|
||||
|
||||
for strat in strategies:
|
||||
aud_s = audit[audit["Strategy"] == strat]
|
||||
opens = aud_s[aud_s["Action"] == "OPEN"].copy()
|
||||
@@ -211,13 +333,16 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p
|
||||
)
|
||||
|
||||
for _, op in opens.iterrows():
|
||||
total_opens += 1
|
||||
isin = op["ISIN"]
|
||||
if isin not in returns_wide.columns:
|
||||
skipped_missing_isin += 1
|
||||
continue
|
||||
|
||||
ser = returns_wide[isin].astype(float)
|
||||
entry_amount = float(op.get("EntryAmount", 0.0) or 0.0)
|
||||
if entry_amount <= 0:
|
||||
skipped_bad_amount += 1
|
||||
continue
|
||||
|
||||
entry_idx = int(op.get("EntryIndex", 0) or 0)
|
||||
@@ -236,6 +361,7 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p
|
||||
exit_idx = len(ser)
|
||||
|
||||
if exit_idx <= entry_idx:
|
||||
skipped_bad_window += 1
|
||||
continue
|
||||
|
||||
idx_seg = ser.index[entry_idx:exit_idx]
|
||||
@@ -243,6 +369,7 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p
|
||||
|
||||
daily_num.loc[idx_seg, strat] += entry_amount * vals_seg
|
||||
daily_den.loc[idx_seg, strat] += entry_amount
|
||||
used_opens += 1
|
||||
|
||||
daily = pd.DataFrame(0.0, index=idx, columns=strategies)
|
||||
mask = daily_den > 0
|
||||
@@ -256,6 +383,13 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p
|
||||
)
|
||||
debug.to_csv(OUT_DEBUG_CSV, index_label="Date")
|
||||
|
||||
print(
|
||||
f"[DEBUG] OPEN totali: {total_opens}, usati: {used_opens}, "
|
||||
f"mancano ISIN: {skipped_missing_isin}, "
|
||||
f"EntryAmount<=0: {skipped_bad_amount}, "
|
||||
f"finestra non valida: {skipped_bad_window}"
|
||||
)
|
||||
|
||||
return daily
|
||||
|
||||
# =============================================================================
|
||||
@@ -267,11 +401,8 @@ def main():
|
||||
if not AUDIT_LOG_CSV.exists():
|
||||
raise FileNotFoundError("Missing trades_audit_log.csv")
|
||||
|
||||
# parsing robusto (LinkedOpenDate può mancare)
|
||||
try:
|
||||
audit = pd.read_csv(AUDIT_LOG_CSV, parse_dates=["TradeDate", "LinkedOpenDate"])
|
||||
except ValueError:
|
||||
audit = pd.read_csv(AUDIT_LOG_CSV, parse_dates=["TradeDate"])
|
||||
# parsing robusto con controllo formato
|
||||
audit = load_audit_log(AUDIT_LOG_CSV)
|
||||
|
||||
if audit.empty:
|
||||
raise SystemExit("Audit log vuoto.")
|
||||
|
||||
Reference in New Issue
Block a user