Fixato bug su generazione plot

This commit is contained in:
fredmaloggia
2026-03-09 15:31:53 +01:00
parent ec2acc72a1
commit 919e375e80
2 changed files with 918 additions and 15 deletions

View File

@@ -19,7 +19,6 @@ from pathlib import Path
import pandas as pd
import numpy as np
import shutil
import io
from shared_utils import (
detect_column,
@@ -99,6 +98,21 @@ if raw_whitelist:
# AUDIT LOG LOADER (FORMAT CHECKS)
# =============================================================================
# Columns that must exist in the audit log; loading aborts if any is missing.
REQUIRED_AUDIT_COLS = ["Strategy", "ISIN", "Action", "TradeDate"]
# Full canonical column order; used as a fallback header when the file's own
# header row is absent or unrecognizable (i.e. contains no "TradeDate").
CANONICAL_AUDIT_COLS = [
    "Strategy",
    "ISIN",
    "Action",
    "TradeDate",
    "EntryIndex",
    "EntryAmount",
    "SizeWeight",
    "Price",
    "PnL_%",
    "ExitReason",
    "LinkedOpenDate",
    "Duration_bars",
    "Notes",
]
NUMERIC_COLS = [
"EntryIndex",
"EntryAmount",
@@ -146,6 +160,15 @@ def _clean_numeric_series(s: pd.Series) -> pd.Series:
cleaned = txt.map(_fix_one)
return pd.to_numeric(cleaned, errors="coerce")
def _parse_mixed_dates(series: pd.Series) -> pd.Series:
s = series.astype(str).str.strip()
s = s.replace({"": np.nan, "nan": np.nan, "None": np.nan})
dt_iso = pd.to_datetime(s, format="%Y-%m-%d", errors="coerce")
dt_iso_ts = pd.to_datetime(s, format="%Y-%m-%d %H:%M:%S", errors="coerce")
dt_dmy = pd.to_datetime(s, format="%d/%m/%Y", errors="coerce")
dt_dmy_ts = pd.to_datetime(s, format="%d/%m/%Y %H:%M:%S", errors="coerce")
return dt_iso.fillna(dt_iso_ts).fillna(dt_dmy).fillna(dt_dmy_ts)
def load_audit_log(path: Path) -> pd.DataFrame:
if not path.exists():
@@ -155,20 +178,39 @@ def load_audit_log(path: Path) -> pd.DataFrame:
if not raw.strip():
raise SystemExit("Audit log vuoto.")
first_line = raw.splitlines()[0]
semi = first_line.count(";")
comma = first_line.count(",")
# Fix duplicated header (semicolon header + comma header in same line)
if semi > 0 and comma > 0 and ",Strategy," in first_line and "Strategy;" in first_line:
fixed = first_line.split(",", 1)[0]
print("[WARN] Header duplicato rilevato. Uso solo la parte prima della virgola.")
raw = "\n".join([fixed] + raw.splitlines()[1:])
sep = ";" # force semicolon when duplicated header detected
lines = raw.splitlines()
first_line = lines[0]
if ";" in first_line:
header = [c.strip() for c in first_line.split(",", 1)[0].split(";")]
else:
sep = ";" if semi >= comma else ","
header = [c.strip() for c in first_line.split(",")]
if header and header[0] == "":
header = header[1:]
if not header or "TradeDate" not in header:
header = CANONICAL_AUDIT_COLS.copy()
df = pd.read_csv(io.StringIO(raw), sep=sep, dtype=str)
rows = []
mixed_rows = 0
for line in lines[1:]:
if not line or not line.strip():
continue
if ";" in line and line.count(";") >= 5:
parts = line.split(";")
else:
parts = line.split(",")
if parts and parts[0] == "":
parts = parts[1:]
mixed_rows += 1
if len(parts) > len(header):
parts = parts[: len(header) - 1] + [",".join(parts[len(header) - 1 :])]
elif len(parts) < len(header):
parts = parts + [""] * (len(header) - len(parts))
rows.append(parts)
df = pd.DataFrame(rows, columns=header)
if mixed_rows > 0:
print(f"[WARN] Audit log con {mixed_rows} righe in formato legacy/misto: normalizzate in lettura.")
missing = [c for c in REQUIRED_AUDIT_COLS if c not in df.columns]
if missing:
@@ -183,9 +225,9 @@ def load_audit_log(path: Path) -> pd.DataFrame:
df["ISIN"] = df["ISIN"].astype(str).str.strip()
# Dates
df["TradeDate"] = pd.to_datetime(df["TradeDate"], errors="coerce", dayfirst=True)
df["TradeDate"] = _parse_mixed_dates(df["TradeDate"])
if "LinkedOpenDate" in df.columns:
df["LinkedOpenDate"] = pd.to_datetime(df["LinkedOpenDate"], errors="coerce", dayfirst=True)
df["LinkedOpenDate"] = _parse_mixed_dates(df["LinkedOpenDate"])
# Drop rows with invalid dates
before = len(df)