Fixato bug su generazione plot
This commit is contained in:
@@ -19,7 +19,6 @@ from pathlib import Path
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import shutil
|
||||
import io
|
||||
|
||||
from shared_utils import (
|
||||
detect_column,
|
||||
@@ -99,6 +98,21 @@ if raw_whitelist:
|
||||
# AUDIT LOG LOADER (FORMAT CHECKS)
|
||||
# =============================================================================
|
||||
# Columns an audit log must contain; the loader checks the parsed frame
# against this list.
REQUIRED_AUDIT_COLS = ["Strategy", "ISIN", "Action", "TradeDate"]

# Full, ordered audit-log column layout. The loader falls back to this header
# when the file's own header is empty or does not contain "TradeDate".
CANONICAL_AUDIT_COLS = [
    *REQUIRED_AUDIT_COLS,
    "EntryIndex",
    "EntryAmount",
    "SizeWeight",
    "Price",
    "PnL_%",
    "ExitReason",
    "LinkedOpenDate",
    "Duration_bars",
    "Notes",
]
|
||||
NUMERIC_COLS = [
|
||||
"EntryIndex",
|
||||
"EntryAmount",
|
||||
@@ -146,6 +160,15 @@ def _clean_numeric_series(s: pd.Series) -> pd.Series:
|
||||
cleaned = txt.map(_fix_one)
|
||||
return pd.to_numeric(cleaned, errors="coerce")
|
||||
|
||||
def _parse_mixed_dates(series: pd.Series) -> pd.Series:
|
||||
s = series.astype(str).str.strip()
|
||||
s = s.replace({"": np.nan, "nan": np.nan, "None": np.nan})
|
||||
dt_iso = pd.to_datetime(s, format="%Y-%m-%d", errors="coerce")
|
||||
dt_iso_ts = pd.to_datetime(s, format="%Y-%m-%d %H:%M:%S", errors="coerce")
|
||||
dt_dmy = pd.to_datetime(s, format="%d/%m/%Y", errors="coerce")
|
||||
dt_dmy_ts = pd.to_datetime(s, format="%d/%m/%Y %H:%M:%S", errors="coerce")
|
||||
return dt_iso.fillna(dt_iso_ts).fillna(dt_dmy).fillna(dt_dmy_ts)
|
||||
|
||||
|
||||
def load_audit_log(path: Path) -> pd.DataFrame:
|
||||
if not path.exists():
|
||||
@@ -155,20 +178,39 @@ def load_audit_log(path: Path) -> pd.DataFrame:
|
||||
if not raw.strip():
|
||||
raise SystemExit("Audit log vuoto.")
|
||||
|
||||
first_line = raw.splitlines()[0]
|
||||
semi = first_line.count(";")
|
||||
comma = first_line.count(",")
|
||||
|
||||
# Fix duplicated header (semicolon header + comma header in same line)
|
||||
if semi > 0 and comma > 0 and ",Strategy," in first_line and "Strategy;" in first_line:
|
||||
fixed = first_line.split(",", 1)[0]
|
||||
print("[WARN] Header duplicato rilevato. Uso solo la parte prima della virgola.")
|
||||
raw = "\n".join([fixed] + raw.splitlines()[1:])
|
||||
sep = ";" # force semicolon when duplicated header detected
|
||||
lines = raw.splitlines()
|
||||
first_line = lines[0]
|
||||
if ";" in first_line:
|
||||
header = [c.strip() for c in first_line.split(",", 1)[0].split(";")]
|
||||
else:
|
||||
sep = ";" if semi >= comma else ","
|
||||
header = [c.strip() for c in first_line.split(",")]
|
||||
if header and header[0] == "":
|
||||
header = header[1:]
|
||||
if not header or "TradeDate" not in header:
|
||||
header = CANONICAL_AUDIT_COLS.copy()
|
||||
|
||||
df = pd.read_csv(io.StringIO(raw), sep=sep, dtype=str)
|
||||
rows = []
|
||||
mixed_rows = 0
|
||||
for line in lines[1:]:
|
||||
if not line or not line.strip():
|
||||
continue
|
||||
if ";" in line and line.count(";") >= 5:
|
||||
parts = line.split(";")
|
||||
else:
|
||||
parts = line.split(",")
|
||||
if parts and parts[0] == "":
|
||||
parts = parts[1:]
|
||||
mixed_rows += 1
|
||||
|
||||
if len(parts) > len(header):
|
||||
parts = parts[: len(header) - 1] + [",".join(parts[len(header) - 1 :])]
|
||||
elif len(parts) < len(header):
|
||||
parts = parts + [""] * (len(header) - len(parts))
|
||||
rows.append(parts)
|
||||
|
||||
df = pd.DataFrame(rows, columns=header)
|
||||
if mixed_rows > 0:
|
||||
print(f"[WARN] Audit log con {mixed_rows} righe in formato legacy/misto: normalizzate in lettura.")
|
||||
|
||||
missing = [c for c in REQUIRED_AUDIT_COLS if c not in df.columns]
|
||||
if missing:
|
||||
@@ -183,9 +225,9 @@ def load_audit_log(path: Path) -> pd.DataFrame:
|
||||
df["ISIN"] = df["ISIN"].astype(str).str.strip()
|
||||
|
||||
# Dates
|
||||
df["TradeDate"] = pd.to_datetime(df["TradeDate"], errors="coerce", dayfirst=True)
|
||||
df["TradeDate"] = _parse_mixed_dates(df["TradeDate"])
|
||||
if "LinkedOpenDate" in df.columns:
|
||||
df["LinkedOpenDate"] = pd.to_datetime(df["LinkedOpenDate"], errors="coerce", dayfirst=True)
|
||||
df["LinkedOpenDate"] = _parse_mixed_dates(df["LinkedOpenDate"])
|
||||
|
||||
# Drop rows with invalid dates
|
||||
before = len(df)
|
||||
|
||||
Reference in New Issue
Block a user