Fixed bug
This commit is contained in:
@@ -19,6 +19,7 @@ from pathlib import Path
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import shutil
|
import shutil
|
||||||
|
import io
|
||||||
|
|
||||||
from shared_utils import (
|
from shared_utils import (
|
||||||
detect_column,
|
detect_column,
|
||||||
@@ -94,6 +95,120 @@ if raw_whitelist:
|
|||||||
if whitelist:
|
if whitelist:
|
||||||
VALID_STRATEGIES = whitelist
|
VALID_STRATEGIES = whitelist
|
||||||
|
|
||||||
|
# =============================================================================
# AUDIT LOG LOADER (FORMAT CHECKS)
# =============================================================================

# Columns that must exist for the audit log to be considered valid.
REQUIRED_AUDIT_COLS = ["Strategy", "ISIN", "Action", "TradeDate"]

# Columns coerced to numeric (mixed EU/US formats tolerated) when present.
NUMERIC_COLS = [
    "EntryIndex",
    "EntryAmount",
    "SizeWeight",
    "Price",
    "PnL_%",
    "Duration_bars",
]
|
def _clean_numeric_series(s: pd.Series) -> pd.Series:
|
||||||
|
if pd.api.types.is_numeric_dtype(s):
|
||||||
|
return s
|
||||||
|
txt = s.astype(str).str.strip()
|
||||||
|
txt = txt.str.replace("%", "", regex=False)
|
||||||
|
txt = txt.replace({"": np.nan, "nan": np.nan, "None": np.nan})
|
||||||
|
|
||||||
|
def _fix_one(val: str) -> str:
|
||||||
|
if val is None or (isinstance(val, float) and np.isnan(val)):
|
||||||
|
return val
|
||||||
|
v = str(val).strip()
|
||||||
|
if not v:
|
||||||
|
return v
|
||||||
|
dot_n = v.count(".")
|
||||||
|
comma_n = v.count(",")
|
||||||
|
|
||||||
|
# Heuristic:
|
||||||
|
# - multiple dots with no commas => dots are thousands separators
|
||||||
|
if dot_n > 1 and comma_n == 0:
|
||||||
|
return v.replace(".", "")
|
||||||
|
# - both comma and dot present => decide decimal by last separator
|
||||||
|
if dot_n > 0 and comma_n > 0:
|
||||||
|
last_dot = v.rfind(".")
|
||||||
|
last_comma = v.rfind(",")
|
||||||
|
if last_comma > last_dot:
|
||||||
|
# comma as decimal, dots as thousands
|
||||||
|
return v.replace(".", "").replace(",", ".")
|
||||||
|
# dot as decimal, commas as thousands
|
||||||
|
return v.replace(",", "")
|
||||||
|
# - only comma present => comma as decimal
|
||||||
|
if comma_n > 0 and dot_n == 0:
|
||||||
|
return v.replace(",", ".")
|
||||||
|
return v
|
||||||
|
|
||||||
|
cleaned = txt.map(_fix_one)
|
||||||
|
return pd.to_numeric(cleaned, errors="coerce")
|
||||||
|
|
||||||
|
|
||||||
|
def load_audit_log(path: Path) -> pd.DataFrame:
    """Load and sanity-check the trades audit log CSV.

    Detects the separator (';' vs ','), repairs a known duplicated-header
    glitch, validates required columns, normalizes key string columns,
    parses dates, drops rows with invalid dates or actions, and coerces
    numeric columns.

    Raises FileNotFoundError if the file is missing and SystemExit when
    the content is empty or the required columns are absent.
    """
    if not path.exists():
        raise FileNotFoundError(f"Missing trades_audit_log.csv at {path}")

    raw = path.read_text(encoding="utf-8", errors="ignore")
    if not raw.strip():
        raise SystemExit("Audit log vuoto.")

    lines = raw.splitlines()
    header = lines[0]
    n_semi = header.count(";")
    n_comma = header.count(",")

    # Fix duplicated header (semicolon header + comma header in same line):
    # keep only the part before the first comma and force ';' as separator.
    if n_semi > 0 and n_comma > 0 and ",Strategy," in header and "Strategy;" in header:
        print("[WARN] Header duplicato rilevato. Uso solo la parte prima della virgola.")
        raw = "\n".join([header.split(",", 1)[0]] + lines[1:])
        sep = ";"  # force semicolon when duplicated header detected
    else:
        sep = ";" if n_semi >= n_comma else ","

    df = pd.read_csv(io.StringIO(raw), sep=sep, dtype=str)

    missing = [c for c in REQUIRED_AUDIT_COLS if c not in df.columns]
    if missing:
        raise SystemExit(
            f"Formato audit log non valido. Colonne mancanti: {missing}. "
            f"Colonne trovate: {list(df.columns)}"
        )

    # Normalize key columns.
    df["Action"] = df["Action"].astype(str).str.upper().str.strip()
    df["Strategy"] = df["Strategy"].astype(str).str.strip()
    df["ISIN"] = df["ISIN"].astype(str).str.strip()

    # Dates (dayfirst=True: dates assumed DD/MM — TODO confirm with producer).
    df["TradeDate"] = pd.to_datetime(df["TradeDate"], errors="coerce", dayfirst=True)
    if "LinkedOpenDate" in df.columns:
        df["LinkedOpenDate"] = pd.to_datetime(df["LinkedOpenDate"], errors="coerce", dayfirst=True)

    # Drop rows whose TradeDate failed to parse.
    n_before = len(df)
    df = df.dropna(subset=["TradeDate"])
    dropped = n_before - len(df)
    if dropped > 0:
        print(f"[WARN] Rimosse {dropped} righe con TradeDate non valido.")

    # Keep only OPEN/CLOSE rows if the Action column is present.
    if "Action" in df.columns:
        n_before = len(df)
        df = df[df["Action"].isin(["OPEN", "CLOSE"])]
        dropped = n_before - len(df)
        if dropped > 0:
            print(f"[WARN] Rimosse {dropped} righe con Action non valida.")

    # Numeric cleanup on whichever known numeric columns exist.
    for col in NUMERIC_COLS:
        if col in df.columns:
            df[col] = _clean_numeric_series(df[col])

    return df
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
# FETCH RENDIMENTI DAL DB
|
# FETCH RENDIMENTI DAL DB
|
||||||
@@ -198,6 +313,13 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p
|
|||||||
else:
|
else:
|
||||||
close_map = pd.DataFrame().set_index(pd.Index([], name="_key"))
|
close_map = pd.DataFrame().set_index(pd.Index([], name="_key"))
|
||||||
|
|
||||||
|
# debug counters
|
||||||
|
total_opens = 0
|
||||||
|
used_opens = 0
|
||||||
|
skipped_missing_isin = 0
|
||||||
|
skipped_bad_amount = 0
|
||||||
|
skipped_bad_window = 0
|
||||||
|
|
||||||
for strat in strategies:
|
for strat in strategies:
|
||||||
aud_s = audit[audit["Strategy"] == strat]
|
aud_s = audit[audit["Strategy"] == strat]
|
||||||
opens = aud_s[aud_s["Action"] == "OPEN"].copy()
|
opens = aud_s[aud_s["Action"] == "OPEN"].copy()
|
||||||
@@ -211,13 +333,16 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p
|
|||||||
)
|
)
|
||||||
|
|
||||||
for _, op in opens.iterrows():
|
for _, op in opens.iterrows():
|
||||||
|
total_opens += 1
|
||||||
isin = op["ISIN"]
|
isin = op["ISIN"]
|
||||||
if isin not in returns_wide.columns:
|
if isin not in returns_wide.columns:
|
||||||
|
skipped_missing_isin += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ser = returns_wide[isin].astype(float)
|
ser = returns_wide[isin].astype(float)
|
||||||
entry_amount = float(op.get("EntryAmount", 0.0) or 0.0)
|
entry_amount = float(op.get("EntryAmount", 0.0) or 0.0)
|
||||||
if entry_amount <= 0:
|
if entry_amount <= 0:
|
||||||
|
skipped_bad_amount += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
entry_idx = int(op.get("EntryIndex", 0) or 0)
|
entry_idx = int(op.get("EntryIndex", 0) or 0)
|
||||||
@@ -236,6 +361,7 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p
|
|||||||
exit_idx = len(ser)
|
exit_idx = len(ser)
|
||||||
|
|
||||||
if exit_idx <= entry_idx:
|
if exit_idx <= entry_idx:
|
||||||
|
skipped_bad_window += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
idx_seg = ser.index[entry_idx:exit_idx]
|
idx_seg = ser.index[entry_idx:exit_idx]
|
||||||
@@ -243,6 +369,7 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p
|
|||||||
|
|
||||||
daily_num.loc[idx_seg, strat] += entry_amount * vals_seg
|
daily_num.loc[idx_seg, strat] += entry_amount * vals_seg
|
||||||
daily_den.loc[idx_seg, strat] += entry_amount
|
daily_den.loc[idx_seg, strat] += entry_amount
|
||||||
|
used_opens += 1
|
||||||
|
|
||||||
daily = pd.DataFrame(0.0, index=idx, columns=strategies)
|
daily = pd.DataFrame(0.0, index=idx, columns=strategies)
|
||||||
mask = daily_den > 0
|
mask = daily_den > 0
|
||||||
@@ -256,6 +383,13 @@ def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> p
|
|||||||
)
|
)
|
||||||
debug.to_csv(OUT_DEBUG_CSV, index_label="Date")
|
debug.to_csv(OUT_DEBUG_CSV, index_label="Date")
|
||||||
|
|
||||||
|
print(
|
||||||
|
f"[DEBUG] OPEN totali: {total_opens}, usati: {used_opens}, "
|
||||||
|
f"mancano ISIN: {skipped_missing_isin}, "
|
||||||
|
f"EntryAmount<=0: {skipped_bad_amount}, "
|
||||||
|
f"finestra non valida: {skipped_bad_window}"
|
||||||
|
)
|
||||||
|
|
||||||
return daily
|
return daily
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -267,11 +401,8 @@ def main():
|
|||||||
if not AUDIT_LOG_CSV.exists():
|
if not AUDIT_LOG_CSV.exists():
|
||||||
raise FileNotFoundError("Missing trades_audit_log.csv")
|
raise FileNotFoundError("Missing trades_audit_log.csv")
|
||||||
|
|
||||||
# parsing robusto (LinkedOpenDate può mancare)
|
# parsing robusto con controllo formato
|
||||||
try:
|
audit = load_audit_log(AUDIT_LOG_CSV)
|
||||||
audit = pd.read_csv(AUDIT_LOG_CSV, parse_dates=["TradeDate", "LinkedOpenDate"])
|
|
||||||
except ValueError:
|
|
||||||
audit = pd.read_csv(AUDIT_LOG_CSV, parse_dates=["TradeDate"])
|
|
||||||
|
|
||||||
if audit.empty:
|
if audit.empty:
|
||||||
raise SystemExit("Audit log vuoto.")
|
raise SystemExit("Audit log vuoto.")
|
||||||
|
|||||||
Reference in New Issue
Block a user