riorganizzazione cartella

This commit is contained in:
fredmaloggia
2026-05-24 13:16:05 +02:00
parent 3c3f2a7705
commit ebb8114879
5 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,997 @@
# -*- coding: utf-8 -*-
"""Trading Pattern Recon w Hurst - Forex
VERSIONE CORRETTA (EOD Close-to-Close) con SHORT, Wavelet denoise (DB) su EstOutcome,
fee 1bp e cap esposizione per singola valuta 35%.
REQUISITI IMPLEMENTATI (come da tue istruzioni):
1) Denoise su EstOutcome (rolling/online, NO look-ahead)
2) Wavelet Daubechies (DB) -> db4
3) Operatività EOD: decisione a close(t), esecuzione a close(t)
4) PnL sempre su close(t+1)/close(t) (cioè Ret_fwd = close[t+1]/close[t]-1)
5) Fee = 1 bp applicata sui cambi di posizione (enter/exit/flip) al close(t)
6) Cap esposizione per singola valuta = 35% (net exposure per currency)
7) Per il resto: mantiene lo schema originale (shared_utils, per-asset backtest, ranking, Top15, EW e Risk Parity)
NOTA:
- Lo script usa SOLO serie AdjClose dal tuo endpoint (stesso JSON di prima).
- Non usa Open/High/Low e non introduce logiche intraday.
Output principali (cartella OUT_DIR):
- forward_bt_signals.csv (per-ticker: Signal, EstOutcomeRaw, EstOutcomeDenoised, Ret_fwd, PnL)
- forward_bt_summary.csv (metriche per ticker)
- ranking_score.csv (ranking + score)
- topn_tickers.csv (TopN dal ranking)
- portfolio_daily.csv (ret/eq line EW e RP)
- weights_eq_active_capped.csv / weights_rp_active_capped.csv (pesi con cash + cap valuta)
- currency_exposure_eq.csv / currency_exposure_rp.csv (diagnostica esposizioni)
Dipendenze:
- numpy, pandas, matplotlib, requests
- PyWavelets (pip install PyWavelets)
"""
from __future__ import annotations
import sys
import types
from pathlib import Path
from urllib.parse import quote
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import time
# -------------------------
# Wavelets
# -------------------------
try:
import pywt
except ImportError as e:
raise ImportError("Manca PyWavelets (pywt). Installa con: pip install PyWavelets") from e
# ------------------------------------------------------------
# shared_utils import (local file next to this script)
# ------------------------------------------------------------
# Alcune versioni di shared_utils importano moduli opzionali (es. pyodbc). Evitiamo crash.
sys.modules.setdefault("pyodbc", types.SimpleNamespace())
import importlib.util
SHARED_UTILS_PATH = Path(__file__).with_name("shared_utils.py")
if not SHARED_UTILS_PATH.exists():
raise FileNotFoundError(f"shared_utils.py non trovato accanto allo script: {SHARED_UTILS_PATH}")
spec = importlib.util.spec_from_file_location("shared_utils", str(SHARED_UTILS_PATH))
shared_utils = importlib.util.module_from_spec(spec)
sys.modules["shared_utils"] = shared_utils
assert spec.loader is not None
spec.loader.exec_module(shared_utils)
build_pattern_library = shared_utils.build_pattern_library
predict_from_library = shared_utils.predict_from_library
z_norm = shared_utils.z_norm
# ============================================================
# CONFIG
# ============================================================
TICKERS = [
"EURUSD Curncy",
"USDJPY Curncy",
"GBPUSD Curncy",
"USDCHF Curncy",
"USDCAD Curncy",
"AUDUSD Curncy",
"NZDUSD Curncy",
"EURJPY Curncy",
"EURGBP Curncy",
"EURCHF Curncy",
"EURCAD Curncy",
"EURAUD Curncy",
"EURNZD Curncy",
"GBPJPY Curncy",
"GBPCHF Curncy",
"GBPCAD Curncy",
"GBPAUD Curncy",
"GBPNZD Curncy",
"CHFJPY Curncy",
"CADJPY Curncy",
"AUDJPY Curncy",
"NZDJPY Curncy",
"AUDNZD Curncy",
"AUDCAD Curncy",
"AUDCHF Curncy",
"NZDCAD Curncy",
"NZDCHF Curncy",
"CADCHF Curncy",
]
TICKERS = [t.strip() for t in TICKERS if isinstance(t, str) and t.strip()]
BASE_URL = "https://fin.scorer.app/finance/v2/history"
FROM_DATE = "20201224"
# Pattern params (come nello schema originale)
WP = 60
HA = 10
KNN_K = 25
# Short attivo per FX
ALLOW_SHORT = True
# Fee (FIX: 1 bp)
FEE_BPS = 1
# Exit controls (EOD approximation su Ret_fwd)
SL_BPS = 300.0
TP_BPS = 800.0
TRAIL_BPS = 300.0
TIME_STOP_BARS = 20
THETA_EXIT = 0.0
# Theta entry fallback se Hurst non disponibile
THETA_FALLBACK = 0.005
# Portfolio construction
TOP_N = 15
RP_MAX_WEIGHT = 2.0 / TOP_N # 0.1333... (come in legenda che avevi visto)
RANKING_WINDOW_BARS = 252
RP_LOOKBACK = 60
# Currency cap (net exposure per currency)
CURRENCY_CAP = 0.35
# Wavelet denoise su EstOutcome
DENOISE_ENABLED = True
DENOISE_WAVELET = "db4" # DB family (Daubechies)
DENOISE_LEVEL = 3
DENOISE_MIN_LEN = 96
DENOISE_THRESHOLD_MODE = "soft"
DAYS_PER_YEAR = 252
OUT_DIR = Path("./out_forex")
PLOT_DIR = Path("./plot_forex")
OUT_DIR.mkdir(parents=True, exist_ok=True)
PLOT_DIR.mkdir(parents=True, exist_ok=True)
# ============================================================
# Helpers: JSON -> DataFrame (AdjClose)
# ============================================================
def _detect_col(cols, candidates):
cols_l = {c.lower(): c for c in cols}
for cand in candidates:
if cand.lower() in cols_l:
return cols_l[cand.lower()]
# fallback: contains
for cand in candidates:
for c in cols:
if cand.lower() in str(c).lower():
return c
return None
def fetch_price_series(ticker: str, from_date: str) -> pd.DataFrame:
"""Scarica il JSON e restituisce DataFrame indicizzato per Date con colonna AdjClose."""
url = f"{BASE_URL}/{quote(ticker)}?fromDate={from_date}"
r = requests.get(url, timeout=30)
r.raise_for_status()
obj = r.json()
# Gestione schemi JSON possibili
if isinstance(obj, list) and len(obj) == 1 and isinstance(obj[0], dict) and "data" in obj[0]:
obj = obj[0]["data"]
if not isinstance(obj, list):
raise ValueError(f"Schema JSON inatteso per {ticker}: {type(obj)}")
df = pd.DataFrame(obj)
if df.empty:
raise ValueError(f"Nessuna riga per {ticker}")
col_date = _detect_col(df.columns, ["date", "datetime", "timestamp", "time"])
if col_date is None:
raise ValueError(f"Colonna date non trovata per {ticker}. Colonne: {df.columns.tolist()}")
# Priorità: AdjClose, altrimenti Close
col_px = _detect_col(df.columns, ["adj_close", "adjclose", "adjusted_close", "Adj Close", "AdjClose"])
if col_px is None:
col_px = _detect_col(df.columns, ["close", "px_last", "last", "price"])
if col_px is None:
raise ValueError(f"Colonna prezzo non trovata per {ticker}. Colonne: {df.columns.tolist()}")
df[col_date] = pd.to_datetime(df[col_date], errors="coerce", utc=True).dt.tz_localize(None)
df[col_px] = pd.to_numeric(df[col_px], errors="coerce")
df = df.dropna(subset=[col_date, col_px]).sort_values(col_date)
df = df.drop_duplicates(subset=[col_date]).set_index(col_date)
# Normalizza indice a date
df.index = pd.to_datetime(df.index).normalize()
df = df[~df.index.duplicated(keep="last")]
out = pd.DataFrame(index=df.index)
out.index.name = "Date"
out["AdjClose"] = df[col_px].astype(float)
return out.sort_index()
# ============================================================
# Hurst (RS + DFA) su returns
# ============================================================
def hurst_rs_returns(r: np.ndarray, win_grid=None, min_seg=1) -> float:
r = pd.Series(r).dropna().astype("float64").values
n = len(r)
if n < 200:
return np.nan
if win_grid is None:
base = np.array([16, 24, 32, 48, 64, 96, 128, 192, 256, 384], dtype=int)
win_grid = [w for w in base if w <= n // 2]
RS_vals, sizes = [], []
for w in win_grid:
m = n // w
if w < 8 or m < min_seg:
continue
rs_list = []
for i in range(m):
seg = r[i * w : (i + 1) * w]
seg = seg - np.mean(seg)
sd = seg.std(ddof=1)
if sd == 0 or not np.isfinite(sd):
continue
y = np.cumsum(seg)
rs = (np.max(y) - np.min(y)) / sd
if np.isfinite(rs) and rs > 0:
rs_list.append(rs)
if rs_list:
RS_vals.append(np.mean(rs_list))
sizes.append(w)
if len(RS_vals) < 3:
return np.nan
sizes = np.array(sizes, float)
RS_vals = np.array(RS_vals, float)
mask = np.isfinite(RS_vals) & (RS_vals > 0)
sizes, RS_vals = sizes[mask], RS_vals[mask]
if sizes.size < 3:
return np.nan
slope, _ = np.polyfit(np.log(sizes), np.log(RS_vals), 1)
return float(np.clip(slope, 0.0, 1.0)) if np.isfinite(slope) else np.nan
def hurst_dfa_returns(r: np.ndarray, win_grid=None) -> float:
r = pd.Series(r).dropna().astype("float64").values
n = len(r)
if n < 200:
return np.nan
y = np.cumsum(r - np.mean(r))
if win_grid is None:
base = np.array([16, 24, 32, 48, 64, 96, 128, 192, 256], dtype=int)
win_grid = [w for w in base if w <= n // 2]
F_vals, sizes = [], []
for s in win_grid:
m = n // s
if s < 8 or m < 2:
continue
rms_list = []
for i in range(m):
seg = y[i * s : (i + 1) * s]
t = np.arange(s, dtype=float)
A = np.vstack([t, np.ones(s)]).T
coeff, *_ = np.linalg.lstsq(A, seg, rcond=None)
detr = seg - (A @ coeff)
rms = np.sqrt(np.mean(detr**2))
if np.isfinite(rms) and rms > 0:
rms_list.append(rms)
if rms_list:
F_vals.append(np.mean(rms_list))
sizes.append(s)
if len(F_vals) < 3:
return np.nan
sizes = np.array(sizes, float)
F_vals = np.array(F_vals, float)
mask = np.isfinite(F_vals) & (F_vals > 0)
sizes, F_vals = sizes[mask], F_vals[mask]
if sizes.size < 3:
return np.nan
slope, _ = np.polyfit(np.log(sizes), np.log(F_vals), 1)
return float(np.clip(slope, 0.0, 1.0)) if np.isfinite(slope) else np.nan
# ============================================================
# Wavelet denoise (ONLINE, NO look-ahead)
# ============================================================
def wavelet_denoise_last_online(x: np.ndarray) -> float:
"""Denoise usando SOLO la storia fino a t. Ritorna l'ultimo punto denoisato."""
x = np.asarray(x, dtype=float)
n = x.size
if n < DENOISE_MIN_LEN:
return float(x[-1])
s = pd.Series(x).ffill().bfill().values
wav = pywt.Wavelet(DENOISE_WAVELET)
max_level = pywt.dwt_max_level(n, wav.dec_len)
level = int(min(DENOISE_LEVEL, max_level)) if max_level > 0 else 1
coeffs = pywt.wavedec(s, DENOISE_WAVELET, level=level)
detail = coeffs[-1]
if detail.size == 0:
return float(s[-1])
sigma = np.median(np.abs(detail - np.median(detail))) / 0.6745
if not np.isfinite(sigma) or sigma <= 0:
return float(s[-1])
thr = sigma * np.sqrt(2 * np.log(n))
coeffs_th = [coeffs[0]]
for d in coeffs[1:]:
coeffs_th.append(pywt.threshold(d, thr, mode=DENOISE_THRESHOLD_MODE))
rec = pywt.waverec(coeffs_th, DENOISE_WAVELET)
rec = rec[:n]
return float(rec[-1])
# ============================================================
# Per-asset forward EOD backtest
# ============================================================
def forward_backtest_one_asset(close: pd.Series, theta_entry: float, allow_short: bool) -> pd.DataFrame:
"""Backtest EOD:
- Segnale calcolato a close(t) (si usa finestra di returns che include il return di t).
- Esecuzione al close(t).
- PnL per step t = Signal(t) * Ret_fwd(t) - fee * abs(diff(Signal)).
dove Ret_fwd(t) = close(t+1)/close(t)-1.
Output per Date=t con Ret_fwd già allineato.
"""
close = pd.to_numeric(close, errors="coerce").dropna()
close = close[~close.index.duplicated(keep="last")].sort_index()
# returns log per pattern matching
r_log = np.log(close / close.shift(1))
r_log = r_log.dropna()
# forward return (close-to-close): Ret_fwd[t] = close[t+1]/close[t]-1
ret_fwd = close.pct_change().shift(-1)
# Allineamento su date comuni
idx = r_log.index.intersection(ret_fwd.index)
r_log = r_log.loc[idx]
ret_fwd = ret_fwd.loc[idx]
pos = 0
entry_t = None
trade_pnl = 0.0
trade_peak = 0.0
est_hist = []
rows = []
# t è indice relativo su r_log/ret_fwd
for t in range(WP, len(r_log) - 1):
dt = r_log.index[t]
past = r_log.iloc[:t]
if past.dropna().shape[0] < (WP + HA):
rows.append((dt, pos, np.nan, np.nan, np.nan, float(ret_fwd.iloc[t])))
continue
lib_wins, lib_out = build_pattern_library(past, WP, HA)
if lib_wins is None or lib_out is None or len(lib_out) == 0:
rows.append((dt, pos, np.nan, np.nan, np.nan, float(ret_fwd.iloc[t])))
continue
win_last = r_log.iloc[t - WP : t].values
curr_zn = z_norm(win_last)
if curr_zn is None:
rows.append((dt, pos, np.nan, np.nan, np.nan, float(ret_fwd.iloc[t])))
continue
est_raw, avg_dist, _ = predict_from_library(curr_zn, lib_wins, lib_out, k=KNN_K)
est_raw = float(est_raw)
avg_dist = float(avg_dist)
est_hist.append(est_raw)
est_use = wavelet_denoise_last_online(np.array(est_hist, dtype=float)) if DENOISE_ENABLED else est_raw
# ===== ENTRY =====
if pos == 0:
if est_use > theta_entry:
pos = 1
entry_t = t
trade_pnl = 0.0
trade_peak = 0.0
elif allow_short and est_use < -theta_entry:
pos = -1
entry_t = t
trade_pnl = 0.0
trade_peak = 0.0
# ===== EXIT (EOD approximation: usa Ret_fwd della prossima barra t per aggiornare pnl) =====
else:
next_ret = float(ret_fwd.iloc[t])
signed_next = next_ret if pos == 1 else (-next_ret)
pnl_if_stay = (1.0 + trade_pnl) * (1.0 + signed_next) - 1.0
peak_if_stay = max(trade_peak, pnl_if_stay)
exit_now = False
if SL_BPS is not None and pnl_if_stay <= -SL_BPS / 10000.0:
exit_now = True
if TP_BPS is not None and pnl_if_stay >= TP_BPS / 10000.0:
exit_now = True
if TRAIL_BPS is not None and (peak_if_stay - pnl_if_stay) >= TRAIL_BPS / 10000.0:
exit_now = True
if TIME_STOP_BARS is not None and entry_t is not None and (t - entry_t + 1) >= TIME_STOP_BARS:
exit_now = True
# Theta exit simmetrico
if THETA_EXIT is not None:
if pos == 1:
is_weak = est_use <= THETA_EXIT
else:
is_weak = est_use >= -THETA_EXIT
if is_weak:
exit_now = True
if exit_now:
pos = 0
entry_t = None
trade_pnl = 0.0
trade_peak = 0.0
else:
trade_pnl = pnl_if_stay
trade_peak = peak_if_stay
rows.append((dt, pos, est_raw, est_use, avg_dist, float(ret_fwd.iloc[t])))
df = pd.DataFrame(
rows,
columns=["Date", "Signal", "EstOutcomeRaw", "EstOutcome", "AvgDist", "Ret_fwd"],
).set_index("Date")
fee = FEE_BPS / 10000.0
trade_chg = df["Signal"].diff().abs().fillna(0.0)
df["PnL"] = df["Signal"] * df["Ret_fwd"] - trade_chg * fee
return df
# ============================================================
# Portfolio utilities
# ============================================================
def equity_from_returns(r: pd.Series) -> pd.Series:
r = pd.to_numeric(r, errors="coerce").fillna(0.0)
return (1 + r).cumprod() * 100
def monthly_returns(r: pd.Series) -> pd.Series:
r = pd.to_numeric(r, errors="coerce").fillna(0.0)
return (1 + r).resample("M").prod() - 1
def plot_heatmap_monthly(r: pd.Series, title: str):
m = monthly_returns(r)
df = m.to_frame("ret")
df["Year"], df["Month"] = df.index.year, df.index.month
pv = df.pivot(index="Year", columns="Month", values="ret")
fig, ax = plt.subplots(figsize=(10, 6))
im = ax.imshow(pv.fillna(0) * 100, aspect="auto")
for i in range(pv.shape[0]):
for j in range(pv.shape[1]):
val = pv.iloc[i, j]
if not np.isnan(val):
ax.text(j, i, f"{val*100:.1f}", ha="center", va="center", fontsize=8)
ax.set_title(title)
ax.set_xlabel("Mese")
ax.set_ylabel("Anno")
ax.set_xticks(range(12))
ax.set_xticklabels(range(1, 13))
fig.colorbar(im, ax=ax, label="%")
plt.tight_layout()
return fig
# ------------------------------------------------------------
# Progress timer (post-test checkpoints)
# ------------------------------------------------------------
def _format_eta(seconds):
if seconds is None or seconds != seconds:
return "n/a"
seconds = max(0, int(round(seconds)))
minutes, secs = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60)
if hours:
return f"{hours}h {minutes:02d}m {secs:02d}s"
return f"{minutes}m {secs:02d}s"
_post_timer = {"t0": None, "tprev": None, "total": None, "done": 0}
def start_post_timer(total_steps: int):
_post_timer["t0"] = time.perf_counter()
_post_timer["tprev"] = _post_timer["t0"]
_post_timer["total"] = total_steps
_post_timer["done"] = 0
def checkpoint_post_timer(label: str):
if _post_timer["t0"] is None or _post_timer["total"] is None:
return
_post_timer["done"] += 1
now = time.perf_counter()
step_dt = now - _post_timer["tprev"]
total_dt = now - _post_timer["t0"]
avg = total_dt / max(_post_timer["done"], 1)
eta = avg * max(_post_timer["total"] - _post_timer["done"], 0)
print(f"[TIMER] post {_post_timer['done']}/{_post_timer['total']} {label} - step {step_dt:.2f}s, total {total_dt:.2f}s, ETA {_format_eta(eta)}")
_post_timer["tprev"] = now
def _currency_allocation_from_exposure(exp_df: pd.DataFrame) -> pd.DataFrame:
"""Convert net currency exposure to normalized gross allocation by currency."""
if exp_df is None or getattr(exp_df, "empty", True):
return pd.DataFrame()
W = exp_df.copy().apply(pd.to_numeric, errors="coerce").fillna(0.0)
if W.index.has_duplicates:
W = W[~W.index.duplicated(keep="last")]
W = W.sort_index()
W = W.abs()
keep_cols = [c for c in W.columns if float(np.abs(W[c]).sum()) > 0.0]
if keep_cols:
W = W[keep_cols]
row_sum = W.sum(axis=1).replace(0, np.nan)
W = W.div(row_sum, axis=0).fillna(0.0)
return W
def plot_portfolio_composition_fixed(
weights: pd.DataFrame,
title: str,
save_path: Path | None = None,
max_legend: int = 20,
):
"""Stacked area dei pesi nel tempo (allocazione per valuta)."""
if weights is None or getattr(weights, "empty", True):
print(f"[SKIP] Nessun peso per: {title}")
return
W = weights.copy().apply(pd.to_numeric, errors="coerce").fillna(0.0)
if W.index.has_duplicates:
W = W[~W.index.duplicated(keep="last")]
W = W.sort_index()
keep_cols = [c for c in W.columns if float(np.abs(W[c]).sum()) > 0.0]
if not keep_cols:
print(f"[SKIP] Tutti i pesi sono zero per: {title}")
return
W = W[keep_cols]
if len(W.index) < 2:
print(f"[SKIP] Serie troppo corta per: {title}")
return
avg_w = W.mean(0).sort_values(ascending=False)
ordered = avg_w.index.tolist()
if len(ordered) > max_legend:
head = ordered[:max_legend]
tail = [c for c in ordered if c not in head]
W_show = W[head].copy()
if tail:
W_show["Other"] = W[tail].sum(1)
ordered = head + ["Other"]
else:
ordered = head
else:
W_show = W[ordered].copy()
cmap = plt.colormaps.get_cmap("tab20")
colors = [cmap(i % cmap.N) for i in range(len(ordered))]
fig, ax = plt.subplots(figsize=(11, 6))
ax.stackplot(W_show.index, [W_show[c].values for c in ordered], labels=ordered, colors=colors)
ax.set_title(f"Composizione valute nel tempo - {title}")
ymax = float(np.nanmax(W_show.sum(1).values))
if not np.isfinite(ymax) or ymax <= 0:
ymax = 1.0
ax.set_ylim(0, max(1.0, ymax))
ax.grid(True, alpha=0.3)
ax.set_ylabel("Peso")
ax.set_yticks(ax.get_yticks())
ax.set_yticklabels([f"{y*100:.0f}%" for y in ax.get_yticks()])
ncol = 2 if len(ordered) > 10 else 1
ax.legend(loc="upper left", bbox_to_anchor=(1.01, 1), frameon=False, ncol=ncol, title="Currency")
fig.tight_layout()
if save_path:
fig.savefig(save_path, dpi=150, bbox_inches="tight")
print(f"[INFO] Salvato: {save_path}")
plt.close(fig)
def inverse_vol_weights(returns_df: pd.DataFrame, window: int, max_weight: float | None) -> pd.DataFrame:
vol = returns_df.rolling(window).std()
inv = 1 / vol.replace(0, np.nan)
w = inv.div(inv.sum(axis=1), axis=0)
w = w.ffill()
if max_weight is not None:
w = w.clip(upper=max_weight)
return w
def make_active_weights(w_target: pd.DataFrame, sig: pd.DataFrame, add_cash: bool = True, cash_label: str = "Cash") -> pd.DataFrame:
# Attivo se Signal != 0 (sia long che short)
w = (w_target * (sig != 0)).fillna(0.0)
if add_cash:
w[cash_label] = (1.0 - w.sum(axis=1)).clip(lower=0.0)
return w
# ============================================================
# FX currency exposure cap (net exposure per currency)
# ============================================================
def parse_fx_pair(ticker: str) -> tuple[str, str] | None:
# "EURUSD Curncy" -> ("EUR","USD")
if not isinstance(ticker, str):
return None
pair = ticker.split()[0].strip().upper()
if len(pair) < 6:
return None
return pair[:3], pair[3:6]
def currency_exposure_from_weights(weights_row: pd.Series, sig_row: pd.Series) -> dict[str, float]:
"""Net exposure per currency, considerando anche la direzione (long/short).
Long EURUSD (sig=+1): +w EUR, -w USD
Short EURUSD (sig=-1): -w EUR, +w USD
"""
exp: dict[str, float] = {}
for tkr, w in weights_row.items():
if tkr == "Cash":
continue
s = float(sig_row.get(tkr, 0.0))
if s == 0 or w == 0 or not np.isfinite(w):
continue
pq = parse_fx_pair(tkr)
if pq is None:
continue
base, quote = pq
exp[base] = exp.get(base, 0.0) + w * s
exp[quote] = exp.get(quote, 0.0) - w * s
return exp
def apply_currency_cap(weights_act: pd.DataFrame, sig: pd.DataFrame, cap: float) -> tuple[pd.DataFrame, pd.DataFrame]:
"""Applica cap |exposure(currency)| <= cap.
Metodo conservativo ma robusto (NO look-ahead):
- Se max_abs_exposure > cap, scala TUTTE le posizioni risky di fattore cap/max_abs_exposure,
e manda il residuo a Cash.
Ritorna:
- weights_capped (con Cash)
- exposure_wide (diagnostica: esposizioni per currency)
"""
w = weights_act.copy()
if "Cash" not in w.columns:
w["Cash"] = (1.0 - w.sum(axis=1)).clip(lower=0.0)
tick_cols = [c for c in w.columns if c != "Cash"]
# raccogliamo anche esposizioni
all_ccy = set()
exp_dicts: dict[pd.Timestamp, dict[str, float]] = {}
for dt in w.index:
sig_row = sig.loc[dt] if dt in sig.index else pd.Series(dtype=float)
wr = w.loc[dt, tick_cols].fillna(0.0)
exp = currency_exposure_from_weights(wr, sig_row)
exp_dicts[pd.to_datetime(dt)] = exp
all_ccy.update(exp.keys())
if not exp:
w.loc[dt, "Cash"] = (1.0 - float(wr.sum())).clip(lower=0.0) if hasattr(float(wr.sum()), 'clip') else max(0.0, 1.0 - float(wr.sum()))
continue
max_abs = max(abs(v) for v in exp.values())
if np.isfinite(max_abs) and max_abs > cap and max_abs > 1e-12:
scale = cap / max_abs
wr2 = wr * scale
w.loc[dt, tick_cols] = wr2.values
w.loc[dt, "Cash"] = max(0.0, 1.0 - float(wr2.sum()))
else:
w.loc[dt, "Cash"] = max(0.0, 1.0 - float(wr.sum()))
# exposure wide
all_ccy = sorted(all_ccy)
exp_wide = pd.DataFrame(0.0, index=w.index, columns=all_ccy)
for dt, ex in exp_dicts.items():
if dt in exp_wide.index:
for c, v in ex.items():
exp_wide.loc[dt, c] = v
return w, exp_wide
# ============================================================
# Ranking / scoring (semplice, robusto)
# ============================================================
def drawdown_stats_simple(ret: pd.Series) -> dict:
ret = pd.to_numeric(ret, errors="coerce").fillna(0.0)
eq = (1 + ret).cumprod()
peak = eq.cummax()
dd = eq / peak - 1.0
cagr = eq.iloc[-1] ** (DAYS_PER_YEAR / max(1, len(eq))) - 1
annvol = ret.std() * np.sqrt(DAYS_PER_YEAR)
sharpe = (ret.mean() / (ret.std() + 1e-12)) * np.sqrt(DAYS_PER_YEAR)
maxdd = dd.min() if len(dd) else np.nan
calmar = (cagr / abs(maxdd)) if (np.isfinite(cagr) and np.isfinite(maxdd) and maxdd < 0) else np.nan
return {
"CAGR_%": float(np.round(cagr * 100, 3)) if np.isfinite(cagr) else np.nan,
"AnnVol_%": float(np.round(annvol * 100, 3)) if np.isfinite(annvol) else np.nan,
"Sharpe": float(np.round(sharpe, 3)) if np.isfinite(sharpe) else np.nan,
"MaxDD_%": float(np.round(maxdd * 100, 3)) if np.isfinite(maxdd) else np.nan,
"Calmar": float(np.round(calmar, 3)) if np.isfinite(calmar) else np.nan,
}
def compute_score(summary_df: pd.DataFrame) -> pd.DataFrame:
"""Score semplice e stabile (senza ottimizzazioni fancy):
- alto Sharpe e CAGR, basso MaxDD
- normalizzazione via rank percentili
Mantiene colonne di diagnostica (mode) per trasparenza.
"""
df = summary_df.copy()
# ranks 0..1
def rnk(s):
s = pd.to_numeric(s, errors="coerce")
n = s.notna().sum()
if n <= 1:
return pd.Series(np.nan, index=s.index)
return s.rank(method="average") / n
sharpe_r = rnk(df.get("Sharpe"))
cagr_r = rnk(df.get("CAGR_%"))
maxdd_r = rnk(-pd.to_numeric(df.get("MaxDD_%"), errors="coerce")) # meno negativo = meglio
df["Score_mode"] = "rank_sharpe_cagr_maxdd"
df["Score"] = (0.5 * sharpe_r.fillna(0) + 0.3 * cagr_r.fillna(0) + 0.2 * maxdd_r.fillna(0))
return df
# ============================================================
# MAIN
# ============================================================
def main():
print("\n=== Trading Pattern Recon w Hurst - Forex (CORRETTO) ===")
print(f"Fee: {FEE_BPS} bp | Short: {ALLOW_SHORT} | Currency cap: {CURRENCY_CAP:.2f}")
print(f"Wavelet denoise: {DENOISE_ENABLED} ({DENOISE_WAVELET}, level={DENOISE_LEVEL}, min_len={DENOISE_MIN_LEN})")
print("Esecuzione: close(t), PnL: close(t+1)/close(t)\n")
start_post_timer(5)
# 1) Fetch prices
prices: dict[str, pd.DataFrame] = {}
for tkr in TICKERS:
try:
print(f"Fetching {tkr} ...")
prices[tkr] = fetch_price_series(tkr, FROM_DATE)
except Exception as e:
print(f"[WARN] Skip {tkr}: {e}")
if len(prices) < 5:
raise RuntimeError(f"Pochi ticker validi ({len(prices)}).")
checkpoint_post_timer("Price fetch")
# 2) Per ticker backtest
hurst_rows = []
summary_rows = []
sig_rows = []
total = len(prices)
for i, (tkr, dfp) in enumerate(prices.items(), 1):
print(f"[{i}/{total}] Testing {tkr} ...")
if "AdjClose" not in dfp.columns:
continue
close = dfp["AdjClose"].copy()
close = pd.to_numeric(close, errors="coerce").dropna()
if len(close) < (WP + HA + 80):
print(f"[WARN] Serie troppo corta per {tkr}: len={len(close)}")
continue
r_log = np.log(close / close.shift(1)).dropna()
if len(r_log) < 250:
print(f"[WARN] Serie troppo corta per Hurst per {tkr}")
h_rs = hurst_rs_returns(r_log.values)
h_dfa = hurst_dfa_returns(r_log.values)
H = np.nanmedian([h_rs, h_dfa])
H = float(H) if np.isfinite(H) else np.nan
theta_entry = (H / 100.0) if np.isfinite(H) else THETA_FALLBACK
hurst_rows.append({"Ticker": tkr, "Hurst": H, "theta_entry": float(theta_entry)})
bt = forward_backtest_one_asset(close=close, theta_entry=float(theta_entry), allow_short=ALLOW_SHORT)
bt = bt.copy()
bt.insert(0, "Ticker", tkr)
sig_rows.append(bt.reset_index())
stats = drawdown_stats_simple(bt["PnL"])
hit = 100.0 * ((bt["PnL"] > 0).sum() / max(1, bt["PnL"].notna().sum()))
turnover = bt["Signal"].diff().abs().fillna(0.0).mean() * 100.0 # % steps changed (0..200)
stats.update({
"Ticker": tkr,
"HitRate_%": float(np.round(hit, 3)),
"AvgStepRet_bps": float(np.round(bt["PnL"].mean() * 10000, 3)),
"Turnover_%/day": float(np.round(turnover, 3)),
"N_Steps": int(bt.shape[0]),
"theta_entry": float(theta_entry),
"theta_exit": float(THETA_EXIT) if THETA_EXIT is not None else np.nan,
"sl_bps": float(SL_BPS) if SL_BPS is not None else np.nan,
"tp_bps": float(TP_BPS) if TP_BPS is not None else np.nan,
"trail_bps": float(TRAIL_BPS) if TRAIL_BPS is not None else np.nan,
"time_stop_bars": int(TIME_STOP_BARS) if TIME_STOP_BARS is not None else np.nan,
"allow_short": bool(ALLOW_SHORT),
"fee_bps": float(FEE_BPS),
"wavelet": DENOISE_WAVELET if DENOISE_ENABLED else "none",
"currency_cap": float(CURRENCY_CAP),
})
summary_rows.append(stats)
if not sig_rows:
raise RuntimeError("Nessun ticker backtestato con successo")
checkpoint_post_timer("Per-ticker backtest")
hurst_df = pd.DataFrame(hurst_rows).sort_values("Ticker").reset_index(drop=True)
summary_df = pd.DataFrame(summary_rows).sort_values("Ticker").reset_index(drop=True)
signals_df = pd.concat(sig_rows, ignore_index=True)
signals_df["Date"] = pd.to_datetime(signals_df["Date"]).dt.normalize()
# 3) Ranking / Score / TopN
ranking_df = compute_score(summary_df).sort_values("Score", ascending=False).reset_index(drop=True)
top_tickers = ranking_df.head(TOP_N)["Ticker"].astype(str).tolist()
print(f"\nTop{TOP_N} tickers (dal ranking): {top_tickers}\n")
# 4) Costruzione matrici wide
wide_pnl = signals_df.pivot_table(index="Date", columns="Ticker", values="PnL", aggfunc="sum").fillna(0.0).sort_index()
wide_sig = signals_df.pivot_table(index="Date", columns="Ticker", values="Signal", aggfunc="last").fillna(0).astype(int).sort_index()
wide_est = signals_df.pivot_table(index="Date", columns="Ticker", values="EstOutcome", aggfunc="last").sort_index()
# 5) Selezione dinamica TopN giornaliera: manteniamo lo schema classico (attivi + ranking statico)
# - Equal Weight: 1/N sui tickers del ranking (TopN statico)
# - Risk Parity: inverse-vol sui rendimenti dei tickers TopN statici
cols = [c for c in top_tickers if c in wide_pnl.columns]
if not cols:
raise RuntimeError("Nessun ticker del TopN presente in wide_pnl")
dates = wide_pnl.index
# target weights
w_eq_target = pd.DataFrame(0.0, index=dates, columns=wide_pnl.columns)
w_eq_target.loc[:, cols] = 1.0 / len(cols)
rp_hist = wide_pnl[cols]
w_rp_hist = inverse_vol_weights(rp_hist, window=RP_LOOKBACK, max_weight=RP_MAX_WEIGHT).reindex(dates).ffill().fillna(0.0)
w_rp_target = pd.DataFrame(0.0, index=dates, columns=wide_pnl.columns)
w_rp_target.loc[:, cols] = w_rp_hist.values
# rinormalizza sui cols
s = w_rp_target[cols].sum(axis=1).replace(0, np.nan)
w_rp_target.loc[:, cols] = w_rp_target[cols].div(s, axis=0).fillna(0.0)
# active weights (mask su Signal != 0) + cash
w_eq_act = make_active_weights(w_eq_target, wide_sig, add_cash=True)
w_rp_act = make_active_weights(w_rp_target, wide_sig, add_cash=True)
# currency cap
w_eq_cap, ccy_eq = apply_currency_cap(w_eq_act, wide_sig, cap=CURRENCY_CAP)
w_rp_cap, ccy_rp = apply_currency_cap(w_rp_act, wide_sig, cap=CURRENCY_CAP)
# portfolio returns (solo risky assets)
ret_eq = (wide_pnl * w_eq_cap.drop(columns=["Cash"], errors="ignore")).sum(axis=1).rename("Ret_EqW_TopN")
ret_rp = (wide_pnl * w_rp_cap.drop(columns=["Cash"], errors="ignore")).sum(axis=1).rename("Ret_RP_TopN")
eq_eq = equity_from_returns(ret_eq).rename("Eq_EqW_TopN")
eq_rp = equity_from_returns(ret_rp).rename("Eq_RP_TopN")
checkpoint_post_timer("Portfolio build")
# 6) Plots
plt.figure(figsize=(10, 5))
plt.plot(eq_eq, label=f"Equal Weight (Top{TOP_N}, fee {FEE_BPS}bp)")
plt.plot(eq_rp, label=f"Risk Parity (Top{TOP_N}, cap {RP_MAX_WEIGHT:.4f}, fee {FEE_BPS}bp)")
plt.title(f"Equity line portafogli FX (base 100) - Top{TOP_N} (shared_utils + URL)")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.savefig(PLOT_DIR / "equity_line.png", dpi=150)
plt.close()
fig_eq = plot_heatmap_monthly(ret_eq, f"Heatmap mensile - Equal Weight FX (Top{TOP_N})")
fig_eq.savefig(PLOT_DIR / "heatmap_eqw.png", dpi=150)
plt.close(fig_eq)
fig_rp = plot_heatmap_monthly(ret_rp, f"Heatmap mensile - Risk Parity FX (Top{TOP_N})")
fig_rp.savefig(PLOT_DIR / "heatmap_rp.png", dpi=150)
plt.close(fig_rp)
ccy_eq_alloc = _currency_allocation_from_exposure(ccy_eq)
ccy_rp_alloc = _currency_allocation_from_exposure(ccy_rp)
plot_portfolio_composition_fixed(
ccy_eq_alloc,
"Equal Weight (currency gross)",
PLOT_DIR / "composition_equal_weight_active.png",
)
plot_portfolio_composition_fixed(
ccy_rp_alloc,
"Risk Parity (currency gross)",
PLOT_DIR / "composition_risk_parity_active.png",
)
checkpoint_post_timer("Plots")
# 7) Export
hurst_df.to_csv(OUT_DIR / "hurst.csv", index=False)
summary_df.to_csv(OUT_DIR / "forward_bt_summary.csv", index=False)
signals_df.to_csv(OUT_DIR / "forward_bt_signals.csv", index=False)
ranking_df.to_csv(OUT_DIR / "ranking_score.csv", index=False)
pd.Series(top_tickers, name="TopN_Tickers").to_csv(OUT_DIR / "topn_tickers.csv", index=False)
pd.concat([ret_eq, ret_rp, eq_eq, eq_rp], axis=1).to_csv(OUT_DIR / "portfolio_daily.csv")
w_eq_cap.to_csv(OUT_DIR / "weights_eq_active_capped.csv")
w_rp_cap.to_csv(OUT_DIR / "weights_rp_active_capped.csv")
ccy_eq.to_csv(OUT_DIR / "currency_exposure_eq.csv")
ccy_rp.to_csv(OUT_DIR / "currency_exposure_rp.csv")
checkpoint_post_timer("Exports")
print(f"\nSaved to: {OUT_DIR.resolve()}\n")
if __name__ == "__main__":
main()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,591 @@
# -*- coding: utf-8 -*-
"""
Equity/Reconciliation Builder from Audit Log [v2]
Modifiche rispetto a v1:
- Fix parser header CSV: rimosso split(",",1)[0] che troncava le colonne
- Fix mixed_rows: conta solo righe realmente miste (non tutti i CSV comma-separated)
- Fix EntryIndex: usa None come sentinel; fallback a searchsorted sempre se assente
- Fix euristica % vs decimale: soglia più robusta + check consistenza colonne
- Fix compatibilità Python 3.8: dict | dict sostituito con {**d1, **d2}
- Performance: iterrows() -> itertuples()
- Errori: uniformato FileNotFoundError/ValueError al posto di SystemExit raw
- Side effects: lettura config spostata in _load_globals() chiamata da main()
- Legge trades_audit_log.csv (OPEN/CLOSE; EntryAmount base=100; EntryIndex opzionale)
- Scarica rendimenti giornalieri via stored procedure (connection.txt)
- Converte i rendimenti in decimali coerenti (percentuali => /100; log-return => expm1)
- Ricostruisce i rendimenti giornalieri per strategia come MEDIA PONDERATA sui trade attivi
- Salva:
- daily_returns_by_strategy.csv
- equity_by_strategy.csv
- debug_daily_by_strategy.csv
- equity_by_strategy.png, drawdown_by_strategy.png
- Mostra anche a video i grafici
"""
from __future__ import annotations
from pathlib import Path
import pandas as pd
import numpy as np
import shutil
from shared_utils import (
detect_column,
load_config,
read_connection_txt,
require_section,
)
# =============================================================================
# PATH & OUTPUT
# =============================================================================
BASE_DIR = Path(__file__).resolve().parent
# Defaults — verranno aggiornati da _load_globals() prima dell'uso
OUTPUT_DIR = BASE_DIR / "output"
PLOT_DIR = BASE_DIR / "plot"
AUDIT_LOG_CSV = BASE_DIR / "output" / "trades_audit_log.csv"
CONNECTION_TXT = BASE_DIR / "connection.txt"
OUT_DAILY_CSV = OUTPUT_DIR / "daily_returns_by_strategy.csv"
OUT_EQUITY_CSV = OUTPUT_DIR / "equity_by_strategy.csv"
OUT_DEBUG_CSV = OUTPUT_DIR / "debug_daily_by_strategy.csv"
PLOT_EQUITY = PLOT_DIR / "equity_by_strategy.png"
PLOT_DD = PLOT_DIR / "drawdown_by_strategy.png"
DROPBOX_EXPORT_DIR = Path(r"C:\Users\Admin\Dropbox\Condivisa Lavoro\Segnali di trading su ETF")
# Stored procedure defaults
SP_NAME_DEFAULT = "opt_RendimentoGiornaliero1_ALL"
SP_N_DEFAULT = 1260
PTF_CURR_DEFAULT = "EUR"
DEFAULT_STRATEGIES = ["Equal_Weight", "Risk_Parity"]
VALID_STRATEGIES = list(DEFAULT_STRATEGIES)
def _load_globals() -> None:
"""Carica config e aggiorna le variabili globali. Chiamata una sola volta da main()."""
global OUTPUT_DIR, PLOT_DIR, AUDIT_LOG_CSV, CONNECTION_TXT
global OUT_DAILY_CSV, OUT_EQUITY_CSV, OUT_DEBUG_CSV, PLOT_EQUITY, PLOT_DD
global SP_NAME_DEFAULT, SP_N_DEFAULT, PTF_CURR_DEFAULT, VALID_STRATEGIES
try:
config = load_config()
paths_cfg = require_section(config, "paths")
except Exception as exc:
print(f"[WARN] Config non disponibile ({exc}); uso i percorsi di default.")
config = None
paths_cfg = {}
OUTPUT_DIR = BASE_DIR / paths_cfg.get("output_dir", "output")
PLOT_DIR = BASE_DIR / paths_cfg.get("plot_dir", "plot")
AUDIT_LOG_CSV = BASE_DIR / paths_cfg.get("audit_log_csv", str(OUTPUT_DIR / "trades_audit_log.csv"))
CONNECTION_TXT = BASE_DIR / paths_cfg.get("connection_txt", "connection.txt")
OUT_DAILY_CSV = OUTPUT_DIR / "daily_returns_by_strategy.csv"
OUT_EQUITY_CSV = OUTPUT_DIR / "equity_by_strategy.csv"
OUT_DEBUG_CSV = OUTPUT_DIR / "debug_daily_by_strategy.csv"
PLOT_EQUITY = PLOT_DIR / "equity_by_strategy.png"
PLOT_DD = PLOT_DIR / "drawdown_by_strategy.png"
try:
db_cfg = require_section(config, "db") if config else {}
except Exception as exc:
print(f"[WARN] Config DB non disponibile ({exc}); uso i default interni.")
db_cfg = {}
if db_cfg:
SP_NAME_DEFAULT = str(db_cfg.get("stored_proc", SP_NAME_DEFAULT))
SP_N_DEFAULT = int(db_cfg.get("n_bars", SP_N_DEFAULT))
PTF_CURR_DEFAULT = str(db_cfg.get("ptf_curr", PTF_CURR_DEFAULT))
equity_cfg = config.get("equity_log", {}) if config and isinstance(config, dict) else {}
raw_whitelist = equity_cfg.get("strategy_whitelist") if isinstance(equity_cfg, dict) else None
if raw_whitelist:
whitelist = [str(x).strip() for x in raw_whitelist if str(x).strip()]
if whitelist:
VALID_STRATEGIES = whitelist
# =============================================================================
# UTILITY
# =============================================================================
def copy_to_dropbox(src: Path, dst_dir: Path = DROPBOX_EXPORT_DIR) -> bool:
if not src or not dst_dir:
return False
if not src.exists():
print(f"[WARN] file non trovato per copia Dropbox: {src}")
return False
try:
dst_dir.mkdir(parents=True, exist_ok=True)
dst = dst_dir / src.name
shutil.copy2(src, dst)
print(f"[DROPBOX] Copiato {src.name} in {dst_dir}")
return True
except Exception as exc:
print(f"[WARN] impossibile copiare {src} su {dst_dir}: {exc}")
return False
# =============================================================================
# AUDIT LOG LOADER (FORMAT CHECKS)
# =============================================================================
REQUIRED_AUDIT_COLS = ["Strategy", "ISIN", "Action", "TradeDate"]
CANONICAL_AUDIT_COLS = [
"Strategy",
"ISIN",
"Action",
"TradeDate",
"EntryIndex",
"EntryAmount",
"SizeWeight",
"Price",
"PnL_%",
"ExitReason",
"LinkedOpenDate",
"Duration_bars",
"Notes",
]
NUMERIC_COLS = [
"EntryIndex",
"EntryAmount",
"SizeWeight",
"Price",
"PnL_%",
"Duration_bars",
]
def _clean_numeric_series(s: pd.Series) -> pd.Series:
if pd.api.types.is_numeric_dtype(s):
return s
txt = s.astype(str).str.strip()
txt = txt.str.replace("%", "", regex=False)
txt = txt.replace({"": np.nan, "nan": np.nan, "None": np.nan})
def _fix_one(val: str) -> str:
if val is None or (isinstance(val, float) and np.isnan(val)):
return val
v = str(val).strip()
if not v:
return v
dot_n = v.count(".")
comma_n = v.count(",")
if dot_n > 1 and comma_n == 0:
# più punti senza virgole => punti come separatori migliaia
return v.replace(".", "")
if dot_n > 0 and comma_n > 0:
last_dot = v.rfind(".")
last_comma = v.rfind(",")
if last_comma > last_dot:
# virgola decimale, punti migliaia
return v.replace(".", "").replace(",", ".")
# punto decimale, virgole migliaia
return v.replace(",", "")
if comma_n > 0 and dot_n == 0:
# solo virgola => virgola decimale
return v.replace(",", ".")
return v
cleaned = txt.map(_fix_one)
return pd.to_numeric(cleaned, errors="coerce")
def _parse_mixed_dates(series: pd.Series) -> pd.Series:
s = series.astype(str).str.strip()
s = s.replace({"": np.nan, "nan": np.nan, "None": np.nan})
dt_iso = pd.to_datetime(s, format="%Y-%m-%d", errors="coerce")
dt_iso_ts = pd.to_datetime(s, format="%Y-%m-%d %H:%M:%S", errors="coerce")
dt_dmy = pd.to_datetime(s, format="%d/%m/%Y", errors="coerce")
dt_dmy_ts = pd.to_datetime(s, format="%d/%m/%Y %H:%M:%S", errors="coerce")
return dt_iso.fillna(dt_iso_ts).fillna(dt_dmy).fillna(dt_dmy_ts)
def load_audit_log(path: Path) -> pd.DataFrame:
if not path.exists():
raise FileNotFoundError(f"Missing trades_audit_log.csv at {path}")
raw = path.read_text(encoding="utf-8", errors="ignore")
if not raw.strip():
raise ValueError("Audit log vuoto.")
lines = raw.splitlines()
first_line = lines[0]
# FIX v2: l'header viene splittato sul separatore reale senza troncature.
# In v1 c'era un errato split(",", 1)[0] che perdeva le colonne dopo la prima virgola.
if ";" in first_line:
header = [c.strip() for c in first_line.split(";")]
else:
header = [c.strip() for c in first_line.split(",")]
# Rimuovi eventuale colonna indice vuota in testa (es. export pandas con index)
if header and header[0] == "":
header = header[1:]
if not header or "TradeDate" not in header:
header = CANONICAL_AUDIT_COLS.copy()
# Determina il separatore dominante per le righe dati
semicolon_file = ";" in first_line
rows = []
mixed_rows = 0
for line in lines[1:]:
if not line or not line.strip():
continue
# FIX v2: "mixed" solo se il separatore della riga differisce dal file header.
# In v1 tutte le righe CSV venivano contate come "legacy/misto".
if semicolon_file:
if ";" in line and line.count(";") >= (len(header) - 1):
parts = line.split(";")
else:
parts = line.split(",")
mixed_rows += 1
else:
if ";" in line and line.count(";") >= (len(header) - 1):
parts = line.split(";")
mixed_rows += 1
else:
parts = line.split(",")
if parts and parts[0] == "":
parts = parts[1:]
if len(parts) > len(header):
parts = parts[: len(header) - 1] + [",".join(parts[len(header) - 1:])]
elif len(parts) < len(header):
parts = parts + [""] * (len(header) - len(parts))
rows.append(parts)
df = pd.DataFrame(rows, columns=header)
if mixed_rows > 0:
print(f"[WARN] Audit log con {mixed_rows} righe in formato legacy/misto: normalizzate in lettura.")
missing = [c for c in REQUIRED_AUDIT_COLS if c not in df.columns]
if missing:
raise ValueError(
f"Formato audit log non valido. Colonne mancanti: {missing}. "
f"Colonne trovate: {list(df.columns)}"
)
# Normalizza colonne chiave
df["Action"] = df["Action"].astype(str).str.upper().str.strip()
df["Strategy"] = df["Strategy"].astype(str).str.strip()
df["ISIN"] = df["ISIN"].astype(str).str.strip()
# Date
df["TradeDate"] = _parse_mixed_dates(df["TradeDate"])
if "LinkedOpenDate" in df.columns:
df["LinkedOpenDate"] = _parse_mixed_dates(df["LinkedOpenDate"])
# Rimuovi righe con date invalide
before = len(df)
df = df.dropna(subset=["TradeDate"])
dropped = before - len(df)
if dropped > 0:
print(f"[WARN] Rimosse {dropped} righe con TradeDate non valido.")
# Mantieni solo OPEN/CLOSE
if "Action" in df.columns:
before = len(df)
df = df[df["Action"].isin(["OPEN", "CLOSE"])]
dropped = before - len(df)
if dropped > 0:
print(f"[WARN] Rimosse {dropped} righe con Action non valida.")
# Pulizia numerica
for col in NUMERIC_COLS:
if col in df.columns:
df[col] = _clean_numeric_series(df[col])
return df
# =============================================================================
# FETCH RENDIMENTI DAL DB
# =============================================================================
def fetch_returns_from_db(isins, start_date, end_date) -> pd.DataFrame:
import sqlalchemy as sa
from sqlalchemy import text as sql_text
conn_str = read_connection_txt(CONNECTION_TXT)
engine = sa.create_engine(conn_str, fast_executemany=True)
sp = SP_NAME_DEFAULT
nbar = SP_N_DEFAULT
ptf = PTF_CURR_DEFAULT
sql_sp = sql_text(f"EXEC {sp} @ISIN = :isin, @n = :n, @PtfCurr = :ptf")
frames = []
with engine.begin() as conn:
for isin in isins:
try:
df = pd.read_sql_query(sql_sp, conn, params={"isin": isin, "n": nbar, "ptf": ptf})
except Exception as e:
print(f"[ERROR] SP {sp} fallita per {isin}: {e}")
continue
if df.empty:
continue
col_date = detect_column(df, ["Date", "Data", "Datetime", "Timestamp", "Time"])
col_ret = detect_column(df, ["Ret", "Return", "Rendimento", "Rend", "Ret_%", "RET"])
col_px = detect_column(df, ["Close", "AdjClose", "Price", "Px", "Last", "Prezzo", "Chiusura"])
if not col_date:
continue
df[col_date] = pd.to_datetime(df[col_date], errors="coerce")
df = df.dropna(subset=[col_date]).sort_values(col_date)
if col_ret:
r = pd.to_numeric(df[col_ret], errors="coerce")
out = pd.DataFrame({"Date": df[col_date], "ISIN": isin, "Ret": r})
elif col_px:
px = pd.to_numeric(df[col_px], errors="coerce").replace(0, np.nan)
log_r = np.log(px / px.shift(1))
r = np.expm1(log_r) # log-return -> rendimento semplice decimale
out = pd.DataFrame({"Date": df[col_date], "ISIN": isin, "Ret": r})
else:
continue
frames.append(out)
if not frames:
return pd.DataFrame(index=pd.DatetimeIndex([], name="Date"))
long = pd.concat(frames, ignore_index=True).dropna(subset=["Date"])
mask = (
(long["Date"].dt.date >= start_date)
& (long["Date"].dt.date <= end_date)
)
long = long.loc[mask]
wide = long.pivot(index="Date", columns="ISIN", values="Ret").sort_index()
# FIX v2: euristica % vs decimale più robusta.
# Verifica la mediana dei valori assoluti per colonna, non solo il massimo globale.
# Se la maggioranza delle colonne ha mediana > 0.5, i dati sono probabilmente percentuali.
if not wide.empty:
col_medians = np.nanmedian(np.abs(wide.values), axis=0)
pct_cols = np.sum(col_medians > 0.5)
total_cols = len(col_medians)
if total_cols > 0 and (pct_cols / total_cols) > 0.5:
wide = wide / 100.0
print(f"[INFO] Rendimenti convertiti da % a decimale ({pct_cols}/{total_cols} colonne con mediana >0.5).")
return wide
# =============================================================================
# RICOSTRUZIONE DAILY RETURNS
# =============================================================================
def rebuild_daily_from_log(audit: pd.DataFrame, returns_wide: pd.DataFrame) -> pd.DataFrame:
strategies = sorted(audit["Strategy"].dropna().astype(str).unique())
if not strategies:
return pd.DataFrame(index=returns_wide.index, columns=[])
idx = returns_wide.index
daily_num = pd.DataFrame(0.0, index=idx, columns=strategies)
daily_den = pd.DataFrame(0.0, index=idx, columns=strategies)
closes = audit[audit["Action"] == "CLOSE"].copy()
if not closes.empty:
if "LinkedOpenDate" in closes.columns:
closes["trade_key"] = (
closes["ISIN"].astype(str)
+ "|"
+ pd.to_datetime(closes["LinkedOpenDate"]).dt.strftime("%Y-%m-%d")
)
else:
closes["trade_key"] = (
closes["ISIN"].astype(str)
+ "|"
+ pd.to_datetime(closes["TradeDate"]).dt.strftime("%Y-%m-%d")
)
closes["TradeDate"] = pd.to_datetime(closes["TradeDate"])
closes_agg = closes.sort_values("TradeDate").groupby("trade_key", as_index=False)["TradeDate"].last()
close_map = closes_agg.set_index("trade_key")
else:
close_map = pd.DataFrame(columns=["TradeDate"]).set_index(pd.Index([], name="trade_key"))
# debug counters
total_opens = 0
used_opens = 0
skipped_missing_isin = 0
skipped_bad_amount = 0
skipped_bad_window = 0
for strat in strategies:
aud_s = audit[audit["Strategy"] == strat]
opens = aud_s[aud_s["Action"] == "OPEN"].copy()
if opens.empty:
continue
opens["trade_key"] = (
opens["ISIN"].astype(str)
+ "|"
+ pd.to_datetime(opens["TradeDate"]).dt.strftime("%Y-%m-%d")
)
# FIX v2: itertuples() al posto di iterrows() — più veloce su dataset grandi.
for op in opens.itertuples(index=False):
total_opens += 1
isin = op.ISIN
if isin not in returns_wide.columns:
skipped_missing_isin += 1
continue
ser = returns_wide[isin].astype(float)
entry_amount = float(op.EntryAmount if pd.notna(op.EntryAmount) else 0.0)
if entry_amount <= 0:
skipped_bad_amount += 1
continue
# FIX v2: EntryIndex usato solo se presente e valido; altrimenti sempre
# searchsorted sulla TradeDate. In v1 il default 0 era silenziosamente
# accettato come indice reale anche quando il campo era assente.
raw_idx = getattr(op, "EntryIndex", None)
if raw_idx is not None and pd.notna(raw_idx):
entry_idx = int(raw_idx)
if entry_idx < 0 or entry_idx >= len(ser):
d_open = pd.Timestamp(op.TradeDate).normalize()
entry_idx = int(ser.index.searchsorted(d_open, side="left"))
else:
d_open = pd.Timestamp(op.TradeDate).normalize()
entry_idx = int(ser.index.searchsorted(d_open, side="left"))
key = op.trade_key
if key in close_map.index:
close_val = close_map.loc[key, "TradeDate"]
if isinstance(close_val, pd.Series):
close_val = close_val.iloc[-1]
d_close = pd.Timestamp(close_val).normalize()
exit_idx = int(ser.index.searchsorted(d_close, side="left"))
else:
exit_idx = len(ser)
if exit_idx <= entry_idx:
skipped_bad_window += 1
continue
idx_seg = ser.index[entry_idx:exit_idx]
vals_seg = ser.values[entry_idx:exit_idx]
daily_num.loc[idx_seg, strat] += entry_amount * vals_seg
daily_den.loc[idx_seg, strat] += entry_amount
used_opens += 1
daily = pd.DataFrame(0.0, index=idx, columns=strategies)
mask = daily_den > 0
daily[mask] = daily_num[mask] / daily_den[mask]
# FIX v2: {**d1, **d2, **d3} al posto di d1 | d2 | d3 (richiede Python 3.9+).
debug_dict = {
**{f"num_{c}": daily_num[c] for c in strategies},
**{f"den_{c}": daily_den[c] for c in strategies},
**{f"ret_{c}": daily[c] for c in strategies},
}
debug = pd.concat(debug_dict, axis=1)
debug.to_csv(OUT_DEBUG_CSV, index_label="Date")
print(
f"[DEBUG] OPEN totali: {total_opens}, usati: {used_opens}, "
f"mancano ISIN: {skipped_missing_isin}, "
f"EntryAmount<=0: {skipped_bad_amount}, "
f"finestra non valida: {skipped_bad_window}"
)
return daily
# =============================================================================
# MAIN
# =============================================================================
def main():
# FIX v2: config e globals caricati qui, non a import-time.
_load_globals()
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
PLOT_DIR.mkdir(parents=True, exist_ok=True)
if not AUDIT_LOG_CSV.exists():
raise FileNotFoundError(f"Missing trades_audit_log.csv at {AUDIT_LOG_CSV}")
audit = load_audit_log(AUDIT_LOG_CSV)
if audit.empty:
raise ValueError("Audit log vuoto.")
if "Strategy" not in audit.columns:
raise ValueError("Colonna 'Strategy' mancante nell'audit log.")
# Filtro whitelist
audit["Strategy"] = audit["Strategy"].astype(str)
before = len(audit)
audit = audit[audit["Strategy"].isin(VALID_STRATEGIES)]
removed = before - len(audit)
if removed > 0:
print(f"[INFO] Filtrate {removed} righe con strategie non incluse in {VALID_STRATEGIES}.")
if audit.empty:
raise ValueError(f"Nessuna riga con strategie in {VALID_STRATEGIES} nell'audit log.")
start_date = (audit["TradeDate"].min() - pd.Timedelta(days=10)).date()
end_date = (audit["TradeDate"].max() + pd.Timedelta(days=10)).date()
isins = sorted(audit["ISIN"].dropna().astype(str).unique())
ret_wide = fetch_returns_from_db(isins, start_date, end_date)
if ret_wide.empty:
raise RuntimeError("Nessun rendimento recuperato dal DB nell'intervallo richiesto.")
daily = rebuild_daily_from_log(audit, ret_wide).sort_index()
daily.to_csv(OUT_DAILY_CSV, index_label="Date")
equity = (1.0 + daily.fillna(0.0)).cumprod() * 100.0
equity.to_csv(OUT_EQUITY_CSV, index_label="Date")
import matplotlib.pyplot as plt
# Equity
plt.figure(figsize=(10, 6))
for col in equity.columns:
plt.plot(equity.index, equity[col], label=col)
plt.title("Equity per Strategy")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.savefig(str(PLOT_EQUITY), dpi=150)
plt.close()
copy_to_dropbox(PLOT_EQUITY)
# Drawdown
dd = equity / equity.cummax() - 1.0
plt.figure(figsize=(10, 5))
for col in dd.columns:
plt.plot(dd.index, dd[col], label=col)
plt.title("Drawdown per Strategy")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.savefig(str(PLOT_DD), dpi=150)
plt.close()
copy_to_dropbox(PLOT_DD)
print("Salvati:")
print(" -", OUT_DAILY_CSV)
print(" -", OUT_EQUITY_CSV)
print(" -", OUT_DEBUG_CSV)
print(" -", PLOT_EQUITY)
print(" -", PLOT_DD)
print(" -", DROPBOX_EXPORT_DIR / PLOT_EQUITY.name)
print(" -", DROPBOX_EXPORT_DIR / PLOT_DD.name)
if __name__ == "__main__":
main()