# -*- coding: utf-8 -*-
"""Trading Pattern Recon w Hurst - Forex

CORRECTED VERSION (EOD close-to-close) - with SHORT trades, Daubechies (DB) wavelet
denoising of EstOutcome, a 1 bp fee and a 35% net-exposure cap per single currency.

IMPLEMENTED REQUIREMENTS (as instructed):
1) Denoising of EstOutcome (rolling/online, NO look-ahead)
2) Daubechies (DB) wavelet -> db4
3) EOD operation: decision at close(t), execution at close(t)
4) PnL always on close(t+1)/close(t) (i.e. Ret_fwd = close[t+1]/close[t] - 1)
5) Fee = 1 bp charged on position changes (enter/exit/flip) at close(t)
6) Exposure cap per single currency = 35% (net exposure per currency)
7) Everything else keeps the original layout (shared_utils, per-asset backtest,
   ranking, Top15, EW and Risk Parity)

NOTES:
- The script uses ONLY the AdjClose series from your endpoint (same JSON as before).
- It does not use Open/High/Low and adds no intraday logic.

Main outputs (folder OUT_DIR):
- forward_bt_signals.csv   (per ticker: Signal, EstOutcomeRaw, EstOutcome (denoised), AvgDist, Ret_fwd, PnL)
- forward_bt_summary.csv   (per-ticker metrics)
- ranking_score.csv        (ranking + score)
- topn_tickers.csv         (TopN from the ranking)
- portfolio_daily.csv      (EW and RP returns / equity lines)
- weights_eq_active_capped.csv / weights_rp_active_capped.csv (weights with cash + currency cap)
- currency_exposure_eq.csv / currency_exposure_rp.csv (exposure diagnostics)

Dependencies:
- numpy, pandas, matplotlib, requests
- PyWavelets (pip install PyWavelets)
"""

from __future__ import annotations

import sys
import types
from pathlib import Path
from urllib.parse import quote

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests

# -------------------------
# Wavelets
# -------------------------
try:
    import pywt
except ImportError as e:
    raise ImportError("PyWavelets (pywt) is missing. Install it with: pip install PyWavelets") from e


# ------------------------------------------------------------
# shared_utils import (local file next to this script)
# ------------------------------------------------------------
# Some versions of shared_utils import optional modules (e.g. pyodbc). Stub it out to avoid a crash.
sys.modules.setdefault("pyodbc", types.SimpleNamespace())

import importlib.util

SHARED_UTILS_PATH = Path(__file__).with_name("shared_utils.py")
if not SHARED_UTILS_PATH.exists():
    raise FileNotFoundError(f"shared_utils.py not found next to this script: {SHARED_UTILS_PATH}")

spec = importlib.util.spec_from_file_location("shared_utils", str(SHARED_UTILS_PATH))
shared_utils = importlib.util.module_from_spec(spec)
sys.modules["shared_utils"] = shared_utils
assert spec.loader is not None
spec.loader.exec_module(shared_utils)

build_pattern_library = shared_utils.build_pattern_library
predict_from_library = shared_utils.predict_from_library
z_norm = shared_utils.z_norm


# ============================================================
# CONFIG
# ============================================================
TICKERS = [
    "EURUSD Curncy",
    "USDJPY Curncy",
    "GBPUSD Curncy",
    "USDCHF Curncy",
    "USDCAD Curncy",
    "AUDUSD Curncy",
    "NZDUSD Curncy",
    "EURJPY Curncy",
    "EURGBP Curncy",
    "EURCHF Curncy",
    "EURCAD Curncy",
    "EURAUD Curncy",
    "EURNZD Curncy",
    "GBPJPY Curncy",
    "GBPCHF Curncy",
    "GBPCAD Curncy",
    "GBPAUD Curncy",
    "GBPNZD Curncy",
    "CHFJPY Curncy",
    "CADJPY Curncy",
    "AUDJPY Curncy",
    "NZDJPY Curncy",
    "AUDNZD Curncy",
    "AUDCAD Curncy",
    "AUDCHF Curncy",
    "NZDCAD Curncy",
    "NZDCHF Curncy",
    "CADCHF Curncy",
]
TICKERS = [t.strip() for t in TICKERS if isinstance(t, str) and t.strip()]

BASE_URL = "https://fin.scorer.app/finance/v2/history"
FROM_DATE = "20201224"

# Pattern params (same as the original layout)
WP = 60
HA = 10
KNN_K = 25

# Shorts enabled for FX
ALLOW_SHORT = True

# Fee (FIX: 1 bp)
FEE_BPS = 1

# Exit controls (EOD approximation on Ret_fwd)
SL_BPS = 300.0
TP_BPS = 800.0
TRAIL_BPS = 300.0
TIME_STOP_BARS = 20
THETA_EXIT = 0.0

# Entry theta fallback when Hurst is not available
THETA_FALLBACK = 0.005

# Portfolio construction
TOP_N = 15
RP_MAX_WEIGHT = 2.0 / TOP_N  # 0.1333... (as in the legend you had seen)
RANKING_WINDOW_BARS = 252
RP_LOOKBACK = 60

# Currency cap (net exposure per currency)
CURRENCY_CAP = 0.35

# Wavelet denoise on EstOutcome
DENOISE_ENABLED = True
DENOISE_WAVELET = "db4"  # DB family (Daubechies)
DENOISE_LEVEL = 3
DENOISE_MIN_LEN = 96
DENOISE_THRESHOLD_MODE = "soft"

DAYS_PER_YEAR = 252

OUT_DIR = Path("./out_shared_utils_url_forex")
OUT_DIR.mkdir(parents=True, exist_ok=True)


# ============================================================
# Helpers: JSON -> DataFrame (AdjClose)
# ============================================================

def _detect_col(cols, candidates):
    cols_l = {c.lower(): c for c in cols}
    for cand in candidates:
        if cand.lower() in cols_l:
            return cols_l[cand.lower()]
    # fallback: substring match
    for cand in candidates:
        for c in cols:
            if cand.lower() in str(c).lower():
                return c
    return None


def fetch_price_series(ticker: str, from_date: str) -> pd.DataFrame:
    """Download the JSON and return a DataFrame indexed by Date with an AdjClose column."""
    url = f"{BASE_URL}/{quote(ticker)}?fromDate={from_date}"
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    obj = r.json()

    # Handle the possible JSON layouts
    if isinstance(obj, list) and len(obj) == 1 and isinstance(obj[0], dict) and "data" in obj[0]:
        obj = obj[0]["data"]

    if not isinstance(obj, list):
        raise ValueError(f"Unexpected JSON layout for {ticker}: {type(obj)}")

    df = pd.DataFrame(obj)
    if df.empty:
        raise ValueError(f"No rows for {ticker}")

    col_date = _detect_col(df.columns, ["date", "datetime", "timestamp", "time"])
    if col_date is None:
        raise ValueError(f"Date column not found for {ticker}. Columns: {df.columns.tolist()}")

    # Priority: AdjClose, otherwise Close
    col_px = _detect_col(df.columns, ["adj_close", "adjclose", "adjusted_close", "Adj Close", "AdjClose"])
    if col_px is None:
        col_px = _detect_col(df.columns, ["close", "px_last", "last", "price"])
    if col_px is None:
        raise ValueError(f"Price column not found for {ticker}. Columns: {df.columns.tolist()}")

    df[col_date] = pd.to_datetime(df[col_date], errors="coerce", utc=True).dt.tz_localize(None)
    df[col_px] = pd.to_numeric(df[col_px], errors="coerce")

    df = df.dropna(subset=[col_date, col_px]).sort_values(col_date)
    df = df.drop_duplicates(subset=[col_date]).set_index(col_date)

    # Normalise the index to plain dates
    df.index = pd.to_datetime(df.index).normalize()
    df = df[~df.index.duplicated(keep="last")]

    out = pd.DataFrame(index=df.index)
    out.index.name = "Date"
    out["AdjClose"] = df[col_px].astype(float)
    return out.sort_index()


# ============================================================
# Hurst (RS + DFA) on returns
# ============================================================
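
# Rescaled-range (R/S) Hurst estimate: for each window size w the series is split into
# m = n // w segments, each segment is demeaned, and R/S = (max - min of the cumulative
# sum) / std.  H is the slope of log(mean R/S) vs log(w); H > 0.5 suggests persistence
# (trending), H < 0.5 mean reversion, H ~ 0.5 a random walk.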
def hurst_rs_returns(r: np.ndarray, win_grid=None, min_seg=1) -> float:
    r = pd.Series(r).dropna().astype("float64").values
    n = len(r)
    if n < 200:
        return np.nan

    if win_grid is None:
        base = np.array([16, 24, 32, 48, 64, 96, 128, 192, 256, 384], dtype=int)
        win_grid = [w for w in base if w <= n // 2]

    RS_vals, sizes = [], []
    for w in win_grid:
        m = n // w
        if w < 8 or m < min_seg:
            continue
        rs_list = []
        for i in range(m):
            seg = r[i * w : (i + 1) * w]
            seg = seg - np.mean(seg)
            sd = seg.std(ddof=1)
            if sd == 0 or not np.isfinite(sd):
                continue
            y = np.cumsum(seg)
            rs = (np.max(y) - np.min(y)) / sd
            if np.isfinite(rs) and rs > 0:
                rs_list.append(rs)
        if rs_list:
            RS_vals.append(np.mean(rs_list))
            sizes.append(w)

    if len(RS_vals) < 3:
        return np.nan

    sizes = np.array(sizes, float)
    RS_vals = np.array(RS_vals, float)
    mask = np.isfinite(RS_vals) & (RS_vals > 0)
    sizes, RS_vals = sizes[mask], RS_vals[mask]
    if sizes.size < 3:
        return np.nan

    slope, _ = np.polyfit(np.log(sizes), np.log(RS_vals), 1)
    return float(np.clip(slope, 0.0, 1.0)) if np.isfinite(slope) else np.nan
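

# Detrended fluctuation analysis (DFA): the demeaned returns are integrated, split into
# boxes of size s, a linear trend is fitted and removed in each box, and F(s) is the mean
# RMS of the residuals.  The DFA exponent is the slope of log F(s) vs log(s), read here
# as an alternative Hurst estimate on the same 0..1 scale.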
def hurst_dfa_returns(r: np.ndarray, win_grid=None) -> float:
    r = pd.Series(r).dropna().astype("float64").values
    n = len(r)
    if n < 200:
        return np.nan

    y = np.cumsum(r - np.mean(r))

    if win_grid is None:
        base = np.array([16, 24, 32, 48, 64, 96, 128, 192, 256], dtype=int)
        win_grid = [w for w in base if w <= n // 2]

    F_vals, sizes = [], []
    for s in win_grid:
        m = n // s
        if s < 8 or m < 2:
            continue
        rms_list = []
        for i in range(m):
            seg = y[i * s : (i + 1) * s]
            t = np.arange(s, dtype=float)
            A = np.vstack([t, np.ones(s)]).T
            coeff, *_ = np.linalg.lstsq(A, seg, rcond=None)
            detr = seg - (A @ coeff)
            rms = np.sqrt(np.mean(detr**2))
            if np.isfinite(rms) and rms > 0:
                rms_list.append(rms)
        if rms_list:
            F_vals.append(np.mean(rms_list))
            sizes.append(s)

    if len(F_vals) < 3:
        return np.nan

    sizes = np.array(sizes, float)
    F_vals = np.array(F_vals, float)
    mask = np.isfinite(F_vals) & (F_vals > 0)
    sizes, F_vals = sizes[mask], F_vals[mask]
    if sizes.size < 3:
        return np.nan

    slope, _ = np.polyfit(np.log(sizes), np.log(F_vals), 1)
    return float(np.clip(slope, 0.0, 1.0)) if np.isfinite(slope) else np.nan


# ============================================================
# Wavelet denoise (ONLINE, NO look-ahead)
# ============================================================
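
# Denoising scheme (applied to the expanding history of EstOutcome, so each call only
# sees data up to "now"): a DENOISE_WAVELET (db4) decomposition of up to DENOISE_LEVEL
# levels, noise sigma estimated from the finest-level detail coefficients via the median
# absolute deviation (MAD / 0.6745), and the Donoho-Johnstone "universal" threshold
# sigma * sqrt(2 * ln(n)) applied with soft thresholding to all detail levels before
# reconstruction.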
def wavelet_denoise_last_online(x: np.ndarray) -> float:
    """Denoise using ONLY the history up to t. Returns the last denoised point."""
    x = np.asarray(x, dtype=float)
    n = x.size
    if n < DENOISE_MIN_LEN:
        return float(x[-1])

    s = pd.Series(x).ffill().bfill().values

    wav = pywt.Wavelet(DENOISE_WAVELET)
    max_level = pywt.dwt_max_level(n, wav.dec_len)
    level = int(min(DENOISE_LEVEL, max_level)) if max_level > 0 else 1

    coeffs = pywt.wavedec(s, DENOISE_WAVELET, level=level)

    detail = coeffs[-1]
    if detail.size == 0:
        return float(s[-1])

    sigma = np.median(np.abs(detail - np.median(detail))) / 0.6745
    if not np.isfinite(sigma) or sigma <= 0:
        return float(s[-1])

    thr = sigma * np.sqrt(2 * np.log(n))

    coeffs_th = [coeffs[0]]
    for d in coeffs[1:]:
        coeffs_th.append(pywt.threshold(d, thr, mode=DENOISE_THRESHOLD_MODE))

    rec = pywt.waverec(coeffs_th, DENOISE_WAVELET)
    rec = rec[:n]
    return float(rec[-1])


# ============================================================
# Per-asset forward EOD backtest
# ============================================================
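
# Signal generation relies on shared_utils: judging from the call signatures,
# build_pattern_library(past, WP, HA) builds a library of z-normalised windows of WP
# log-returns together with their HA-bar forward outcomes, and
# predict_from_library(curr_zn, lib_wins, lib_out, k=KNN_K) returns a k-nearest-neighbour
# estimate of the outcome for the current window (EstOutcomeRaw) plus the average
# neighbour distance.  The exact internals live in shared_utils.py.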
def forward_backtest_one_asset(close: pd.Series, theta_entry: float, allow_short: bool) -> pd.DataFrame:
    """EOD backtest:

    - Signal computed at close(t) from a window of past returns (no look-ahead).
    - Execution at close(t).
    - PnL for step t = Signal(t) * Ret_fwd(t) - fee * abs(diff(Signal)),
      where Ret_fwd(t) = close(t+1)/close(t) - 1.

    Output is indexed by Date = t with Ret_fwd already aligned.
    """

    close = pd.to_numeric(close, errors="coerce").dropna()
    close = close[~close.index.duplicated(keep="last")].sort_index()

    # log returns for pattern matching
    r_log = np.log(close / close.shift(1))
    r_log = r_log.dropna()

    # forward return (close-to-close): Ret_fwd[t] = close[t+1]/close[t] - 1
    ret_fwd = close.pct_change().shift(-1)

    # Align on common dates
    idx = r_log.index.intersection(ret_fwd.index)
    r_log = r_log.loc[idx]
    ret_fwd = ret_fwd.loc[idx]

    pos = 0
    entry_t = None
    trade_pnl = 0.0
    trade_peak = 0.0

    est_hist = []
    rows = []

    # t is a positional index into r_log/ret_fwd
    for t in range(WP, len(r_log) - 1):
        dt = r_log.index[t]

        past = r_log.iloc[:t]
        if past.dropna().shape[0] < (WP + HA):
            rows.append((dt, pos, np.nan, np.nan, np.nan, float(ret_fwd.iloc[t])))
            continue

        lib_wins, lib_out = build_pattern_library(past, WP, HA)
        if lib_wins is None or lib_out is None or len(lib_out) == 0:
            rows.append((dt, pos, np.nan, np.nan, np.nan, float(ret_fwd.iloc[t])))
            continue

        win_last = r_log.iloc[t - WP : t].values
        curr_zn = z_norm(win_last)
        if curr_zn is None:
            rows.append((dt, pos, np.nan, np.nan, np.nan, float(ret_fwd.iloc[t])))
            continue

        est_raw, avg_dist, _ = predict_from_library(curr_zn, lib_wins, lib_out, k=KNN_K)
        est_raw = float(est_raw)
        avg_dist = float(avg_dist)

        est_hist.append(est_raw)
        est_use = wavelet_denoise_last_online(np.array(est_hist, dtype=float)) if DENOISE_ENABLED else est_raw

        # ===== ENTRY =====
        if pos == 0:
            if est_use > theta_entry:
                pos = 1
                entry_t = t
                trade_pnl = 0.0
                trade_peak = 0.0
            elif allow_short and est_use < -theta_entry:
                pos = -1
                entry_t = t
                trade_pnl = 0.0
                trade_peak = 0.0

        # ===== EXIT (EOD approximation: uses the next bar's Ret_fwd to update the trade PnL) =====
        else:
            next_ret = float(ret_fwd.iloc[t])
            signed_next = next_ret if pos == 1 else (-next_ret)

            pnl_if_stay = (1.0 + trade_pnl) * (1.0 + signed_next) - 1.0
            peak_if_stay = max(trade_peak, pnl_if_stay)

            exit_now = False

            if SL_BPS is not None and pnl_if_stay <= -SL_BPS / 10000.0:
                exit_now = True
            if TP_BPS is not None and pnl_if_stay >= TP_BPS / 10000.0:
                exit_now = True
            if TRAIL_BPS is not None and (peak_if_stay - pnl_if_stay) >= TRAIL_BPS / 10000.0:
                exit_now = True
            if TIME_STOP_BARS is not None and entry_t is not None and (t - entry_t + 1) >= TIME_STOP_BARS:
                exit_now = True

            # Symmetric theta exit
            if THETA_EXIT is not None:
                if pos == 1:
                    is_weak = est_use <= THETA_EXIT
                else:
                    is_weak = est_use >= -THETA_EXIT
                if is_weak:
                    exit_now = True

            if exit_now:
                pos = 0
                entry_t = None
                trade_pnl = 0.0
                trade_peak = 0.0
            else:
                trade_pnl = pnl_if_stay
                trade_peak = peak_if_stay

        rows.append((dt, pos, est_raw, est_use, avg_dist, float(ret_fwd.iloc[t])))

    df = pd.DataFrame(
        rows,
        columns=["Date", "Signal", "EstOutcomeRaw", "EstOutcome", "AvgDist", "Ret_fwd"],
    ).set_index("Date")
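
    # Fee arithmetic: |diff(Signal)| is 1 on an entry or exit and 2 on a flip (+1 -> -1),
    # so with FEE_BPS = 1 a flip costs 2 bp = 0.02% on that step.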
    fee = FEE_BPS / 10000.0
    trade_chg = df["Signal"].diff().abs().fillna(0.0)
    df["PnL"] = df["Signal"] * df["Ret_fwd"] - trade_chg * fee

    return df


# ============================================================
# Portfolio utilities
# ============================================================

def equity_from_returns(r: pd.Series) -> pd.Series:
    r = pd.to_numeric(r, errors="coerce").fillna(0.0)
    return (1 + r).cumprod() * 100


def monthly_returns(r: pd.Series) -> pd.Series:
    r = pd.to_numeric(r, errors="coerce").fillna(0.0)
    return (1 + r).resample("M").prod() - 1


def plot_heatmap_monthly(r: pd.Series, title: str):
    m = monthly_returns(r)
    df = m.to_frame("ret")
    df["Year"], df["Month"] = df.index.year, df.index.month
    pv = df.pivot(index="Year", columns="Month", values="ret")

    fig, ax = plt.subplots(figsize=(10, 6))
    im = ax.imshow(pv.fillna(0) * 100, aspect="auto")
    for i in range(pv.shape[0]):
        for j in range(pv.shape[1]):
            val = pv.iloc[i, j]
            if not np.isnan(val):
                ax.text(j, i, f"{val*100:.1f}", ha="center", va="center", fontsize=8)
    ax.set_title(title)
    ax.set_xlabel("Month")
    ax.set_ylabel("Year")
    ax.set_xticks(range(12))
    ax.set_xticklabels(range(1, 13))
    fig.colorbar(im, ax=ax, label="%")
    plt.tight_layout()
    return fig
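

# Inverse-volatility ("risk parity" style) weights: each asset gets a weight proportional
# to 1 / rolling-std of its strategy returns, normalised to sum to 1 per day.  Note that
# the clip at max_weight is applied after normalisation, so a clipped row can sum to less
# than 1; the RP caller below re-normalises over the selected columns.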
def inverse_vol_weights(returns_df: pd.DataFrame, window: int, max_weight: float | None) -> pd.DataFrame:
    vol = returns_df.rolling(window).std()
    inv = 1 / vol.replace(0, np.nan)
    w = inv.div(inv.sum(axis=1), axis=0)
    w = w.ffill()
    if max_weight is not None:
        w = w.clip(upper=max_weight)
    return w


def make_active_weights(w_target: pd.DataFrame, sig: pd.DataFrame, add_cash: bool = True, cash_label: str = "Cash") -> pd.DataFrame:
    # Active if Signal != 0 (either long or short)
    w = (w_target * (sig != 0)).fillna(0.0)
    if add_cash:
        w[cash_label] = (1.0 - w.sum(axis=1)).clip(lower=0.0)
    return w


# ============================================================
# FX currency exposure cap (net exposure per currency)
# ============================================================

def parse_fx_pair(ticker: str) -> tuple[str, str] | None:
    # "EURUSD Curncy" -> ("EUR", "USD")
    if not isinstance(ticker, str):
        return None
    pair = ticker.split()[0].strip().upper()
    if len(pair) < 6:
        return None
    return pair[:3], pair[3:6]
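

# Worked example of the exposure bookkeeping below: with a 0.10 long in EURUSD and a
# 0.05 short in EURJPY, the net exposures are EUR +0.10 - 0.05 = +0.05, USD -0.10 and
# JPY +0.05 (signed weight added to the base currency, subtracted from the quote).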
def currency_exposure_from_weights(weights_row: pd.Series, sig_row: pd.Series) -> dict[str, float]:
    """Net exposure per currency, taking direction (long/short) into account.

    Long EURUSD (sig=+1): +w EUR, -w USD
    Short EURUSD (sig=-1): -w EUR, +w USD
    """
    exp: dict[str, float] = {}
    for tkr, w in weights_row.items():
        if tkr == "Cash":
            continue
        s = float(sig_row.get(tkr, 0.0))
        if s == 0 or w == 0 or not np.isfinite(w):
            continue
        pq = parse_fx_pair(tkr)
        if pq is None:
            continue
        base, quote_ccy = pq
        exp[base] = exp.get(base, 0.0) + w * s
        exp[quote_ccy] = exp.get(quote_ccy, 0.0) - w * s
    return exp
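

# Cap mechanics: if on a given day the largest absolute per-currency net exposure is,
# say, 0.70 with CURRENCY_CAP = 0.35, every risky weight on that day is scaled by
# 0.35 / 0.70 = 0.5 and the freed budget is parked in Cash.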
def apply_currency_cap(weights_act: pd.DataFrame, sig: pd.DataFrame, cap: float) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Enforce |exposure(currency)| <= cap.

    Conservative but robust method (NO look-ahead):
    - If max_abs_exposure > cap, scale ALL risky positions by cap / max_abs_exposure
      and send the residual to Cash.

    Returns:
    - weights_capped (with Cash)
    - exposure_wide (diagnostics: exposure per currency)
    """
    w = weights_act.copy()
    if "Cash" not in w.columns:
        w["Cash"] = (1.0 - w.sum(axis=1)).clip(lower=0.0)

    tick_cols = [c for c in w.columns if c != "Cash"]

    # also collect the exposures for diagnostics
    all_ccy = set()
    exp_dicts: dict[pd.Timestamp, dict[str, float]] = {}

    for dt in w.index:
        sig_row = sig.loc[dt] if dt in sig.index else pd.Series(dtype=float)
        wr = w.loc[dt, tick_cols].fillna(0.0)

        exp = currency_exposure_from_weights(wr, sig_row)
        exp_dicts[pd.to_datetime(dt)] = exp
        all_ccy.update(exp.keys())

        if not exp:
            w.loc[dt, "Cash"] = max(0.0, 1.0 - float(wr.sum()))
            continue

        max_abs = max(abs(v) for v in exp.values())
        if np.isfinite(max_abs) and max_abs > cap and max_abs > 1e-12:
            scale = cap / max_abs
            wr2 = wr * scale
            w.loc[dt, tick_cols] = wr2.values
            w.loc[dt, "Cash"] = max(0.0, 1.0 - float(wr2.sum()))
        else:
            w.loc[dt, "Cash"] = max(0.0, 1.0 - float(wr.sum()))

    # exposures in wide format
    all_ccy = sorted(all_ccy)
    exp_wide = pd.DataFrame(0.0, index=w.index, columns=all_ccy)
    for dt, ex in exp_dicts.items():
        if dt in exp_wide.index:
            for c, v in ex.items():
                exp_wide.loc[dt, c] = v

    return w, exp_wide


# ============================================================
# Ranking / scoring (simple and robust)
# ============================================================
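
# Per-ticker diagnostics computed from the daily strategy PnL (equity assumed to start
# at 1): CAGR = equity_end ** (252 / n_days) - 1, annualised vol = std * sqrt(252),
# Sharpe = mean / std * sqrt(252) (zero risk-free rate), MaxDD = worst peak-to-trough
# drawdown of the equity line, Calmar = CAGR / |MaxDD|.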
def drawdown_stats_simple(ret: pd.Series) -> dict:
    ret = pd.to_numeric(ret, errors="coerce").fillna(0.0)
    eq = (1 + ret).cumprod()
    peak = eq.cummax()
    dd = eq / peak - 1.0

    cagr = eq.iloc[-1] ** (DAYS_PER_YEAR / max(1, len(eq))) - 1
    annvol = ret.std() * np.sqrt(DAYS_PER_YEAR)
    sharpe = (ret.mean() / (ret.std() + 1e-12)) * np.sqrt(DAYS_PER_YEAR)
    maxdd = dd.min() if len(dd) else np.nan
    calmar = (cagr / abs(maxdd)) if (np.isfinite(cagr) and np.isfinite(maxdd) and maxdd < 0) else np.nan

    return {
        "CAGR_%": float(np.round(cagr * 100, 3)) if np.isfinite(cagr) else np.nan,
        "AnnVol_%": float(np.round(annvol * 100, 3)) if np.isfinite(annvol) else np.nan,
        "Sharpe": float(np.round(sharpe, 3)) if np.isfinite(sharpe) else np.nan,
        "MaxDD_%": float(np.round(maxdd * 100, 3)) if np.isfinite(maxdd) else np.nan,
        "Calmar": float(np.round(calmar, 3)) if np.isfinite(calmar) else np.nan,
    }
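

# Composite score: each metric is mapped to a cross-sectional percentile rank in (0, 1]
# and blended as 0.5 * rank(Sharpe) + 0.3 * rank(CAGR) + 0.2 * rank(-MaxDD), so e.g. the
# ticker with the best Sharpe and roughly median CAGR and drawdown scores about
# 0.5 + 0.15 + 0.1 = 0.75.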
def compute_score(summary_df: pd.DataFrame) -> pd.DataFrame:
    """Simple, stable score (no fancy optimisation):

    - high Sharpe and CAGR, low MaxDD
    - normalisation via percentile ranks

    Keeps a diagnostic column (mode) for transparency.
    """
    df = summary_df.copy()

    # ranks in 0..1
    def rnk(s):
        s = pd.to_numeric(s, errors="coerce")
        n = s.notna().sum()
        if n <= 1:
            return pd.Series(np.nan, index=s.index)
        return s.rank(method="average") / n

    sharpe_r = rnk(df.get("Sharpe"))
    cagr_r = rnk(df.get("CAGR_%"))
    maxdd_r = rnk(-pd.to_numeric(df.get("MaxDD_%"), errors="coerce"))  # less negative = better

    df["Score_mode"] = "rank_sharpe_cagr_maxdd"
    df["Score"] = (0.5 * sharpe_r.fillna(0) + 0.3 * cagr_r.fillna(0) + 0.2 * maxdd_r.fillna(0))
    return df


# ============================================================
# MAIN
# ============================================================

def main():
    print("\n=== Trading Pattern Recon w Hurst - Forex (CORRECTED) ===")
    print(f"Fee: {FEE_BPS} bp | Short: {ALLOW_SHORT} | Currency cap: {CURRENCY_CAP:.2f}")
    print(f"Wavelet denoise: {DENOISE_ENABLED} ({DENOISE_WAVELET}, level={DENOISE_LEVEL}, min_len={DENOISE_MIN_LEN})")
    print("Execution: close(t), PnL: close(t+1)/close(t)\n")

    # 1) Fetch prices
    prices: dict[str, pd.DataFrame] = {}
    for tkr in TICKERS:
        try:
            print(f"Fetching {tkr} ...")
            prices[tkr] = fetch_price_series(tkr, FROM_DATE)
        except Exception as e:
            print(f"[WARN] Skip {tkr}: {e}")

    if len(prices) < 5:
        raise RuntimeError(f"Too few valid tickers ({len(prices)}).")

    # 2) Per-ticker backtest
    hurst_rows = []
    summary_rows = []
    sig_rows = []

    for tkr, dfp in prices.items():
        if "AdjClose" not in dfp.columns:
            continue

        close = dfp["AdjClose"].copy()
        close = pd.to_numeric(close, errors="coerce").dropna()
        if len(close) < (WP + HA + 80):
            print(f"[WARN] Series too short for {tkr}: len={len(close)}")
            continue

        r_log = np.log(close / close.shift(1)).dropna()
        if len(r_log) < 250:
            print(f"[WARN] Series too short for Hurst for {tkr}")

        h_rs = hurst_rs_returns(r_log.values)
        h_dfa = hurst_dfa_returns(r_log.values)
        H = np.nanmedian([h_rs, h_dfa])
        H = float(H) if np.isfinite(H) else np.nan
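        # Entry threshold derived from the Hurst estimate: e.g. H = 0.55 gives
        # theta_entry = 0.0055, slightly above the 0.005 fallback used when H is NaN.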
        theta_entry = (H / 100.0) if np.isfinite(H) else THETA_FALLBACK

        hurst_rows.append({"Ticker": tkr, "Hurst": H, "theta_entry": float(theta_entry)})

        bt = forward_backtest_one_asset(close=close, theta_entry=float(theta_entry), allow_short=ALLOW_SHORT)
        bt = bt.copy()
        bt.insert(0, "Ticker", tkr)
        sig_rows.append(bt.reset_index())

        stats = drawdown_stats_simple(bt["PnL"])
        hit = 100.0 * ((bt["PnL"] > 0).sum() / max(1, bt["PnL"].notna().sum()))
        turnover = bt["Signal"].diff().abs().fillna(0.0).mean() * 100.0  # mean |position change| per step, in % (0..200)

        stats.update({
            "Ticker": tkr,
            "HitRate_%": float(np.round(hit, 3)),
            "AvgStepRet_bps": float(np.round(bt["PnL"].mean() * 10000, 3)),
            "Turnover_%/day": float(np.round(turnover, 3)),
            "N_Steps": int(bt.shape[0]),
            "theta_entry": float(theta_entry),
            "theta_exit": float(THETA_EXIT) if THETA_EXIT is not None else np.nan,
            "sl_bps": float(SL_BPS) if SL_BPS is not None else np.nan,
            "tp_bps": float(TP_BPS) if TP_BPS is not None else np.nan,
            "trail_bps": float(TRAIL_BPS) if TRAIL_BPS is not None else np.nan,
            "time_stop_bars": int(TIME_STOP_BARS) if TIME_STOP_BARS is not None else np.nan,
            "allow_short": bool(ALLOW_SHORT),
            "fee_bps": float(FEE_BPS),
            "wavelet": DENOISE_WAVELET if DENOISE_ENABLED else "none",
            "currency_cap": float(CURRENCY_CAP),
        })
        summary_rows.append(stats)

    if not sig_rows:
        raise RuntimeError("No ticker was backtested successfully")

    hurst_df = pd.DataFrame(hurst_rows).sort_values("Ticker").reset_index(drop=True)
    summary_df = pd.DataFrame(summary_rows).sort_values("Ticker").reset_index(drop=True)
    signals_df = pd.concat(sig_rows, ignore_index=True)
    signals_df["Date"] = pd.to_datetime(signals_df["Date"]).dt.normalize()
    # 3) Ranking / Score / TopN
    ranking_df = compute_score(summary_df).sort_values("Score", ascending=False).reset_index(drop=True)
    top_tickers = ranking_df.head(TOP_N)["Ticker"].astype(str).tolist()
    print(f"\nTop{TOP_N} tickers (from the ranking): {top_tickers}\n")

    # 4) Build wide matrices
    wide_pnl = signals_df.pivot_table(index="Date", columns="Ticker", values="PnL", aggfunc="sum").fillna(0.0).sort_index()
    wide_sig = signals_df.pivot_table(index="Date", columns="Ticker", values="Signal", aggfunc="last").fillna(0).astype(int).sort_index()
    wide_est = signals_df.pivot_table(index="Date", columns="Ticker", values="EstOutcome", aggfunc="last").sort_index()

    # 5) Daily TopN selection: keep the classic layout (active mask + static ranking)
    #    - Equal Weight: 1/N over the ranked tickers (static TopN)
    #    - Risk Parity: inverse-vol on the returns of the static TopN tickers

    cols = [c for c in top_tickers if c in wide_pnl.columns]
    if not cols:
        raise RuntimeError("No TopN ticker present in wide_pnl")

    dates = wide_pnl.index

    # target weights
    w_eq_target = pd.DataFrame(0.0, index=dates, columns=wide_pnl.columns)
    w_eq_target.loc[:, cols] = 1.0 / len(cols)

    rp_hist = wide_pnl[cols]
    w_rp_hist = inverse_vol_weights(rp_hist, window=RP_LOOKBACK, max_weight=RP_MAX_WEIGHT).reindex(dates).ffill().fillna(0.0)

    w_rp_target = pd.DataFrame(0.0, index=dates, columns=wide_pnl.columns)
    w_rp_target.loc[:, cols] = w_rp_hist.values
    # re-normalise over cols
    s = w_rp_target[cols].sum(axis=1).replace(0, np.nan)
    w_rp_target.loc[:, cols] = w_rp_target[cols].div(s, axis=0).fillna(0.0)

    # active weights (mask on Signal != 0) + cash
    w_eq_act = make_active_weights(w_eq_target, wide_sig, add_cash=True)
    w_rp_act = make_active_weights(w_rp_target, wide_sig, add_cash=True)

    # currency cap
    w_eq_cap, ccy_eq = apply_currency_cap(w_eq_act, wide_sig, cap=CURRENCY_CAP)
    w_rp_cap, ccy_rp = apply_currency_cap(w_rp_act, wide_sig, cap=CURRENCY_CAP)

    # portfolio returns (risky assets only)
    ret_eq = (wide_pnl * w_eq_cap.drop(columns=["Cash"], errors="ignore")).sum(axis=1).rename("Ret_EqW_TopN")
    ret_rp = (wide_pnl * w_rp_cap.drop(columns=["Cash"], errors="ignore")).sum(axis=1).rename("Ret_RP_TopN")

    eq_eq = equity_from_returns(ret_eq).rename("Eq_EqW_TopN")
    eq_rp = equity_from_returns(ret_rp).rename("Eq_RP_TopN")

    # 6) Plots
    plt.figure(figsize=(10, 5))
    plt.plot(eq_eq, label=f"Equal Weight (Top{TOP_N}, fee {FEE_BPS}bp)")
    plt.plot(eq_rp, label=f"Risk Parity (Top{TOP_N}, cap {RP_MAX_WEIGHT:.4f}, fee {FEE_BPS}bp)")
    plt.title(f"FX portfolio equity lines (base 100) - Top{TOP_N} (shared_utils + URL)")
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()

    plot_heatmap_monthly(ret_eq, f"Monthly heatmap - Equal Weight FX (Top{TOP_N})")
    plt.show()

    plot_heatmap_monthly(ret_rp, f"Monthly heatmap - Risk Parity FX (Top{TOP_N})")
    plt.show()

    # 7) Export
    hurst_df.to_csv(OUT_DIR / "hurst.csv", index=False)
    summary_df.to_csv(OUT_DIR / "forward_bt_summary.csv", index=False)
    signals_df.to_csv(OUT_DIR / "forward_bt_signals.csv", index=False)

    ranking_df.to_csv(OUT_DIR / "ranking_score.csv", index=False)
    pd.Series(top_tickers, name="TopN_Tickers").to_csv(OUT_DIR / "topn_tickers.csv", index=False)

    pd.concat([ret_eq, ret_rp, eq_eq, eq_rp], axis=1).to_csv(OUT_DIR / "portfolio_daily.csv")

    w_eq_cap.to_csv(OUT_DIR / "weights_eq_active_capped.csv")
    w_rp_cap.to_csv(OUT_DIR / "weights_rp_active_capped.csv")

    ccy_eq.to_csv(OUT_DIR / "currency_exposure_eq.csv")
    ccy_rp.to_csv(OUT_DIR / "currency_exposure_rp.csv")

    print(f"\nSaved to: {OUT_DIR.resolve()}\n")


if __name__ == "__main__":
    main()