eseguiti fix e introdotta versione wavelets

This commit is contained in:
fredmaloggia
2025-12-09 20:35:39 +01:00
parent 9543d63591
commit 71c796b1f1
4 changed files with 2805 additions and 269 deletions

View File

@@ -1391,13 +1391,6 @@ for c in ["CAGR_%","Sharpe","Calmar","MaxDD_%eq","HitRate_%"]:
if c in df_sum.columns:
df_sum[c] = pd.to_numeric(df_sum[c], errors="coerce")
# def is_crypto(row):
# txt = f"{row.get('Nome','')} {row.get('Categoria','')} {row.get('Asset Class','')}".lower()
# return any(k in txt for k in ["crypto","cripto","bitcoin","btc","ether","eth"])
# if "is_crypto" not in df_sum.columns:
# df_sum["is_crypto"] = df_sum.apply(is_crypto, axis=1)
def _safe_rank(s: pd.Series):
s = pd.to_numeric(s, errors="coerce")
if s.notna().sum() == 0:
@@ -1616,15 +1609,6 @@ if need_rebuild:
except Exception as e:
print(f"[WARN] Ricostruzione metriche fallita: {e}")
# Flag crypto se manca
if "is_crypto" not in df_sum.columns:
def _is_crypto_row(row):
txt = f"{row.get('Nome','')} {row.get('Categoria','')} {row.get('Asset Class','')}".lower()
return any(k in txt for k in ["crypto","cripto","bitcoin","btc","ether","eth"])
df_sum["is_crypto"] = df_sum.apply(_is_crypto_row, axis=1)
df_sum = _apply_score(df_sum)
TOP_N = 15
@@ -1635,8 +1619,6 @@ base_isins = (
)
# Nessuna strategia cripto separata: le criptovalute sono trattate come gli altri asset
crypto_isin = None
print(f"[INFO] Ranking full-sample (solo debug, i portafogli usano ranking rolling): {base_isins}")
# -----------------------------
@@ -1708,7 +1690,6 @@ def plot_portfolio_composition(weights: pd.DataFrame,
Esempio:
plot_portfolio_composition(w_eq, "Equal Weight", "composition_equal_weight.png")
plot_portfolio_composition(w_rp, "Risk Parity", "composition_risk_parity.png")
plot_portfolio_composition(w_agg, "Aggressiva + Crypto", "composition_agg_crypto.png")
"""
import os
import numpy as np
@@ -2061,10 +2042,9 @@ def plot_portfolio_composition_fixed(weights: pd.DataFrame,
# Plot salvato senza visualizzazione interattiva
# --- 1) Pesi teorici dei tre portafogli (già costruiti sopra) ---
# --- 1) Pesi teorici dei portafogli (già costruiti sopra) ---
# w_eq : equal weight su 'cols'
# w_rp : risk parity (weights_rp)
# w_agg: 85% equal + 15% crypto se disponibile
def _sanitize_weights(W: pd.DataFrame, index_like: pd.Index) -> pd.DataFrame:
if W is None or W.empty:
@@ -2079,18 +2059,14 @@ if 'w_eq' not in globals():
w_eq = pd.DataFrame(index=wide_pnl.index, columns=wide_pnl.columns)
if 'w_rp' not in globals():
w_rp = weights_rp.copy() if isinstance(weights_rp, pd.DataFrame) else pd.DataFrame(index=wide_pnl.index, columns=wide_pnl.columns)
if 'w_agg' not in globals():
w_agg = w_eq.copy()
w_eq = _sanitize_weights(w_eq, wide_pnl.index)
w_rp = _sanitize_weights(w_rp, wide_pnl.index)
w_agg = _sanitize_weights(w_agg, wide_pnl.index)
# --- 2) Pesi ATTIVI (mascherati con i Signal) ---
# renorm_to_1=False → lascia la quota NON investita in 'Cash'
w_eq_act = make_active_weights(w_eq, wide_sig, renorm_to_1=False, add_cash=True, cash_label="Cash")
w_rp_act = make_active_weights(w_rp, wide_sig, renorm_to_1=False, add_cash=True, cash_label="Cash")
w_agg_act = make_active_weights(w_agg, wide_sig, renorm_to_1=False, add_cash=True, cash_label="Cash")
# Export pesi giornalieri (Equal/Risk Parity) con cash normalizzato a 100%
def _export_weights_daily(w_eq_act_df: pd.DataFrame, w_rp_act_df: pd.DataFrame, path=WEIGHTS_DAILY_XLSX):
@@ -2310,7 +2286,7 @@ import numpy as np
def rebuild_daily_from_trades_dict(trades_dict):
"""
trades_dict: {'Equal_Weight': df, 'Risk_Parity': df, 'Aggressiva_Crypto': df}
trades_dict: {'Equal_Weight': df, 'Risk_Parity': df}
Ogni df deve avere: OpenDate, CloseDate, Size, Duration_bars, PnL_%
Regola: distribuiamo il PnL del trade su ciascun giorno di durata con
un rendimento giornaliero costante r tale che (1+r)^D - 1 = PnL.
@@ -2614,16 +2590,14 @@ except NameError:
DAYS_PER_YEAR = 252
def _select_isins_for_topN(df_sum: pd.DataFrame, top_n: int):
"""Seleziona i migliori 'top_n' ISIN (crypto incluse) in base allo Score."""
"""Seleziona i migliori 'top_n' ISIN in base allo Score."""
df_sum_loc = df_sum.copy()
base_isins_N = (
df_sum_loc
.sort_values("Score", ascending=False)
.head(top_n)["ISIN"].astype(str).str.strip().tolist()
)
# Nessuna crypto separata: tutto trattato allo stesso modo
crypto_isin_N = None
return base_isins_N, crypto_isin_N
return base_isins_N
def _build_portfolio_returns_for_isins(base_isins_N, wide_pnl):
"""
@@ -2658,244 +2632,30 @@ def _build_portfolio_returns_for_isins(base_isins_N, wide_pnl):
return ret_eq_N, ret_rp_N
# # --- calcolo metriche per TopN 8..15 --- (DISATTIVATO)
# rows_byN = []
# for top_n in range(8, 16):
# portN = _get_dynamic_portfolio(top_n)
# ret_eq_N = portN["ret_eq"]
# ret_rp_N = portN["ret_rp"]
#
# # (OPZIONALE) se vuoi anche salvare equity/heatmap per ciascun N:
# # _save_equity_plot_byN(ret_eq_N, ret_rp_N, top_n)
# # _save_heatmaps_byN(ret_eq_N, ret_rp_N, top_n)
#
# # Calcola le metriche (come nell'ottimizzatore)
# for strategy_name, rser in [
# ("Equal_Weight", ret_eq_N),
# ("Risk_Parity", ret_rp_N),
# ]:
# m = _calc_all_metrics_from_returns(rser)
# m["TopN"] = top_n
# m["Strategy"] = strategy_name
# rows_byN.append(m)
#
# # DataFrame finale con la colonna TopN
# final_byN_df = pd.DataFrame(rows_byN)[[
# "TopN", "Strategy",
# "Rendimento_Ann", "Volatilita_Ann", "CAGR", "R2_Equity",
# "MaxDD", "DD_Duration_Max", "TTR_from_MDD",
# "AAW", "AUW", "Heal_Index", "H_min_100m_5Y"
# ]].sort_values(["TopN","Strategy"]).reset_index(drop=True)
#
# # Salvataggio: aggiunge/riscrive i fogli in final_metrics.xlsx
# # - mantiene (se vuoi) anche il foglio "Portfolio_Metrics" del caso corrente TOP_N
# try:
# with pd.ExcelWriter(FINAL_METRICS_XLSX, engine="openpyxl", mode="a", if_sheet_exists="replace") as xw:
# final_byN_df.to_excel(xw, "Portfolio_Metrics_By_N", index=False)
# except Exception:
# with pd.ExcelWriter(FINAL_METRICS_XLSX) as xw:
# final_byN_df.to_excel(xw, "Portfolio_Metrics_By_N", index=False)
#
# print(f"✅ Salvato: {FINAL_METRICS_XLSX} (Portfolio_Metrics_By_N) per TopN = 8..15")
# ==============================
# Metriche portafoglio (TOP_N corrente) → Excel
# ==============================
metrics_rows = []
for strategy_name, rser in [
("Equal_Weight", ret_eq),
("Risk_Parity", ret_rp),
]:
m = _calc_all_metrics_from_returns(rser)
m["TopN"] = TOP_N
m["Strategy"] = strategy_name
metrics_rows.append(m)
# # ======================================================================
# # 6bis) Plot per ciascun TopN (8..15): Equity + Heatmap per strategia (DISATTIVATO)
# # ======================================================================
# # import os
# # import numpy as np
# # import matplotlib.pyplot as plt
# #
# # OUT_DIR = PLOT_DIR
# # OUT_DIR.mkdir(parents=True, exist_ok=True)
# #
# # def _safe_series(r: pd.Series) -> pd.Series:
# # """Forza tipo numerico e se tutto NaN, rimpiazza con 0.0 (linea piatta ma plot salvato)."""
# # r = pd.to_numeric(r, errors="coerce")
# # if r.notna().sum() == 0:
# # r = pd.Series(0.0, index=r.index)
# # return r.fillna(0.0)
# #
# # def _save_equity_plot_byN(ret_eq, ret_rp, top_n: int):
# # ret_eq = _safe_series(ret_eq)
# # ret_rp = _safe_series(ret_rp)
# #
# # eq_eq = equity_from_returns(ret_eq)
# # eq_rp = equity_from_returns(ret_rp)
# #
# # if eq_eq.empty and eq_rp.empty:
# # eq_eq = pd.Series([100.0], index=[pd.Timestamp("2000-01-01")])
# #
# # fig, ax = plt.subplots(figsize=(10, 6))
# # eq_eq.plot(ax=ax, label="Equal Weight")
# # eq_rp.plot(ax=ax, label="Risk Parity")
# # ax.legend()
# # ax.grid(True)
# # ax.set_title(f"Equity line - TopN={top_n}")
# # fig.tight_layout()
# # savefig_safe(str(OUT_DIR / f"equity_topN_{top_n}.png"), dpi=150)
# # plt.close(fig)
# #
# # def _save_heatmaps_byN(ret_eq, ret_rp, top_n: int):
# # ret_eq = _safe_series(ret_eq)
# # ret_rp = _safe_series(ret_rp)
# #
# # plot_heatmap_monthly(
# # ret_eq,
# # f"Heatmap mensile - Equal Weight (TopN={top_n})",
# # save_path=OUT_DIR / f"heatmap_equal_topN_{top_n}.png"
# # )
# # plot_heatmap_monthly(
# # ret_rp,
# # f"Heatmap mensile - Risk Parity (TopN={top_n})",
# # save_path=OUT_DIR / f"heatmap_rp_topN_{top_n}.png"
# # )
# #
# # # Loop 8..15 replicando i plot per ciascuna combinazione
# # for top_n in range(8, 16):
# # portN = _get_dynamic_portfolio(top_n)
# # ret_eq_N = portN["ret_eq"]
# # ret_rp_N = portN["ret_rp"]
# #
# # _save_equity_plot_byN(ret_eq_N, ret_rp_N, top_n)
# # _save_heatmaps_byN(ret_eq_N, ret_rp_N, top_n)
# #
# # print(f"✅ Plot salvati in: {OUT_DIR}/")
df_metrics = pd.DataFrame(metrics_rows)[[
"TopN", "Strategy",
"Rendimento_Ann", "Volatilita_Ann", "CAGR", "R2_Equity",
"MaxDD", "DD_Duration_Max", "TTR_from_MDD",
"AAW", "AUW", "Heal_Index", "H_min_100m_5Y",
]]
# ======================================================================
# 6ter) Plot composizione (ATTIVI + Cash) per ciascun TopN (8..15)
# ======================================================================
import os
import numpy as np
import matplotlib.pyplot as plt
OUT_DIR = PLOT_DIR
OUT_DIR.mkdir(parents=True, exist_ok=True)
# -- safety: helper per pesi attivi e plotting, se mancassero già nel file --
if 'make_active_weights' not in globals():
    def make_active_weights(w_base: pd.DataFrame,
                            sig: pd.DataFrame,
                            renorm_to_1: bool = False,
                            add_cash: bool = True,
                            cash_label: str = "Cash") -> pd.DataFrame:
        """Mask theoretical weights with the signals and optionally add a cash column.

        Fallback definition, only created when no ``make_active_weights``
        already exists in the module globals.

        Args:
            w_base: Theoretical weights; values are coerced to numeric and
                missing entries are treated as 0.
            sig: Signals, re-aligned to the shape of ``w_base``; a weight is
                kept only where the (integer-cast) signal is > 0.
            renorm_to_1: When True, each row of active weights is rescaled to
                sum to 1 (rows with no active weight stay at 0).
            add_cash: When True, a ``cash_label`` column is written: 0 if the
                rows were renormalised, otherwise ``1 - row sum`` clipped to
                the [0, 1] range.
            cash_label: Name of the cash column.

        Returns:
            The active weights restricted to columns whose absolute sum is
            strictly positive; an empty frame indexed like ``sig`` when
            ``w_base`` is None or empty.
        """
        import numpy as np
        import pandas as pd

        if w_base is None or w_base.empty:
            return pd.DataFrame(index=sig.index, columns=[])

        base = w_base.copy().apply(pd.to_numeric, errors="coerce").fillna(0.0)
        signal_on = sig.reindex_like(base).fillna(0).astype(int) > 0
        active = base * signal_on
        invested = active.sum(axis=1)

        if renorm_to_1:
            # Rows with nothing invested would divide by zero: route them via NaN -> 0.
            active = active.div(invested.replace(0, np.nan), axis=0).fillna(0.0)
            residual = 0.0
        else:
            # The non-invested share of each row is what ends up in cash.
            residual = (1.0 - invested).clip(lower=0.0, upper=1.0)

        if add_cash:
            active[cash_label] = residual

        nonzero = [col for col in active.columns
                   if float(np.abs(active[col]).sum()) > 0.0]
        return active[nonzero]
if 'plot_portfolio_composition_fixed' not in globals():
    def plot_portfolio_composition_fixed(weights: pd.DataFrame,
                                         title: str,
                                         save_path: str | None = None,
                                         max_legend: int = 20):
        """Draw a stacked-area chart of portfolio weights over time and optionally save it.

        Fallback definition, only created when no
        ``plot_portfolio_composition_fixed`` already exists in the module globals.

        Args:
            weights: One column per instrument, one row per date. Values are
                coerced to numeric (missing -> 0), duplicated index entries
                keep their last occurrence, and all-zero columns are dropped.
            title: Text appended to the chart title and used in log messages.
            save_path: Destination image path; the parent folder is created
                when missing. Nothing is written when falsy.
            max_legend: Maximum number of individual series shown; the rest
                are summed into an "Altri" series. "Cash" is always kept and
                drawn last.

        Returns:
            None. The function prints a skip message and returns early when
            there are no weights, no non-zero column, or fewer than two rows.
        """
        # Local import: the formatter is only needed by this fallback.
        from matplotlib.ticker import PercentFormatter

        if weights is None or getattr(weights, "empty", True):
            print(f"[SKIP] Nessun peso per: {title}")
            return

        W = weights.copy().apply(pd.to_numeric, errors="coerce").fillna(0.0)
        if W.index.has_duplicates:
            W = W[~W.index.duplicated(keep="last")]
        W = W.sort_index()

        keep_cols = [c for c in W.columns if float(np.abs(W[c]).sum()) > 0.0]
        if not keep_cols or len(W.index) < 2:
            print(f"[SKIP] Dati insufficienti per: {title}")
            return
        W = W[keep_cols]

        # Largest average weight first, with Cash forced to the end of the stack.
        ordered = W.mean(0).sort_values(ascending=False).index.tolist()
        if "Cash" in ordered:
            ordered = [c for c in ordered if c != "Cash"] + ["Cash"]

        if len(ordered) > max_legend:
            head = ordered[:max_legend]
            if "Cash" not in head and "Cash" in ordered:
                head = head[:-1] + ["Cash"]
            tail = [c for c in ordered if c not in head]
            W_show = W[head].copy()
            if tail:
                W_show["Altri"] = W[tail].sum(1)
                ordered = head + ["Altri"]
            else:
                ordered = head
        else:
            W_show = W[ordered].copy()

        cmap = plt.colormaps.get_cmap("tab20")
        colors = [cmap(i % cmap.N) for i in range(len(ordered))]

        fig, ax = plt.subplots(figsize=(11, 6))
        try:
            ax.stackplot(W_show.index, [W_show[c].values for c in ordered],
                         labels=ordered, colors=colors)
            ax.set_title(f"Composizione portafoglio nel tempo {title}")
            ymax = float(np.nanmax(W_show.sum(1).values))
            ax.set_ylim(0, max(1.0, ymax if np.isfinite(ymax) else 1.0))
            ax.grid(True, alpha=0.3)
            ax.set_ylabel("Peso")
            # A formatter keeps labels tied to the ticks; fixed labels on
            # auto-located ticks trigger a matplotlib warning and can go stale.
            ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0, decimals=0))
            ncol = 2 if len(ordered) > 10 else 1
            ax.legend(loc="upper left", bbox_to_anchor=(1.01, 1), frameon=False,
                      ncol=ncol, title="ISIN")
            fig.tight_layout()
            if save_path:
                folder = os.path.dirname(save_path) or "."
                os.makedirs(folder, exist_ok=True)
                fig.savefig(save_path, dpi=150, bbox_inches="tight")
                print(f"💾 Salvato: {os.path.abspath(save_path)}")
        finally:
            # No interactive display: release the figure so repeated calls
            # do not accumulate open figures in memory.
            plt.close(fig)
def _build_weights_for_isins(base_isins_N, crypto_isin_N, wide_pnl):
    """Build the theoretical Equal / Risk Parity / Aggressive weights for a set of ISINs.

    Args:
        base_isins_N: Candidate ISINs; only those present as columns of
            ``wide_pnl`` are used.
        crypto_isin_N: Optional extra ISIN. When it is not None, is a column
            of ``wide_pnl`` and at least one base ISIN is usable, the
            aggressive portfolio is 85% equally split on the base ISINs plus
            15% on this one; otherwise it is a copy of the equal-weight frame.
        wide_pnl: Frame whose index is reused for every weight frame and
            whose columns define the available ISINs.

    Returns:
        Tuple ``(w_eq, w_rp, w_agg)`` of row-normalised weight frames; an
        empty frame on ``wide_pnl.index`` replaces any empty input.
    """
    idx = wide_pnl.index
    colsN = [isin for isin in base_isins_N if isin in wide_pnl.columns]
    has_assets = len(colsN) > 0

    def _blank():
        return pd.DataFrame(index=idx, columns=[])

    def _norm(W):
        # Row-normalise purely for numerical safety; all-zero rows stay at 0.
        if W is None or W.empty:
            return _blank()
        row_total = W.sum(axis=1).replace(0, np.nan)
        return W.div(row_total, axis=0).fillna(0.0).clip(lower=0.0)

    if has_assets:
        # Equal weight, then capped inverse-volatility (risk parity) weights.
        w_eq_N = pd.DataFrame(1/len(colsN), index=idx, columns=colsN)
        w_rp_N = inverse_vol_weights(
            wide_pnl[colsN],
            window=60,
            max_weight=RP_MAX_WEIGHT
        )
    else:
        w_eq_N = _blank()
        w_rp_N = _blank()

    use_crypto = (
        has_assets
        and (crypto_isin_N is not None)
        and (crypto_isin_N in wide_pnl.columns)
    )
    if use_crypto:
        w_agg_N = pd.DataFrame(0.0, index=idx, columns=colsN + [crypto_isin_N])
        w_agg_N[colsN] = 0.85/len(colsN)
        w_agg_N[crypto_isin_N] = 0.15
    else:
        w_agg_N = w_eq_N.copy()

    return _norm(w_eq_N), _norm(w_rp_N), _norm(w_agg_N)
# # === Loop 8..15: crea pesi, attiva coi Signal, plotta e SALVA in OUT_DIR === (DISATTIVATO)
# for top_n in range(8, 16):
# portN = _get_dynamic_portfolio(top_n)
# w_eq_act_N = portN["w_eq_act"]
# w_rp_act_N = portN["w_rp_act"]
#
# # path di salvataggio
# sp_eq = OUT_DIR / f"composition_equal_topN_{top_n}.png"
# sp_rp = OUT_DIR / f"composition_rp_topN_{top_n}.png"
#
# # plot + salvataggio (SOLO Equal e Risk Parity)
# plot_portfolio_composition_fixed(w_eq_act_N, f"Equal Weight (attivi + Cash) TopN={top_n}", sp_eq)
# plot_portfolio_composition_fixed(w_rp_act_N, f"Risk Parity (attivi + Cash) TopN={top_n}", sp_rp)
try:
with pd.ExcelWriter(FINAL_METRICS_XLSX, engine="openpyxl", mode="a", if_sheet_exists="replace") as xw:
df_metrics.to_excel(xw, sheet_name="Portfolio_Metrics", index=False)
except Exception:
with pd.ExcelWriter(FINAL_METRICS_XLSX) as xw:
df_metrics.to_excel(xw, sheet_name="Portfolio_Metrics", index=False)
print(f"[INFO] Salvato: {FINAL_METRICS_XLSX} (Portfolio_Metrics)")

File diff suppressed because it is too large Load Diff

View File

@@ -11,6 +11,13 @@
"theta": 0.005,
"embargo": null
},
"wavelet_filter": {
"enabled": true,
"wavelet": "db4",
"level": 4,
"mode": "symmetric",
"threshold_mode": "soft"
},
"tagging": {
"z_rev": 2.0,
"z_vol": 2.0,

View File

@@ -9,6 +9,10 @@ from typing import Dict, List, Optional, Sequence, Tuple
import numpy as np
import pandas as pd
import pyodbc
try:
import pywt
except ImportError: # pragma: no cover - optional dependency
pywt = None
DEFAULT_CONFIG_PATH = Path("config/pattern_knn_config.json")
@@ -87,6 +91,58 @@ def z_norm(arr: np.ndarray) -> Optional[np.ndarray]:
return (arr - mu) / (sd + 1e-12)
def wavelet_denoise(
    series: pd.Series,
    wavelet: str = "db3",
    level: int = 3,
    mode: str = "symmetric",
    threshold_mode: str = "soft",
) -> Optional[pd.Series]:
    """Smooth a series by thresholding its wavelet detail coefficients.

    Args:
        series: Input values; coerced to numeric, NaNs are excluded from the
            transform.
        wavelet: Wavelet name handed to ``pywt.Wavelet``.
        level: Requested decomposition depth; limited to what
            ``pywt.dwt_max_level`` reports for the non-NaN length, never
            below 1.
        mode: Signal-extension mode used for both decomposition and
            reconstruction.
        threshold_mode: ``"hard"`` delegates to ``pywt.threshold``; any other
            value applies soft shrinkage.

    Returns:
        A series on the original index (gaps filled by interpolation in both
        directions), or None when PyWavelets is not available or the input
        has no numeric value, so callers can fall back to the raw signal.
    """
    if pywt is None:
        print("[WARN] pywt non installato: salto il filtraggio wavelet.")
        return None

    numeric = pd.to_numeric(series, errors="coerce")
    clean = numeric.dropna()
    if clean.empty:
        return None

    wav = pywt.Wavelet(wavelet)
    deepest = pywt.dwt_max_level(len(clean), wav.dec_len)
    depth = max(1, min(level, deepest)) if deepest > 0 else 1
    bands = pywt.wavedec(clean.values, wav, mode=mode, level=depth)

    # Universal threshold (Donoho-Johnstone): noise scale from the finest details.
    finest = bands[-1]
    noise = np.median(np.abs(finest)) / 0.6745 if len(finest) > 0 else 0.0
    cutoff = noise * np.sqrt(2 * np.log(len(clean))) if noise > 0 else 0.0

    def _shrink(c: np.ndarray) -> np.ndarray:
        if c is None or c.size == 0:
            return c
        if threshold_mode == "hard":
            return pywt.threshold(c, value=cutoff, mode="hard")
        # Soft shrinkage done by hand to avoid divide-by-zero warnings.
        magnitude = np.abs(c)
        above = magnitude > cutoff
        shrunk = np.zeros_like(c)
        shrunk[above] = np.sign(c[above]) * (magnitude[above] - cutoff)
        return shrunk

    if cutoff <= 0:
        # No usable noise estimate: reconstruct from the untouched coefficients.
        filtered = bands
    else:
        # The approximation band is kept as is; only detail bands are shrunk.
        filtered = [bands[0]] + [_shrink(c) for c in bands[1:]]

    rebuilt = pywt.waverec(filtered, wav, mode=mode)[: len(clean)]
    smoothed = pd.Series(rebuilt, index=clean.index)
    # Re-align to the original index and fill the positions dropped as NaN.
    return smoothed.reindex(numeric.index).interpolate(limit_direction="both")
def build_pattern_library(
ret_series: pd.Series,
wp: int,