eseguiti fix e introdotta versione wavelets

2025-12-09 20:35:39 +01:00
parent 9543d63591
commit 71c796b1f1
4 changed files with 2805 additions and 269 deletions
--- a/Hurst.py
+++ b/Hurst.py
@@ -1391,13 +1391,6 @@ for c in ["CAGR_%","Sharpe","Calmar","MaxDD_%eq","HitRate_%"]:
    if c in df_sum.columns:
        df_sum[c] = pd.to_numeric(df_sum[c], errors="coerce")

-# def is_crypto(row):
-#     txt = f"{row.get('Nome','')} {row.get('Categoria','')} {row.get('Asset Class','')}".lower()
-#     return any(k in txt for k in ["crypto","cripto","bitcoin","btc","ether","eth"])
-
-# if "is_crypto" not in df_sum.columns:
-#     df_sum["is_crypto"] = df_sum.apply(is_crypto, axis=1)
-
 def _safe_rank(s: pd.Series):
    s = pd.to_numeric(s, errors="coerce")
    if s.notna().sum() == 0:
@@ -1616,15 +1609,6 @@ if need_rebuild:
    except Exception as e:
        print(f"[WARN] Ricostruzione metriche fallita: {e}")

-# Flag crypto se manca
-if "is_crypto" not in df_sum.columns:
-    def _is_crypto_row(row):
-        txt = f"{row.get('Nome','')} {row.get('Categoria','')} {row.get('Asset Class','')}".lower()
-        return any(k in txt for k in ["crypto","cripto","bitcoin","btc","ether","eth"])
-    df_sum["is_crypto"] = df_sum.apply(_is_crypto_row, axis=1)
-
-
-
 df_sum = _apply_score(df_sum)

 TOP_N = 15
@@ -1635,8 +1619,6 @@ base_isins = (
 )

 # Nessuna strategia cripto separata: le criptovalute sono trattate come gli altri asset
-crypto_isin = None
-
 print(f"[INFO] Ranking full-sample (solo debug, i portafogli usano ranking rolling): {base_isins}")

 # -----------------------------
@@ -1708,7 +1690,6 @@ def plot_portfolio_composition(weights: pd.DataFrame,
    Esempio:
        plot_portfolio_composition(w_eq,  "Equal Weight",        "composition_equal_weight.png")
        plot_portfolio_composition(w_rp,  "Risk Parity",         "composition_risk_parity.png")
-        plot_portfolio_composition(w_agg, "Aggressiva + Crypto", "composition_agg_crypto.png")
    """
    import os
    import numpy as np
@@ -2061,10 +2042,9 @@ def plot_portfolio_composition_fixed(weights: pd.DataFrame,

    # Plot salvato senza visualizzazione interattiva

-# --- 1) Pesi teorici dei tre portafogli (già costruiti sopra) ---
+# --- 1) Pesi teorici dei portafogli (già costruiti sopra) ---
 # w_eq : equal weight su 'cols'
 # w_rp : risk parity (weights_rp)
-# w_agg: 85% equal + 15% crypto se disponibile

 def _sanitize_weights(W: pd.DataFrame, index_like: pd.Index) -> pd.DataFrame:
    if W is None or W.empty:
@@ -2079,18 +2059,14 @@ if 'w_eq' not in globals():
    w_eq = pd.DataFrame(index=wide_pnl.index, columns=wide_pnl.columns)
 if 'w_rp' not in globals():
    w_rp = weights_rp.copy() if isinstance(weights_rp, pd.DataFrame) else pd.DataFrame(index=wide_pnl.index, columns=wide_pnl.columns)
-if 'w_agg' not in globals():
-    w_agg = w_eq.copy()

 w_eq  = _sanitize_weights(w_eq,  wide_pnl.index)
 w_rp  = _sanitize_weights(w_rp,  wide_pnl.index)
-w_agg = _sanitize_weights(w_agg, wide_pnl.index)

 # --- 2) Pesi ATTIVI (mascherati con i Signal) ---
 #   renorm_to_1=False  → lascia la quota NON investita in 'Cash'
 w_eq_act  = make_active_weights(w_eq,  wide_sig, renorm_to_1=False, add_cash=True, cash_label="Cash")
 w_rp_act  = make_active_weights(w_rp,  wide_sig, renorm_to_1=False, add_cash=True, cash_label="Cash")
-w_agg_act = make_active_weights(w_agg, wide_sig, renorm_to_1=False, add_cash=True, cash_label="Cash")

 # Export pesi giornalieri (Equal/Risk Parity) con cash normalizzato a 100%
 def _export_weights_daily(w_eq_act_df: pd.DataFrame, w_rp_act_df: pd.DataFrame, path=WEIGHTS_DAILY_XLSX):
@@ -2310,7 +2286,7 @@ import numpy as np

 def rebuild_daily_from_trades_dict(trades_dict):
    """
-    trades_dict: {'Equal_Weight': df, 'Risk_Parity': df, 'Aggressiva_Crypto': df}
+    trades_dict: {'Equal_Weight': df, 'Risk_Parity': df}
    Ogni df deve avere: OpenDate, CloseDate, Size, Duration_bars, PnL_%
    Regola: distribuiamo il PnL del trade su ciascun giorno di durata con
            un rendimento giornaliero costante r tale che (1+r)^D - 1 = PnL.
@@ -2614,16 +2590,14 @@ except NameError:
    DAYS_PER_YEAR = 252

 def _select_isins_for_topN(df_sum: pd.DataFrame, top_n: int):
-    """Seleziona i migliori 'top_n' ISIN (crypto incluse) in base allo Score."""
+    """Seleziona i migliori 'top_n' ISIN in base allo Score."""
    df_sum_loc = df_sum.copy()
    base_isins_N = (
        df_sum_loc
        .sort_values("Score", ascending=False)
        .head(top_n)["ISIN"].astype(str).str.strip().tolist()
    )
-    # Nessuna crypto separata: tutto trattato allo stesso modo
-    crypto_isin_N = None
-    return base_isins_N, crypto_isin_N
+    return base_isins_N

 def _build_portfolio_returns_for_isins(base_isins_N, wide_pnl):
    """
@@ -2658,244 +2632,30 @@ def _build_portfolio_returns_for_isins(base_isins_N, wide_pnl):

    return ret_eq_N, ret_rp_N

-# # --- calcolo metriche per TopN 8..15 --- (DISATTIVATO)
-# rows_byN = []
-# for top_n in range(8, 16):
-#     portN = _get_dynamic_portfolio(top_n)
-#     ret_eq_N = portN["ret_eq"]
-#     ret_rp_N = portN["ret_rp"]
-# 
-#     # (OPZIONALE) se vuoi anche salvare equity/heatmap per ciascun N:
-#     # _save_equity_plot_byN(ret_eq_N, ret_rp_N, top_n)
-#     # _save_heatmaps_byN(ret_eq_N, ret_rp_N, top_n)
-# 
-#     # Calcola le metriche (come nell'ottimizzatore)
-#     for strategy_name, rser in [
-#         ("Equal_Weight", ret_eq_N),
-#         ("Risk_Parity",  ret_rp_N),
-#     ]:
-#         m = _calc_all_metrics_from_returns(rser)
-#         m["TopN"] = top_n
-#         m["Strategy"] = strategy_name
-#         rows_byN.append(m)
-# 
-# # DataFrame finale con la colonna TopN
-# final_byN_df = pd.DataFrame(rows_byN)[[
-#     "TopN", "Strategy",
-#     "Rendimento_Ann", "Volatilita_Ann", "CAGR", "R2_Equity",
-#     "MaxDD", "DD_Duration_Max", "TTR_from_MDD",
-#     "AAW", "AUW", "Heal_Index", "H_min_100m_5Y"
-# ]].sort_values(["TopN","Strategy"]).reset_index(drop=True)
-# 
-# # Salvataggio: aggiunge/riscrive i fogli in final_metrics.xlsx
-# # - mantiene (se vuoi) anche il foglio "Portfolio_Metrics" del caso corrente TOP_N
-# try:
-#     with pd.ExcelWriter(FINAL_METRICS_XLSX, engine="openpyxl", mode="a", if_sheet_exists="replace") as xw:
-#         final_byN_df.to_excel(xw, "Portfolio_Metrics_By_N", index=False)
-# except Exception:
-#     with pd.ExcelWriter(FINAL_METRICS_XLSX) as xw:
-#         final_byN_df.to_excel(xw, "Portfolio_Metrics_By_N", index=False)
-# 
-# print(f"✅ Salvato: {FINAL_METRICS_XLSX} (Portfolio_Metrics_By_N) per TopN = 8..15")
+# ==============================
+# Portfolio metrics (current TOP_N) → Excel
+# ==============================
+# Collect one metrics record per strategy for the current TOP_N.
+metrics_rows = []
+for strategy_name, rser in [
+    ("Equal_Weight", ret_eq),
+    ("Risk_Parity",  ret_rp),
+]:
+    # NOTE(review): the helper's result is tagged in place below, so it is
+    # presumably a fresh dict-like object per call — confirm it is not cached.
+    m = _calc_all_metrics_from_returns(rser)
+    m["TopN"] = TOP_N
+    m["Strategy"] = strategy_name
+    metrics_rows.append(m)

-# # ======================================================================
-# # 6bis) Plot per ciascun TopN (8..15): Equity + Heatmap per strategia (DISATTIVATO)
-# # ======================================================================
-# # import os
-# # import numpy as np
-# # import matplotlib.pyplot as plt
-# #
-# # OUT_DIR = PLOT_DIR
-# # OUT_DIR.mkdir(parents=True, exist_ok=True)
-# #
-# # def _safe_series(r: pd.Series) -> pd.Series:
-# #     """Forza tipo numerico e se tutto NaN, rimpiazza con 0.0 (linea piatta ma plot salvato)."""
-# #     r = pd.to_numeric(r, errors="coerce")
-# #     if r.notna().sum() == 0:
-# #         r = pd.Series(0.0, index=r.index)
-# #     return r.fillna(0.0)
-# #
-# # def _save_equity_plot_byN(ret_eq, ret_rp, top_n: int):
-# #     ret_eq  = _safe_series(ret_eq)
-# #     ret_rp  = _safe_series(ret_rp)
-# #
-# #     eq_eq  = equity_from_returns(ret_eq)
-# #     eq_rp  = equity_from_returns(ret_rp)
-# #
-# #     if eq_eq.empty and eq_rp.empty:
-# #         eq_eq = pd.Series([100.0], index=[pd.Timestamp("2000-01-01")])
-# #
-# #     fig, ax = plt.subplots(figsize=(10, 6))
-# #     eq_eq.plot(ax=ax, label="Equal Weight")
-# #     eq_rp.plot(ax=ax, label="Risk Parity")
-# #     ax.legend()
-# #     ax.grid(True)
-# #     ax.set_title(f"Equity line - TopN={top_n}")
-# #     fig.tight_layout()
-# #     savefig_safe(str(OUT_DIR / f"equity_topN_{top_n}.png"), dpi=150)
-# #     plt.close(fig)
-# #
-# # def _save_heatmaps_byN(ret_eq, ret_rp, top_n: int):
-# #     ret_eq  = _safe_series(ret_eq)
-# #     ret_rp  = _safe_series(ret_rp)
-# #
-# #     plot_heatmap_monthly(
-# #         ret_eq,
-# #         f"Heatmap mensile - Equal Weight (TopN={top_n})",
-# #         save_path=OUT_DIR / f"heatmap_equal_topN_{top_n}.png"
-# #     )
-# #     plot_heatmap_monthly(
-# #         ret_rp,
-# #         f"Heatmap mensile - Risk Parity (TopN={top_n})",
-# #         save_path=OUT_DIR / f"heatmap_rp_topN_{top_n}.png"
-# #     )
-# #
-# # # Loop 8..15 replicando i plot per ciascuna combinazione
-# # for top_n in range(8, 16):
-# #     portN = _get_dynamic_portfolio(top_n)
-# #     ret_eq_N = portN["ret_eq"]
-# #     ret_rp_N = portN["ret_rp"]
-# #
-# #     _save_equity_plot_byN(ret_eq_N, ret_rp_N, top_n)
-# #     _save_heatmaps_byN(ret_eq_N, ret_rp_N, top_n)
-# #
-# # print(f"✅ Plot salvati in: {OUT_DIR}/")
+# Build the export table with a fixed column order. Selecting by an explicit
+# list of labels raises KeyError if any of these metrics is missing from the
+# collected rows.
+df_metrics = pd.DataFrame(metrics_rows)[[
+    "TopN", "Strategy",
+    "Rendimento_Ann", "Volatilita_Ann", "CAGR", "R2_Equity",
+    "MaxDD", "DD_Duration_Max", "TTR_from_MDD",
+    "AAW", "AUW", "Heal_Index", "H_min_100m_5Y",
+]]

-# ======================================================================
-# 6ter) Plot composizione (ATTIVI + Cash) per ciascun TopN (8..15)
-# ======================================================================
-import os
-import numpy as np
-import matplotlib.pyplot as plt
-
-OUT_DIR = PLOT_DIR
-OUT_DIR.mkdir(parents=True, exist_ok=True)
-
-# -- safety: helper per pesi attivi e plotting, se mancassero già nel file --
-
-if 'make_active_weights' not in globals():
-    def make_active_weights(w_base: pd.DataFrame,
-                            sig: pd.DataFrame,
-                            renorm_to_1: bool = False,
-                            add_cash: bool = True,
-                            cash_label: str = "Cash") -> pd.DataFrame:
-        import numpy as np, pandas as pd
-        if w_base is None or w_base.empty:
-            return pd.DataFrame(index=sig.index, columns=[])
-        W = w_base.copy().apply(pd.to_numeric, errors="coerce").fillna(0.0)
-        S = sig.reindex_like(W).fillna(0).astype(int)
-        W_active = W * (S > 0)
-        row_sum = W_active.sum(axis=1)
-        if renorm_to_1:
-            W_active = W_active.div(row_sum.replace(0, np.nan), axis=0).fillna(0.0)
-            if add_cash:
-                W_active[cash_label] = 0.0
-        else:
-            if add_cash:
-                cash = (1.0 - row_sum).clip(lower=0.0, upper=1.0)
-                W_active[cash_label] = cash
-        keep = [c for c in W_active.columns if float(np.abs(W_active[c]).sum()) > 0.0]
-        return W_active[keep]
-
-if 'plot_portfolio_composition_fixed' not in globals():
-    def plot_portfolio_composition_fixed(weights: pd.DataFrame,
-                                         title: str,
-                                         save_path: str | None = None,
-                                         max_legend: int = 20):
-        if weights is None or getattr(weights, "empty", True):
-            print(f"[SKIP] Nessun peso per: {title}")
-            return
-        W = weights.copy().apply(pd.to_numeric, errors="coerce").fillna(0.0)
-        if W.index.has_duplicates:
-            W = W[~W.index.duplicated(keep="last")]
-        W = W.sort_index()
-        keep_cols = [c for c in W.columns if float(np.abs(W[c]).sum()) > 0.0]
-        if not keep_cols or len(W.index) < 2:
-            print(f"[SKIP] Dati insufficienti per: {title}")
-            return
-        W = W[keep_cols]
-        avg_w = W.mean(0).sort_values(ascending=False)
-        ordered = avg_w.index.tolist()
-        if "Cash" in ordered:
-            ordered = [c for c in ordered if c!="Cash"] + ["Cash"]
-        if len(ordered) > max_legend:
-            head = ordered[:max_legend]
-            if "Cash" not in head and "Cash" in ordered:
-                head = head[:-1] + ["Cash"]
-            tail = [c for c in ordered if c not in head]
-            W_show = W[head].copy()
-            if tail:
-                W_show["Altri"] = W[tail].sum(1)
-                ordered = head + ["Altri"]
-            else:
-                ordered = head
-        else:
-            W_show = W[ordered].copy()
-        cmap = plt.colormaps.get_cmap("tab20")
-        colors = [cmap(i % cmap.N) for i in range(len(ordered))]
-        fig, ax = plt.subplots(figsize=(11, 6))
-        ax.stackplot(W_show.index, [W_show[c].values for c in ordered], labels=ordered, colors=colors)
-        ax.set_title(f"Composizione portafoglio nel tempo – {title}")
-        ymax = float(np.nanmax(W_show.sum(1).values))
-        ax.set_ylim(0, max(1.0, ymax if np.isfinite(ymax) else 1.0))
-        ax.grid(True, alpha=0.3)
-        ax.set_ylabel("Peso")
-        ax.set_yticklabels([f"{y*100:.0f}%" for y in ax.get_yticks()])
-        ncol = 2 if len(ordered) > 10 else 1
-        ax.legend(loc="upper left", bbox_to_anchor=(1.01, 1), frameon=False, ncol=ncol, title="ISIN")
-        fig.tight_layout()
-        if save_path:
-            folder = os.path.dirname(save_path) or "."
-            os.makedirs(folder, exist_ok=True)
-            fig.savefig(save_path, dpi=150, bbox_inches="tight")
-            print(f"💾 Salvato: {os.path.abspath(save_path)}")
-        # Nessuna visualizzazione interattiva
-
-def _build_weights_for_isins(base_isins_N, crypto_isin_N, wide_pnl):
-    """Costruisce i pesi TEORICI per Equal / Risk Parity / Aggressiva su un dato insieme di ISIN."""
-    colsN = [c for c in base_isins_N if c in wide_pnl.columns]
-    idx = wide_pnl.index
-    # Equal
-    if len(colsN) > 0:
-        w_eq_N = pd.DataFrame(1/len(colsN), index=idx, columns=colsN)
-    else:
-        w_eq_N = pd.DataFrame(index=idx, columns=[])
-    # Risk Parity con cap
-    if len(colsN) > 0:
-        w_rp_N = inverse_vol_weights(
-            wide_pnl[colsN],
-            window=60,
-            max_weight=RP_MAX_WEIGHT
-        )
-    else:
-        w_rp_N = pd.DataFrame(index=idx, columns=[])
-
-    # Aggressiva + Crypto
-    if (len(colsN) > 0) and (crypto_isin_N is not None) and (crypto_isin_N in wide_pnl.columns):
-        cols_agg = colsN + [crypto_isin_N]
-        w_agg_N = pd.DataFrame(0.0, index=idx, columns=cols_agg)
-        w_agg_N[colsN] = 0.85/len(colsN)
-        w_agg_N[crypto_isin_N] = 0.15
-    else:
-        w_agg_N = w_eq_N.copy()
-    # normalizza i TEORICI (solo per sicurezza numerica)
-    def _norm(W):
-        if W is None or W.empty:
-            return pd.DataFrame(index=idx, columns=[])
-        rs = W.sum(axis=1).replace(0, np.nan)
-        return W.div(rs, axis=0).fillna(0.0).clip(lower=0.0)
-    return _norm(w_eq_N), _norm(w_rp_N), _norm(w_agg_N)
-
-# # === Loop 8..15: crea pesi, attiva coi Signal, plotta e SALVA in OUT_DIR === (DISATTIVATO)
-# for top_n in range(8, 16):
-#     portN = _get_dynamic_portfolio(top_n)
-#     w_eq_act_N = portN["w_eq_act"]
-#     w_rp_act_N = portN["w_rp_act"]
-#
-#     # path di salvataggio
-#     sp_eq = OUT_DIR / f"composition_equal_topN_{top_n}.png"
-#     sp_rp = OUT_DIR / f"composition_rp_topN_{top_n}.png"
-#
-#     # plot + salvataggio (SOLO Equal e Risk Parity)
-#     plot_portfolio_composition_fixed(w_eq_act_N, f"Equal Weight (attivi + Cash) – TopN={top_n}", sp_eq)
-#     plot_portfolio_composition_fixed(w_rp_act_N, f"Risk Parity (attivi + Cash) – TopN={top_n}", sp_rp)
+# Write the "Portfolio_Metrics" sheet. Append mode with
+# if_sheet_exists="replace" rewrites only that sheet and keeps the other
+# sheets of an existing workbook.
+try:
+    with pd.ExcelWriter(FINAL_METRICS_XLSX, engine="openpyxl", mode="a", if_sheet_exists="replace") as xw:
+        df_metrics.to_excel(xw, sheet_name="Portfolio_Metrics", index=False)
+except Exception as e:
+    # Best-effort fallback: recreate the workbook from scratch. A missing file
+    # (first run) is the expected case; any other failure is reported, because
+    # recreating the file discards whatever other sheets it contained.
+    if not isinstance(e, FileNotFoundError):
+        print(f"[WARN] Aggiornamento di {FINAL_METRICS_XLSX} fallito ({e}): il file viene ricreato.")
+    with pd.ExcelWriter(FINAL_METRICS_XLSX) as xw:
+        df_metrics.to_excel(xw, sheet_name="Portfolio_Metrics", index=False)
+print(f"[INFO] Salvato: {FINAL_METRICS_XLSX} (Portfolio_Metrics)")
--- a/Wavelets.py
+++ b/Wavelets.py
--- a/config/pattern_knn_config.json
+++ b/config/pattern_knn_config.json
@@ -11,6 +11,13 @@
    "theta": 0.005,
    "embargo": null
  },
+  "wavelet_filter": {
+    "enabled": true,
+    "wavelet": "db4",
+    "level": 4,
+    "mode": "symmetric",
+    "threshold_mode": "soft"
+  },
  "tagging": {
    "z_rev": 2.0,
    "z_vol": 2.0,
--- a/shared_utils.py
+++ b/shared_utils.py
@@ -9,6 +9,10 @@ from typing import Dict, List, Optional, Sequence, Tuple
 import numpy as np
 import pandas as pd
 import pyodbc
+try:
+    import pywt
+except ImportError:  # pragma: no cover - optional dependency
+    pywt = None

 DEFAULT_CONFIG_PATH = Path("config/pattern_knn_config.json")

@@ -87,6 +91,58 @@ def z_norm(arr: np.ndarray) -> Optional[np.ndarray]:
    return (arr - mu) / (sd + 1e-12)


+def wavelet_denoise(
+    series: pd.Series,
+    wavelet: str = "db3",
+    level: int = 3,
+    mode: str = "symmetric",
+    threshold_mode: str = "soft",
+) -> Optional[pd.Series]:
+    """Denoise a series by thresholding its wavelet detail coefficients.
+
+    Entries that cannot be read as numbers are coerced to NaN and left out of
+    the transform. The filtered values are written back at their original
+    positions and the gaps are filled by interpolation, so the result carries
+    the same index as ``series``.
+
+    Args:
+        series: Input signal.
+        wavelet: Wavelet name understood by ``pywt.Wavelet``.
+        level: Requested decomposition depth; clamped to the maximum level
+            PyWavelets reports for the number of valid samples, never below 1.
+        mode: Signal extension mode forwarded to ``wavedec``/``waverec``.
+        threshold_mode: ``"hard"`` selects hard thresholding; any other value
+            selects soft thresholding.
+
+    Returns:
+        The filtered series, or ``None`` when PyWavelets is not installed or
+        the series holds no numeric value, so callers can fall back to the
+        raw signal.
+    """
+    if pywt is None:
+        print("[WARN] pywt non installato: salto il filtraggio wavelet.")
+        return None
+    s = pd.to_numeric(series, errors="coerce")
+    is_valid = s.notna().to_numpy()
+    values = s.to_numpy()[is_valid]
+    n_valid = int(values.size)
+    if n_valid == 0:
+        return None
+
+    w = pywt.Wavelet(wavelet)
+    max_level = pywt.dwt_max_level(n_valid, w.dec_len)
+    lvl = max(1, min(level, max_level)) if max_level > 0 else 1
+
+    coeffs = pywt.wavedec(values, w, mode=mode, level=lvl)
+    # Universal threshold (Donoho-Johnstone): the noise scale is estimated
+    # from the median absolute value of the last detail band.
+    sigma = np.median(np.abs(coeffs[-1])) / 0.6745 if len(coeffs[-1]) > 0 else 0.0
+    thresh = sigma * np.sqrt(2 * np.log(n_valid)) if sigma > 0 else 0.0
+    if thresh <= 0:
+        # Nothing to shrink: reconstruct from the untouched coefficients.
+        coeffs_f = coeffs
+    else:
+        def _safe_thresh(c: np.ndarray) -> np.ndarray:
+            if c is None or c.size == 0:
+                return c
+            if threshold_mode == "hard":
+                return pywt.threshold(c, value=thresh, mode="hard")
+            # soft threshold without divide-by-zero warnings
+            mag = np.abs(c)
+            mask = mag > thresh
+            out = np.zeros_like(c)
+            out[mask] = np.sign(c[mask]) * (mag[mask] - thresh)
+            return out
+
+        # The approximation band (coeffs[0]) is kept as is; only the detail
+        # bands are thresholded.
+        coeffs_f = [coeffs[0]] + [_safe_thresh(c) for c in coeffs[1:]]
+
+    # The reconstruction can be longer than the input, so trim it.
+    rec = pywt.waverec(coeffs_f, w, mode=mode)[:n_valid]
+    # Realign to the original index by position rather than by label, so an
+    # index with duplicate labels cannot make the realignment raise.
+    aligned = np.full(len(s), np.nan, dtype=rec.dtype)
+    aligned[is_valid] = rec
+    return pd.Series(aligned, index=s.index).interpolate(limit_direction="both")
+
+
 def build_pattern_library(
    ret_series: pd.Series,
    wp: int,