refactoring
This commit is contained in:
@@ -9,10 +9,6 @@ from typing import Dict, List, Optional, Sequence, Tuple
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pyodbc
|
||||
try:
|
||||
import pywt
|
||||
except ImportError: # pragma: no cover - optional dependency
|
||||
pywt = None
|
||||
|
||||
# Default path of the pattern-KNN JSON configuration file. The path is
# relative, so it resolves against the process's current working directory.
DEFAULT_CONFIG_PATH = Path("config/pattern_knn_config.json")
|
||||
|
||||
@@ -91,58 +87,6 @@ def z_norm(arr: np.ndarray) -> Optional[np.ndarray]:
|
||||
return (arr - mu) / (sd + 1e-12)
|
||||
|
||||
|
||||
def wavelet_denoise(
    series: pd.Series,
    wavelet: str = "db3",
    level: int = 3,
    mode: str = "symmetric",
    threshold_mode: str = "soft",
) -> Optional[pd.Series]:
    """Smooth a series by thresholding its wavelet detail coefficients.

    The input is coerced to numeric (unparseable entries become NaN), the
    NaN entries are dropped, and the remaining values are decomposed with
    ``pywt.wavedec``. Detail bands are shrunk with the universal threshold
    ``sigma * sqrt(2 * ln(n))``, where ``sigma`` is estimated from the finest
    detail band as ``median(|d|) / 0.6745``; the approximation band is left
    untouched. The reconstructed signal is re-aligned to the original index
    and the dropped positions are filled by interpolation in both directions.

    Args:
        series: Values to denoise.
        wavelet: Wavelet name passed to ``pywt.Wavelet``.
        level: Requested decomposition depth; clamped to the range
            ``[1, pywt.dwt_max_level(...)]`` for the available data.
        mode: Signal extension mode used for both decomposition and
            reconstruction.
        threshold_mode: ``"hard"`` applies ``pywt.threshold`` in hard mode;
            any other value applies soft thresholding.

    Returns:
        The denoised series on the original index, or ``None`` when
        PyWavelets is not installed or the series has no numeric values.
    """
    if pywt is None:
        print("[WARN] pywt non installato: salto il filtraggio wavelet.")
        return None

    numeric = pd.to_numeric(series, errors="coerce")
    observed = numeric.dropna()
    if observed.empty:
        return None
    n_obs = len(observed)

    # Clamp the requested depth to what the data length supports.
    wavelet_obj = pywt.Wavelet(wavelet)
    deepest = pywt.dwt_max_level(n_obs, wavelet_obj.dec_len)
    depth = 1
    if deepest > 0:
        depth = max(1, min(level, deepest))

    bands = pywt.wavedec(observed.values, wavelet_obj, mode=mode, level=depth)

    # Universal threshold (Donoho-Johnstone), noise estimated from the
    # finest detail band.
    finest = bands[-1]
    noise_sigma = 0.0
    if len(finest) > 0:
        noise_sigma = np.median(np.abs(finest)) / 0.6745
    cutoff = 0.0
    if noise_sigma > 0:
        cutoff = noise_sigma * np.sqrt(2 * np.log(n_obs))

    def _shrink(band: np.ndarray) -> np.ndarray:
        """Apply the selected thresholding rule to one detail band."""
        if band is None or band.size == 0:
            return band
        if threshold_mode == "hard":
            return pywt.threshold(band, value=cutoff, mode="hard")
        # Soft threshold computed by hand to avoid divide-by-zero warnings.
        magnitude = np.abs(band)
        keep = magnitude > cutoff
        shrunk = np.zeros_like(band)
        shrunk[keep] = np.sign(band[keep]) * (magnitude[keep] - cutoff)
        return shrunk

    if cutoff <= 0:
        # Nothing to remove: reconstruct from the untouched coefficients.
        shrunk_bands = bands
    else:
        # Keep the approximation band as is; shrink only the detail bands.
        shrunk_bands = [bands[0]]
        shrunk_bands.extend(_shrink(band) for band in bands[1:])

    # The reconstruction can be longer than the input, so trim it back.
    rebuilt = pywt.waverec(shrunk_bands, wavelet_obj, mode=mode)[:n_obs]
    denoised = pd.Series(rebuilt, index=observed.index)

    # Re-align to the original index and fill the dropped positions.
    return denoised.reindex(numeric.index).interpolate(limit_direction="both")
|
||||
|
||||
|
||||
def build_pattern_library(
|
||||
ret_series: pd.Series,
|
||||
wp: int,
|
||||
@@ -262,23 +206,16 @@ def hurst_rs(series: pd.Series) -> Optional[float]:
|
||||
return float(h)
|
||||
|
||||
|
||||
def build_hurst_map(
|
||||
returns_long: pd.DataFrame,
|
||||
lookback: Optional[int] = None,
|
||||
min_length: int = 100,
|
||||
) -> Dict[str, float]:
|
||||
def build_hurst_map(returns_long: pd.DataFrame, lookback: int = 252) -> Dict[str, float]:
|
||||
if returns_long.empty:
|
||||
return {}
|
||||
ret_wide = returns_long.pivot(index="Date", columns="ISIN", values="Ret").sort_index()
|
||||
hurst_map: Dict[str, float] = {}
|
||||
for isin in ret_wide.columns:
|
||||
series = ret_wide[isin].dropna().astype(float)
|
||||
if len(series) < max(1, int(min_length)):
|
||||
if len(series) < max(lookback, 100):
|
||||
continue
|
||||
window = len(series) if lookback is None else min(len(series), int(lookback))
|
||||
if window <= 0:
|
||||
continue
|
||||
h_val = hurst_rs(series.iloc[-window:])
|
||||
h_val = hurst_rs(series.iloc[-lookback:])
|
||||
if h_val is None or not np.isfinite(h_val):
|
||||
continue
|
||||
hurst_map[str(isin)] = float(h_val)
|
||||
|
||||
Reference in New Issue
Block a user