Add shared utilities and config

This commit is contained in:
fredmaloggia
2025-11-17 08:40:36 +01:00
parent ecf03b7775
commit 5f17a80044
4 changed files with 340 additions and 321 deletions

View File

@@ -32,57 +32,77 @@ from typing import Dict, List, Optional, Tuple, Iterable, Set
import numpy as np
import pandas as pd
from urllib.request import urlopen
from urllib.error import URLError, HTTPError
from urllib.request import urlopen
from urllib.error import URLError, HTTPError
# DB
import sqlalchemy as sa
from sqlalchemy import text as sql_text
import pyodbc
import sqlalchemy as sa
from sqlalchemy import text as sql_text
from shared_utils import (
build_hurst_map,
build_pattern_library,
characterize_window,
detect_column,
load_config,
predict_from_library,
read_connection_txt,
z_norm,
)
# =========================
# CONFIG
# =========================
BASE_DIR = Path(".")
UNIVERSO_XLSX = BASE_DIR / "Universo per Trading System.xlsx"
CONNECTION_TXT = BASE_DIR / "connection.txt"
AUDIT_LOG_CSV = BASE_DIR / "trades_audit_log.csv"
OPEN_TRADES_DIR = BASE_DIR / "open_trades"
CONFIG = load_config()
DB_CONFIG = CONFIG.get("db", {})
PATTERN_CONFIG = CONFIG.get("pattern", {})
TAGGING_CONFIG = CONFIG.get("tagging", {})
RANKING_CONFIG = CONFIG.get("ranking", {})
SIGNALS_CONFIG = CONFIG.get("signals", {})
BASE_DIR = Path(".")
UNIVERSO_XLSX = BASE_DIR / "Universo per Trading System.xlsx"
CONNECTION_TXT = BASE_DIR / "connection.txt"
AUDIT_LOG_CSV = BASE_DIR / "trades_audit_log.csv"
OPEN_TRADES_DIR = BASE_DIR / "open_trades"
def _dated_signals_filename() -> Path:
date_prefix = pd.Timestamp.today().strftime("%Y%m%d")
return BASE_DIR / f"{date_prefix}_signals.xlsx"
# Stored procedure / parametri DB
SP_NAME_DEFAULT = "opt_RendimentoGiornaliero1_ALL"
SP_N_DEFAULT = 1305
PTF_CURR_DEFAULT = "EUR"
SP_NAME_DEFAULT = DB_CONFIG.get("stored_proc", "opt_RendimentoGiornaliero1_ALL")
SP_N_DEFAULT = DB_CONFIG.get("n_bars", 1305)
PTF_CURR_DEFAULT = DB_CONFIG.get("ptf_curr", "EUR")
# Pattern recognition (come backtest)
WP = 60
HA = 10
KNN_K = 25
THETA = 0.005 # 0,005% in decimali (identico al backtest)
WP = PATTERN_CONFIG.get("wp", 60)
HA = PATTERN_CONFIG.get("ha", 10)
KNN_K = PATTERN_CONFIG.get("knn_k", 25)
THETA = PATTERN_CONFIG.get("theta", 0.005) # 0,005% in decimali (identico al backtest)
Z_REV = TAGGING_CONFIG.get("z_rev", 2.0)
Z_VOL = TAGGING_CONFIG.get("z_vol", 2.0)
STD_COMP_PCT = TAGGING_CONFIG.get("std_comp_pct", 0.15)
# Exit rules (identiche al backtest)
SL_BPS = 300.0
TP_BPS = 800.0
TRAIL_BPS = 300.0
TIME_STOP_BARS = 20
THETA_EXIT = 0.0 # soglia debolezza
WEAK_DAYS_EXIT = None # uscita IMMEDIATA in caso di debolezza (come backtest)
SL_BPS = SIGNALS_CONFIG.get("sl_bps", 300.0)
TP_BPS = SIGNALS_CONFIG.get("tp_bps", 800.0)
TRAIL_BPS = SIGNALS_CONFIG.get("trail_bps", 300.0)
TIME_STOP_BARS = SIGNALS_CONFIG.get("time_stop_bars", 20)
THETA_EXIT = SIGNALS_CONFIG.get("theta_exit", 0.0) # soglia debolezza
WEAK_DAYS_EXIT = SIGNALS_CONFIG.get("weak_days_exit") # uscita IMMEDIATA in caso di debolezza (come backtest)
# Ranking e selezione Top-N per APERTURE
MAX_OPEN = 15 # cap strumenti aperti oggi (come backtest)
MAX_OPEN = SIGNALS_CONFIG.get("max_open", 15) # cap strumenti aperti oggi (come backtest)
# Allineamento al backtest v3.1.5 per il cap del Risk Parity
TOP_N_MAX = MAX_OPEN
RP_MAX_WEIGHT = 2 / TOP_N_MAX # ≈ 0.1333 = 13,33% per singolo asset
TOP_N_MAX = RANKING_CONFIG.get("top_n_max", MAX_OPEN)
RP_MAX_WEIGHT = RANKING_CONFIG.get("rp_max_weight", 2 / max(TOP_N_MAX, 1)) # ≈ 0.1333 = 13,33% per singolo asset
# Sizing
BASE_CAPITAL_PER_STRATEGY = 100.0
MIN_TRADE_NOTIONAL = 0.01
RISK_PARITY_LOOKBACK = 60
BASE_CAPITAL_PER_STRATEGY = SIGNALS_CONFIG.get("base_capital_per_strategy", 100.0)
MIN_TRADE_NOTIONAL = SIGNALS_CONFIG.get("min_trade_notional", 0.01)
RISK_PARITY_LOOKBACK = SIGNALS_CONFIG.get("risk_parity_lookback", 60)
# Calendario
BUSINESS_DAYS_ONLY = True
@@ -111,37 +131,9 @@ def _safe_to_float(x) -> Optional[float]:
except Exception:
return None
# =========================
# CONNESSIONE DB
# =========================
def read_connection_txt(path: Path) -> str:
if not path.exists():
raise FileNotFoundError(f"Missing connection.txt at {path}")
params: Dict[str, str] = {}
for line in path.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
k, v = line.split("=", 1)
params[k.strip().lower()] = v.strip()
username = params.get("username")
password = params.get("password")
host = params.get("host")
port = params.get("port", "1433")
database = params.get("database")
if not all([username, password, host, database]):
raise ValueError("connection.txt incompleto: servono username/password/host/database.")
installed = [d for d in pyodbc.drivers()]
driver_q = "ODBC+Driver+18+for+SQL+Server" if "ODBC Driver 18 for SQL Server" in installed else "ODBC+Driver+17+for+SQL+Server"
return f"mssql+pyodbc://{username}:{password}@{host}:{port}/{database}?driver={driver_q}"
def _db_fetch_returns(conn_str: str,
isins: List[str],
sp_name: Optional[str] = None,
def _db_fetch_returns(conn_str: str,
isins: List[str],
sp_name: Optional[str] = None,
n_bars: Optional[int] = None,
ptf_curr: Optional[str] = None) -> pd.DataFrame:
engine = sa.create_engine(conn_str, fast_executemany=True)
@@ -152,22 +144,11 @@ def _db_fetch_returns(conn_str: str,
sql_sp = sql_text(f"EXEC {sp} @ISIN = :isin, @n = :n, @PtfCurr = :ptf")
frames: List[pd.DataFrame] = []
def _pick(df: pd.DataFrame, candidates: List[str]) -> Optional[str]:
low = {c.lower(): c for c in df.columns}
for c in candidates:
if c.lower() in low:
return low[c.lower()]
for c in df.columns:
cl = c.lower()
if any(tok in cl for tok in [x.lower() for x in candidates]):
return c
return None
with engine.begin() as conn:
for i, isin in enumerate(isins, start=1):
print(f"[DB] ({i}/{len(isins)}) scarico serie storica per {isin} ...", flush=True)
try:
df = pd.read_sql_query(sql_sp, conn, params={"isin": str(isin), "n": int(n_val), "ptf": ptf})
with engine.begin() as conn:
for i, isin in enumerate(isins, start=1):
print(f"[DB] ({i}/{len(isins)}) scarico serie storica per {isin} ...", flush=True)
try:
df = pd.read_sql_query(sql_sp, conn, params={"isin": str(isin), "n": int(n_val), "ptf": ptf})
except Exception as e:
print(f"[ERROR] SP {sp} fallita per {isin}: {e}")
continue
@@ -176,11 +157,11 @@ def _db_fetch_returns(conn_str: str,
print(f"[WARN] Nessun dato per {isin}")
continue
col_date = _pick(df, ["Date", "Data", "Datetime", "Timestamp", "Time"])
col_ret = _pick(df, ["Ret", "Return", "Rendimento", "Rend", "Ret_%", "RET"])
if not col_date or not col_ret:
print(f"[WARN] Colonne mancanti per {isin}")
continue
col_date = detect_column(df, ["Date", "Data", "Datetime", "Timestamp", "Time"])
col_ret = detect_column(df, ["Ret", "Return", "Rendimento", "Rend", "Ret_%", "RET"])
if not col_date or not col_ret:
print(f"[WARN] Colonne mancanti per {isin}")
continue
out = df[[col_date, col_ret]].copy()
out.columns = ["Date", "Ret"]
@@ -278,102 +259,6 @@ def get_open_price(isin: str, universe: pd.DataFrame) -> Optional[float]:
# =========================
# HURST ESTIMATOR & MAP
# =========================
from typing import Optional # in cima al file c'è già Optional nei type hints, quindi ok
def _hurst_rs(series: pd.Series) -> Optional[float]:
"""
Stima semplice del coefficiente di Hurst tramite Rescaled Range (R/S) su un'unica finestra.
Ritorna NaN se la serie è troppo corta o degenerata.
"""
x = pd.to_numeric(series.dropna(), errors="coerce").astype(float).values
n = len(x)
if n < 100:
return None
x = x - x.mean()
z = np.cumsum(x)
R = z.max() - z.min()
S = x.std(ddof=1)
if S <= 0 or R <= 0:
return None
H = np.log(R / S) / np.log(n)
if not np.isfinite(H):
return None
return float(H)
def build_hurst_map(returns_long: pd.DataFrame,
lookback: int = 252) -> Dict[str, float]:
"""
Costruisce una mappa ISIN -> Hurst usando gli ultimi `lookback` rendimenti.
"""
if returns_long.empty:
return {}
ret_wide = returns_long.pivot(index="Date", columns="ISIN", values="Ret").sort_index()
hurst_map: Dict[str, float] = {}
for isin in ret_wide.columns:
s = ret_wide[isin].dropna().astype(float)
if len(s) < max(lookback, 100):
continue
h = _hurst_rs(s.iloc[-lookback:])
if h is None or not np.isfinite(h):
continue
hurst_map[str(isin)] = float(h)
return hurst_map
# =========================
# PATTERN RECOGNITION (WP/HA)
# =========================
def z_norm(arr: np.ndarray) -> Optional[np.ndarray]:
arr = np.asarray(arr, dtype=float)
mu = arr.mean()
sd = arr.std()
if sd < 1e-12:
return None
return (arr - mu) / (sd + 1e-12)
def build_pattern_library(ret_series: pd.Series, Wp: int, Ha: int) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
x = ret_series.dropna().values
N = len(x)
if N < Wp + Ha + 10:
return None, None
wins, outs = [], []
for t in range(0, N - Wp - Ha):
win = x[t:t+Wp]
winzn = z_norm(win)
if winzn is None:
continue
outcome = np.sum(x[t+Wp : t+Wp+Ha]) # somma rendimenti futuri su Ha (decimali)
wins.append(winzn); outs.append(outcome)
if not wins:
return None, None
return np.array(wins), np.array(outs)
def predict_from_library(curr_win: np.ndarray,
lib_wins: np.ndarray,
lib_out: np.ndarray,
k: int = 25) -> Tuple[float, float, np.ndarray]:
dists = np.linalg.norm(lib_wins - curr_win, axis=1)
idx = np.argsort(dists)[:min(k, len(dists))]
return float(np.median(lib_out[idx])), float(np.mean(dists[idx])), idx
def characterize_window(ret_series: pd.Series, Wp: int) -> Tuple[Optional[str], float]:
x = ret_series.dropna().values
if len(x) < max(WP, 30):
return None, 0.0
win = x[-Wp:]
mu, sd = win.mean(), win.std()
if sd < 1e-12:
return "compression", 0.5
last3 = win[-3:] if len(win) >= 3 else win
if np.sign(last3).sum() in (3, -3):
return "momentum_burst", min(1.0, abs(last3.sum())/(sd+1e-12))
return None, 0.0
# =========================
# GENERAZIONE SEGNALI (EOD su D)
# =========================
@@ -404,17 +289,17 @@ def generate_signals_today(universe: pd.DataFrame,
lib_wins, lib_out = build_pattern_library(r, WP, HA)
if lib_wins is None or len(r) < WP + HA:
est_out, avg_dist, sig = np.nan, np.nan, 0
ptype, pconf = characterize_window(r, WP)
ptype, pconf = characterize_window(r, WP, z_rev=Z_REV, z_vol=Z_VOL, std_comp_pct=STD_COMP_PCT)
else:
curr = r.values[-WP:]
curr_zn = z_norm(curr)
if curr_zn is None:
est_out, avg_dist, sig = np.nan, np.nan, 0
ptype, pconf = characterize_window(r, WP)
else:
est_out, avg_dist, _ = predict_from_library(curr_zn, lib_wins, lib_out, k=KNN_K)
sig = 1 if (pd.notna(est_out) and float(est_out) > float(theta_entry)) else 0
ptype, pconf = characterize_window(r, WP)
if curr_zn is None:
est_out, avg_dist, sig = np.nan, np.nan, 0
ptype, pconf = characterize_window(r, WP, z_rev=Z_REV, z_vol=Z_VOL, std_comp_pct=STD_COMP_PCT)
else:
est_out, avg_dist, _ = predict_from_library(curr_zn, lib_wins, lib_out, k=KNN_K)
sig = 1 if (pd.notna(est_out) and float(est_out) > float(theta_entry)) else 0
ptype, pconf = characterize_window(r, WP, z_rev=Z_REV, z_vol=Z_VOL, std_comp_pct=STD_COMP_PCT)
rows.append({
"Date": decision_date, "ISIN": isin,