Alignment with the main version

fredmaloggia
2025-11-24 15:03:18 +01:00
parent c816f5820b
commit 906484cb11
7 changed files with 232 additions and 816 deletions

View File

@@ -7,21 +7,15 @@ from pypfopt.exceptions import OptimizationError
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import os import os
import sys import sys
from dotenv import load_dotenv
import yaml
# Cartelle di input/output/plot # Cartelle di input/output/plot
OUTPUT_DIR = "Output" OUTPUT_DIR = "Output"
INPUT_DIR = "Input" INPUT_DIR = "Input"
PLOT_DIR = "Plot" PLOT_DIR = "Plot"
CONFIG_FILE = "config.yaml"
os.makedirs(OUTPUT_DIR, exist_ok=True) os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(INPUT_DIR, exist_ok=True) os.makedirs(INPUT_DIR, exist_ok=True)
os.makedirs(PLOT_DIR, exist_ok=True) os.makedirs(PLOT_DIR, exist_ok=True)
load_dotenv()
GAP_FFILL_LIMIT_DAYS = 5 # forward-fill limit per buchi di calendario
def excel_path(filename: str) -> str: def excel_path(filename: str) -> str:
"""Percorso completo per i file Excel di output.""" """Percorso completo per i file Excel di output."""
@@ -31,74 +25,13 @@ def plot_path(filename: str) -> str:
"""Percorso completo per i file di grafico.""" """Percorso completo per i file di grafico."""
return os.path.join(PLOT_DIR, filename) return os.path.join(PLOT_DIR, filename)
-# Configurazione della connessione al database (variabili d'ambiente) e parametri portafoglio
-def load_db_config():
-req = ["DB_USERNAME", "DB_PASSWORD", "DB_HOST", "DB_NAME"]
-missing = [k for k in req if not os.getenv(k)]
-if missing:
-raise RuntimeError(f"Variabili d'ambiente mancanti per il DB: {', '.join(missing)}")
-return {
-"username": os.getenv("DB_USERNAME"),
-"password": os.getenv("DB_PASSWORD"),
-"host": os.getenv("DB_HOST"),
-"port": os.getenv("DB_PORT", "1433"),
-"database": os.getenv("DB_NAME"),
-}
+# Configurazione della connessione al database
+username = 'readonly'
+password = 'e8nqtSa39L4Le3'
+host = '26.69.45.60'
+database = 'FirstSolutionDB'
+port = 1433
DEFAULT_VOL_TARGETS = [
{"years": 5, "target_vol": 0.06, "name": "VAR3_GBP"},
]
DEFAULT_ASSET_CLASS_LIMITS = {
'Azionari': 0.75,
'Obbligazionari': 0.75,
'Metalli Preziosi': 0.20,
'Materie Prime': 0.05,
'Immobiliare': 0.05,
'Criptovalute': 0.05,
'Monetari': 0.10
}
def load_targets_and_limits(config_file: str, profile: str = "uk"):
"""Legge target di volatilità e limiti asset class dal file di configurazione."""
cfg = {}
try:
with open(config_file, "r", encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
except FileNotFoundError:
cfg = {}
vt_cfg = cfg.get("volatility_targets", {})
vt_list = []
if isinstance(vt_cfg, dict):
vt_list = vt_cfg.get(profile) or vt_cfg.get("default") or []
elif isinstance(vt_cfg, list):
vt_list = vt_cfg
if not vt_list:
vt_list = DEFAULT_VOL_TARGETS
volatility_targets_local = {
(int(item["years"]), float(item["target_vol"])): item["name"]
for item in vt_list
if "years" in item and "target_vol" in item and "name" in item
}
asset_limits_cfg = cfg.get("asset_class_limits") or {}
if not asset_limits_cfg:
asset_limits_cfg = DEFAULT_ASSET_CLASS_LIMITS
asset_class_limits_local = {k: float(v) for k, v in asset_limits_cfg.items()}
return volatility_targets_local, asset_class_limits_local
db_cfg = load_db_config()
username = db_cfg["username"]
password = db_cfg["password"]
host = db_cfg["host"]
database = db_cfg["database"]
port = db_cfg["port"]
connection_string = f"mssql+pyodbc://{username}:{password}@{host}:{port}/{database}?driver=ODBC+Driver+17+for+SQL+Server" connection_string = f"mssql+pyodbc://{username}:{password}@{host}:{port}/{database}?driver=ODBC+Driver+17+for+SQL+Server"
CONFIG_PROFILE = os.getenv("CONFIG_PROFILE", "uk")
volatility_targets, asset_class_limits_cfg = load_targets_and_limits(CONFIG_FILE, CONFIG_PROFILE)
try: try:
# Crea l'Engine # Crea l'Engine
@@ -121,85 +54,6 @@ template_df = pd.read_excel(template_path)
file_path = os.path.join(INPUT_DIR, 'Universo ETF per ottimizzatore UK.xlsx') file_path = os.path.join(INPUT_DIR, 'Universo ETF per ottimizzatore UK.xlsx')
df = pd.read_excel(file_path, usecols=['ISIN', 'Nome', 'Categoria', 'Asset Class', 'PesoMax', 'Codice Titolo'],dtype={'Codice Titolo':str}) df = pd.read_excel(file_path, usecols=['ISIN', 'Nome', 'Categoria', 'Asset Class', 'PesoMax', 'Codice Titolo'],dtype={'Codice Titolo':str})
# =========================
# VALIDAZIONE DATI RENDIMENTI
# =========================
def _gap_ranges_missing(series: pd.Series, all_dates: pd.DatetimeIndex, max_entries: int = 3):
"""Restituisce fino a max_entries intervalli di date mancanti (start, end, len)."""
missing_idx = series[series.isna()].index
if missing_idx.empty:
return []
positions = [all_dates.get_loc(ts) for ts in missing_idx if ts in all_dates]
if not positions:
return []
ranges = []
start_pos = positions[0]
end_pos = positions[0]
for pos in positions[1:]:
if pos == end_pos + 1:
end_pos = pos
else:
ranges.append((all_dates[start_pos], all_dates[end_pos], end_pos - start_pos + 1))
start_pos = end_pos = pos
ranges.append((all_dates[start_pos], all_dates[end_pos], end_pos - start_pos + 1))
return ranges[:max_entries]
def preprocess_returns(temp_df: pd.DataFrame, isin: str, all_dates: pd.DatetimeIndex,
gap_ffill_limit: int = GAP_FFILL_LIMIT_DAYS) -> pd.Series:
"""Pulizia/validazione serie rendimenti (tipi, duplicati, buchi, niente fillna(0))."""
required_cols = {"Px_Date", "RendimentoGiornaliero"}
missing_cols = required_cols.difference(temp_df.columns)
if missing_cols:
print(f"[{isin}] Colonne mancanti {missing_cols}, asset ignorato.")
return pd.Series(dtype=float)
df_local = temp_df.copy()
df_local["Px_Date"] = pd.to_datetime(df_local["Px_Date"], errors="coerce").dt.normalize()
df_local["RendimentoGiornaliero"] = pd.to_numeric(df_local["RendimentoGiornaliero"], errors="coerce")
before_rows = len(df_local)
df_local = df_local.dropna(subset=["Px_Date", "RendimentoGiornaliero"])
dropped_na = before_rows - len(df_local)
dup_count = df_local.duplicated(subset=["Px_Date"]).sum()
if dup_count > 0:
print(f"[{isin}] Drop duplicati su Px_Date: {dup_count}")
df_local = df_local.drop_duplicates(subset=["Px_Date"], keep="last").sort_values("Px_Date")
if df_local.empty:
print(f"[{isin}] Nessuna riga valida dopo cleaning, asset ignorato.")
return pd.Series(dtype=float)
df_local["RendimentoGiornaliero"] = df_local["RendimentoGiornaliero"] / 100.0
series = df_local.set_index("Px_Date")["RendimentoGiornaliero"].reindex(all_dates)
missing_before_fill = int(series.isna().sum())
if missing_before_fill > 0:
gaps = _gap_ranges_missing(series, all_dates, max_entries=3)
if gaps:
gaps_str = "; ".join([f"{g[0].date()}->{g[1].date()} ({g[2]} gg)" for g in gaps])
print(f"[{isin}] Date mancanti prima del fill (prime): {gaps_str}")
series_ffill = series.ffill(limit=gap_ffill_limit)
first_valid = series_ffill.first_valid_index()
last_valid = series_ffill.last_valid_index()
if first_valid is None or last_valid is None:
print(f"[{isin}] Serie vuota dopo forward-fill, asset ignorato.")
return pd.Series(dtype=float)
series_ffill = series_ffill.loc[first_valid:last_valid]
residual_missing = int(series_ffill.isna().sum())
if residual_missing > 0:
print(f"[{isin}] {residual_missing} valori mancanti non coperti dal forward-fill (limite {gap_ffill_limit}), righe scartate.")
series_ffill = series_ffill.dropna()
coverage_days = series_ffill.shape[0]
if dropped_na > 0 or missing_before_fill > 0:
print(f"[{isin}] Righe totali: {before_rows}, drop NA: {dropped_na}, copertura finale: {coverage_days} giorni.")
return series_ffill
# Intervallo di date degli ultimi 5 anni, escludendo sabati e domeniche # Intervallo di date degli ultimi 5 anni, escludendo sabati e domeniche
end_date = pd.Timestamp.now().normalize() - pd.Timedelta(days=1) end_date = pd.Timestamp.now().normalize() - pd.Timedelta(days=1)
start_date = end_date - pd.DateOffset(years=5) start_date = end_date - pd.DateOffset(years=5)
@@ -218,23 +72,29 @@ for isin in df['ISIN'].unique():
if temp_df.empty:
print(f"Nessun dato recuperato per {isin}, skipping...")
continue
-clean_series = preprocess_returns(temp_df, isin, all_dates, gap_ffill_limit=GAP_FFILL_LIMIT_DAYS)
-if clean_series.empty:
-print(f"Nessun dato valido per {isin} dopo la validazione, asset ignorato.")
-continue
-final_df[isin] = clean_series.reindex(all_dates)
+temp_df['Px_Date'] = pd.to_datetime(temp_df['Px_Date'], format='%Y-%m-%d').dt.normalize()
+temp_df.set_index('Px_Date', inplace=True)
+temp_df['RendimentoGiornaliero'] = temp_df['RendimentoGiornaliero'] / 100
+final_df[isin] = temp_df['RendimentoGiornaliero'].reindex(all_dates)
isin_from_db.add(isin)
-non_null = int(final_df[isin].count())
-missing_left = int(final_df[isin].isna().sum())
-print(f"Dati recuperati per {isin}: {non_null} righe valide, mancanti residui: {missing_left}.")
+print(f"Dati recuperati per {isin}: {final_df[isin].count()} righe di dati non-null prelevate.")
except SQLAlchemyError as e:
print(f"Errore durante l'esecuzione della stored procedure per {isin}:", e)
-final_df = final_df.loc[:, final_df.notna().any()] # elimina asset senza dati utili
-if final_df.shape[1] == 0:
-print("Nessun ISIN valido dopo la validazione dei rendimenti, uscita.")
-sys.exit(1)
-final_df = final_df.dropna(how='all')
+final_df.fillna(0, inplace=True)
+# Configurazione degli obiettivi di volatilità
+volatility_targets = {
+(5, 0.06): 'VAR3_GBP',
+#(1, 0.12): 'VAR6_1Y',
+#(3, 0.12): 'VAR6_3Y',
+(5, 0.12): 'VAR6_GBP',
+(5, 0.18): 'VAR9_GBP'
+}
+# Definizione del numero di giorni lavorativi per anno
+days_per_year = 252
+riskfree_rate = 0.02
# Ottimizzazione per ciascun target di volatilità e salvataggio dei risultati # Ottimizzazione per ciascun target di volatilità e salvataggio dei risultati
optimized_weights = pd.DataFrame() optimized_weights = pd.DataFrame()
@@ -251,6 +111,13 @@ for (years, target_vol), name in volatility_targets.items():
# Aggiunta dei vincoli per le categorie e le asset class # Aggiunta dei vincoli per le categorie e le asset class
categories_limits = df.groupby('Categoria')['PesoMax'].max().to_dict() categories_limits = df.groupby('Categoria')['PesoMax'].max().to_dict()
asset_class_limits = {
'Azionari': 0.75,
'Obbligazionari': 0.75,
'Metalli Preziosi': 0.20,
'Materie Prime': 0.05,
'Immobiliare': 0.05
}
for category, max_weight in categories_limits.items(): for category, max_weight in categories_limits.items():
isin_list = df[df['Categoria'] == category]['ISIN'].tolist() isin_list = df[df['Categoria'] == category]['ISIN'].tolist()
@@ -258,7 +125,7 @@ for (years, target_vol), name in volatility_targets.items():
ef.add_constraint(lambda w: sum(w[i] for i in category_idx) <= max_weight) ef.add_constraint(lambda w: sum(w[i] for i in category_idx) <= max_weight)
ef.add_constraint(lambda w: sum(w[i] for i in category_idx) >= 0) ef.add_constraint(lambda w: sum(w[i] for i in category_idx) >= 0)
-for asset_class, max_weight in asset_class_limits_cfg.items():
+for asset_class, max_weight in asset_class_limits.items():
isin_list = df[df['Asset Class'] == asset_class]['ISIN'].tolist() isin_list = df[df['Asset Class'] == asset_class]['ISIN'].tolist()
asset_class_idx = [period_df.columns.get_loc(isin) for isin in isin_list if isin in period_df.columns] asset_class_idx = [period_df.columns.get_loc(isin) for isin in isin_list if isin in period_df.columns]
ef.add_constraint(lambda w: sum(w[i] for i in asset_class_idx) <= max_weight) ef.add_constraint(lambda w: sum(w[i] for i in asset_class_idx) <= max_weight)
@@ -295,7 +162,7 @@ for (years, target_vol), name in volatility_targets.items():
print(f"File {output_file_path} saved successfully.") print(f"File {output_file_path} saved successfully.")
# Grafico a torta per ciascun portafoglio ottimizzato # Grafico a torta per ciascun portafoglio ottimizzato
-asset_allocations = {asset: 0 for asset in asset_class_limits_cfg}
+asset_allocations = {asset: 0 for asset in asset_class_limits}
for isin, weight in weights.items(): for isin, weight in weights.items():
asset_class = df.loc[df['ISIN'] == isin, 'Asset Class'].values[0] asset_class = df.loc[df['ISIN'] == isin, 'Asset Class'].values[0]
asset_allocations[asset_class] += weight asset_allocations[asset_class] += weight

View File

@@ -13,8 +13,6 @@ import os
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from dotenv import load_dotenv
import yaml
from sqlalchemy import create_engine, text from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
@@ -27,14 +25,10 @@ from pypfopt.exceptions import OptimizationError
OUTPUT_DIR = "Output" OUTPUT_DIR = "Output"
PLOT_DIR = "Plot" PLOT_DIR = "Plot"
INPUT_DIR = "Input" INPUT_DIR = "Input"
CONFIG_FILE = "config.yaml"
os.makedirs(OUTPUT_DIR, exist_ok=True) os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(PLOT_DIR, exist_ok=True) os.makedirs(PLOT_DIR, exist_ok=True)
os.makedirs(INPUT_DIR, exist_ok=True) os.makedirs(INPUT_DIR, exist_ok=True)
load_dotenv()
GAP_FFILL_LIMIT_DAYS = 5 # forward-fill limit per buchi di calendario
def excel_path(filename: str) -> str: def excel_path(filename: str) -> str:
"""Costruisce il percorso completo per un file Excel nella cartella di output.""" """Costruisce il percorso completo per un file Excel nella cartella di output."""
@@ -251,28 +245,27 @@ def portfolio_path_metrics(period_df: pd.DataFrame,
"Hmin_100m_5Y": hmin_5y_months "Hmin_100m_5Y": hmin_5y_months
} }
-def load_db_config():
-"""Recupera i parametri di connessione dal set di variabili d'ambiente."""
-required_keys = ["DB_USERNAME", "DB_PASSWORD", "DB_HOST", "DB_NAME"]
-missing = [k for k in required_keys if not os.getenv(k)]
-if missing:
-raise RuntimeError(f"Variabili d'ambiente mancanti per il DB: {', '.join(missing)}")
-return {
-"username": os.getenv("DB_USERNAME"),
-"password": os.getenv("DB_PASSWORD"),
-"host": os.getenv("DB_HOST"),
-"port": os.getenv("DB_PORT", "1433"),
-"database": os.getenv("DB_NAME"),
-}
-db_cfg = load_db_config()
+# --- Lettura parametri dal file connection.txt ---
+params = {}
+with open("connection.txt", "r") as f:
+for line in f:
+line = line.strip()
+if line and not line.startswith("#"):
+key, value = line.split("=", 1)
+params[key.strip()] = value.strip()
+username = params.get("username")
+password = params.get("password")
+host = params.get("host")
+port = params.get("port", "1433")
+database = params.get("database")
connection_string = (
-f"mssql+pyodbc://{db_cfg['username']}:{db_cfg['password']}@{db_cfg['host']}:{db_cfg['port']}/{db_cfg['database']}"
+f"mssql+pyodbc://{username}:{password}@{host}:{port}/{database}"
"?driver=ODBC+Driver+17+for+SQL+Server"
)
-print("Connection string configurata da variabili d'ambiente.")
+print("Connection string letta correttamente")
# ========================= # =========================
# CONNESSIONE AL DB # CONNESSIONE AL DB
@@ -299,85 +292,6 @@ df = pd.read_excel(
dtype={'Codice Titolo': str} dtype={'Codice Titolo': str}
) )
# =========================
# VALIDAZIONE DATI RENDIMENTI
# =========================
def _gap_ranges_missing(series: pd.Series, all_dates: pd.DatetimeIndex, max_entries: int = 3):
"""Restituisce fino a max_entries intervalli di date mancanti (start, end, len)."""
missing_idx = series[series.isna()].index
if missing_idx.empty:
return []
positions = [all_dates.get_loc(ts) for ts in missing_idx if ts in all_dates]
if not positions:
return []
ranges = []
start_pos = positions[0]
end_pos = positions[0]
for pos in positions[1:]:
if pos == end_pos + 1:
end_pos = pos
else:
ranges.append((all_dates[start_pos], all_dates[end_pos], end_pos - start_pos + 1))
start_pos = end_pos = pos
ranges.append((all_dates[start_pos], all_dates[end_pos], end_pos - start_pos + 1))
return ranges[:max_entries]
def preprocess_returns(temp_df: pd.DataFrame, isin: str, all_dates: pd.DatetimeIndex,
gap_ffill_limit: int = GAP_FFILL_LIMIT_DAYS) -> pd.Series:
"""Pulizia/validazione serie rendimenti (tipi, duplicati, buchi, niente fillna(0))."""
required_cols = {"Px_Date", "RendimentoGiornaliero"}
missing_cols = required_cols.difference(temp_df.columns)
if missing_cols:
print(f"[{isin}] Colonne mancanti {missing_cols}, asset ignorato.")
return pd.Series(dtype=float)
df_local = temp_df.copy()
df_local["Px_Date"] = pd.to_datetime(df_local["Px_Date"], errors="coerce").dt.normalize()
df_local["RendimentoGiornaliero"] = pd.to_numeric(df_local["RendimentoGiornaliero"], errors="coerce")
before_rows = len(df_local)
df_local = df_local.dropna(subset=["Px_Date", "RendimentoGiornaliero"])
dropped_na = before_rows - len(df_local)
dup_count = df_local.duplicated(subset=["Px_Date"]).sum()
if dup_count > 0:
print(f"[{isin}] Drop duplicati su Px_Date: {dup_count}")
df_local = df_local.drop_duplicates(subset=["Px_Date"], keep="last").sort_values("Px_Date")
if df_local.empty:
print(f"[{isin}] Nessuna riga valida dopo cleaning, asset ignorato.")
return pd.Series(dtype=float)
df_local["RendimentoGiornaliero"] = df_local["RendimentoGiornaliero"] / 100.0
series = df_local.set_index("Px_Date")["RendimentoGiornaliero"].reindex(all_dates)
missing_before_fill = int(series.isna().sum())
if missing_before_fill > 0:
gaps = _gap_ranges_missing(series, all_dates, max_entries=3)
if gaps:
gaps_str = "; ".join([f"{g[0].date()}->{g[1].date()} ({g[2]} gg)" for g in gaps])
print(f"[{isin}] Date mancanti prima del fill (prime): {gaps_str}")
series_ffill = series.ffill(limit=gap_ffill_limit)
first_valid = series_ffill.first_valid_index()
last_valid = series_ffill.last_valid_index()
if first_valid is None or last_valid is None:
print(f"[{isin}] Serie vuota dopo forward-fill, asset ignorato.")
return pd.Series(dtype=float)
series_ffill = series_ffill.loc[first_valid:last_valid]
residual_missing = int(series_ffill.isna().sum())
if residual_missing > 0:
print(f"[{isin}] {residual_missing} valori mancanti non coperti dal forward-fill (limite {gap_ffill_limit}), righe scartate.")
series_ffill = series_ffill.dropna()
coverage_days = series_ffill.shape[0]
if dropped_na > 0 or missing_before_fill > 0:
print(f"[{isin}] Righe totali: {before_rows}, drop NA: {dropped_na}, copertura finale: {coverage_days} giorni.")
return series_ffill
# ========================= # =========================
# SERIE STORICHE RENDIMENTI # SERIE STORICHE RENDIMENTI
# ========================= # =========================
@@ -396,78 +310,35 @@ for isin in df['ISIN'].unique():
if temp_df.empty: if temp_df.empty:
print(f"Nessun dato recuperato per {isin}, skipping...") print(f"Nessun dato recuperato per {isin}, skipping...")
continue continue
-clean_series = preprocess_returns(temp_df, isin, all_dates, gap_ffill_limit=GAP_FFILL_LIMIT_DAYS)
-if clean_series.empty:
-print(f"Nessun dato valido per {isin} dopo la validazione, asset ignorato.")
-continue
-final_df[isin] = clean_series.reindex(all_dates)
+temp_df['Px_Date'] = pd.to_datetime(temp_df['Px_Date'], format='%Y-%m-%d', errors='coerce').dt.normalize()
+temp_df = temp_df.dropna(subset=['Px_Date'])
+temp_df.set_index('Px_Date', inplace=True)
+temp_df['RendimentoGiornaliero'] = temp_df['RendimentoGiornaliero'] / 100
+final_df[isin] = temp_df['RendimentoGiornaliero'].reindex(all_dates)
isin_from_db.add(isin)
-non_null = int(final_df[isin].count())
-missing_left = int(final_df[isin].isna().sum())
-print(f"Dati recuperati per {isin}: {non_null} righe valide, mancanti residui: {missing_left}.")
+print(f"Dati recuperati per {isin}: {final_df[isin].count()} righe di dati non-null prelevate.")
except SQLAlchemyError as e:
print(f"Errore durante l'esecuzione della stored procedure per {isin}:", e)
-final_df = final_df.loc[:, final_df.notna().any()] # elimina asset senza dati utili
-if final_df.shape[1] == 0:
-print("Nessun ISIN valido dopo la validazione dei rendimenti, uscita.")
-sys.exit(1)
-final_df = final_df.dropna(how='all')
+final_df.fillna(0, inplace=True)
# -------- H_min sempre su 5 anni (21 gg = 1 mese) -------- # -------- H_min sempre su 5 anni (21 gg = 1 mese) --------
five_year_df = final_df.loc[end_date - pd.DateOffset(years=5): end_date] five_year_df = final_df.loc[end_date - pd.DateOffset(years=5): end_date]
# ========================= # =========================
-# CONFIGURAZIONE OBIETTIVI (da config.yaml con fallback)
+# CONFIGURAZIONE OBIETTIVI
# =========================
-DEFAULT_VOL_TARGETS = [
-{"years": 5, "target_vol": 0.06, "name": "VAR3_5Y"},
-{"years": 1, "target_vol": 0.12, "name": "VAR6_1Y"},
-{"years": 3, "target_vol": 0.12, "name": "VAR6_3Y"},
-{"years": 5, "target_vol": 0.12, "name": "VAR6_5Y"},
-{"years": 5, "target_vol": 0.18, "name": "VAR9_5Y"},
-]
-DEFAULT_ASSET_CLASS_LIMITS = {
-'Azionari': 0.75, 'Obbligazionari': 0.75,
-'Metalli Preziosi': 0.20, 'Materie Prime': 0.05,
-'Immobiliare': 0.05, 'Criptovalute': 0.05, 'Monetari': 0.10
-}
+volatility_targets = {
+# (1, 0.06): 'VAR3_1Y',
+# (3, 0.06): 'VAR3_3Y',
+(5, 0.06): 'VAR3_5Y',
+(1, 0.12): 'VAR6_1Y',
+(3, 0.12): 'VAR6_3Y',
+(5, 0.12): 'VAR6_5Y',
+# (1, 0.18): 'VAR9_1Y',
+# (3, 0.18): 'VAR9_3Y',
+(5, 0.18): 'VAR9_5Y'
+}
def load_targets_and_limits(config_file: str, profile: str = "default"):
"""Legge target di volatilità e limiti asset class dal file di configurazione."""
cfg = {}
try:
with open(config_file, "r", encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
except FileNotFoundError:
cfg = {}
vt_cfg = cfg.get("volatility_targets", {})
vt_list = []
if isinstance(vt_cfg, dict):
vt_list = vt_cfg.get(profile) or vt_cfg.get("default") or []
elif isinstance(vt_cfg, list):
vt_list = vt_cfg
if not vt_list:
vt_list = DEFAULT_VOL_TARGETS
volatility_targets_local = {
(int(item["years"]), float(item["target_vol"])): item["name"]
for item in vt_list
if "years" in item and "target_vol" in item and "name" in item
}
asset_limits_cfg = cfg.get("asset_class_limits") or {}
if not asset_limits_cfg:
asset_limits_cfg = DEFAULT_ASSET_CLASS_LIMITS
asset_class_limits_local = {k: float(v) for k, v in asset_limits_cfg.items()}
return volatility_targets_local, asset_class_limits_local
CONFIG_PROFILE = os.getenv("CONFIG_PROFILE", "default")
volatility_targets, asset_class_limits_cfg = load_targets_and_limits(CONFIG_FILE, CONFIG_PROFILE)
days_per_year = 252 days_per_year = 252
riskfree_rate = 0.02 riskfree_rate = 0.02
@@ -561,7 +432,12 @@ for (years, target_vol), name in volatility_targets.items():
ef.add_constraint(lambda w, idxs=idxs, maxw=maxw: sum(w[i] for i in idxs) <= maxw) ef.add_constraint(lambda w, idxs=idxs, maxw=maxw: sum(w[i] for i in idxs) <= maxw)
# Vincoli per Asset Class
-for ac, maxw in asset_class_limits_cfg.items():
+asset_class_limits = {
+'Azionari': 0.75, 'Obbligazionari': 0.75,
+'Metalli Preziosi': 0.20, 'Materie Prime': 0.05,
+'Immobiliare': 0.05, 'Criptovalute': 0.05, 'Monetari': 0.1
+}
+for ac, maxw in asset_class_limits.items():
isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist() isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist()
idxs = [period_df.columns.get_loc(isin) for isin in isin_list if isin in period_df.columns] idxs = [period_df.columns.get_loc(isin) for isin in isin_list if isin in period_df.columns]
if idxs: if idxs:
@@ -611,7 +487,7 @@ for (years, target_vol), name in volatility_targets.items():
print(f"File {output_file_path} saved successfully.") print(f"File {output_file_path} saved successfully.")
# --- Pie chart asset allocation (se ci sono pesi > 0) --- # --- Pie chart asset allocation (se ci sono pesi > 0) ---
-asset_allocations = {asset: 0 for asset in asset_class_limits_cfg}
+asset_allocations = {asset: 0 for asset in asset_class_limits}
for isin, weight in weights.items(): for isin, weight in weights.items():
r_sel = df.loc[df['ISIN'] == isin] r_sel = df.loc[df['ISIN'] == isin]
if r_sel.empty: if r_sel.empty:
@@ -624,6 +500,8 @@ for (years, target_vol), name in volatility_targets.items():
plt.pie(asset_allocations.values(), labels=asset_allocations.keys(), autopct='%1.1f%%') plt.pie(asset_allocations.values(), labels=asset_allocations.keys(), autopct='%1.1f%%')
plt.title(f'Asset Allocation for {name}') plt.title(f'Asset Allocation for {name}')
pie_path = plot_path(f'Asset_Allocation_{name}.png') pie_path = plot_path(f'Asset_Allocation_{name}.png')
if os.path.exists(pie_path):
os.remove(pie_path)
plt.savefig(pie_path, dpi=150, bbox_inches='tight') plt.savefig(pie_path, dpi=150, bbox_inches='tight')
plt.close() plt.close()
@@ -736,6 +614,8 @@ def plot_equity_overlay_all(port_names=None):
plt.legend(loc="best") plt.legend(loc="best")
plt.tight_layout() plt.tight_layout()
out_png = plot_path("Equity_ALL_PORTS.png") out_png = plot_path("Equity_ALL_PORTS.png")
if os.path.exists(out_png):
os.remove(out_png)
plt.savefig(out_png, dpi=150, bbox_inches='tight') plt.savefig(out_png, dpi=150, bbox_inches='tight')
plt.close() plt.close()
print(f"[plot] Grafico sovrapposto salvato: {out_png}") print(f"[plot] Grafico sovrapposto salvato: {out_png}")
@@ -772,6 +652,8 @@ def plot_underwater_overlay_all(port_names=None, ylim=(-0.3, 0.0)):
plt.legend(loc="best") plt.legend(loc="best")
plt.tight_layout() plt.tight_layout()
out_png = plot_path("Underwater_ALL_PORTS.png") out_png = plot_path("Underwater_ALL_PORTS.png")
if os.path.exists(out_png):
os.remove(out_png)
plt.savefig(out_png, dpi=150, bbox_inches='tight') plt.savefig(out_png, dpi=150, bbox_inches='tight')
plt.close() plt.close()
print(f"[underwater] Grafico sovrapposto salvato: {out_png}") print(f"[underwater] Grafico sovrapposto salvato: {out_png}")

View File

@@ -11,8 +11,6 @@ import os
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from dotenv import load_dotenv
import yaml
from sqlalchemy import create_engine, text from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
@@ -25,14 +23,10 @@ from pypfopt.exceptions import OptimizationError
OUTPUT_DIR = "Output" OUTPUT_DIR = "Output"
INPUT_DIR = "Input" INPUT_DIR = "Input"
PLOT_DIR = "Plot" PLOT_DIR = "Plot"
CONFIG_FILE = "config.yaml"
os.makedirs(OUTPUT_DIR, exist_ok=True) os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(INPUT_DIR, exist_ok=True) os.makedirs(INPUT_DIR, exist_ok=True)
os.makedirs(PLOT_DIR, exist_ok=True) os.makedirs(PLOT_DIR, exist_ok=True)
load_dotenv()
GAP_FFILL_LIMIT_DAYS = 5 # forward-fill limit per buchi di calendario
def excel_path(filename: str) -> str: def excel_path(filename: str) -> str:
"""Costruisce il percorso completo per un file Excel nella cartella di output.""" """Costruisce il percorso completo per un file Excel nella cartella di output."""
@@ -193,28 +187,27 @@ def h_min_100(returns: pd.Series, month_len: int = 21):
return np.nan, np.nan return np.nan, np.nan
-def load_db_config():
-"""Recupera i parametri di connessione dal set di variabili d'ambiente."""
-required_keys = ["DB_USERNAME", "DB_PASSWORD", "DB_HOST", "DB_NAME"]
-missing = [k for k in required_keys if not os.getenv(k)]
-if missing:
-raise RuntimeError(f"Variabili d'ambiente mancanti per il DB: {', '.join(missing)}")
-return {
-"username": os.getenv("DB_USERNAME"),
-"password": os.getenv("DB_PASSWORD"),
-"host": os.getenv("DB_HOST"),
-"port": os.getenv("DB_PORT", "1433"),
-"database": os.getenv("DB_NAME"),
-}
-db_cfg = load_db_config()
+# --- Lettura parametri dal file connection.txt ---
+params = {}
+with open("connection.txt", "r") as f:
+for line in f:
+line = line.strip()
+if line and not line.startswith("#"):
+key, value = line.split("=", 1)
+params[key.strip()] = value.strip()
+username = params.get("username")
+password = params.get("password")
+host = params.get("host")
+port = params.get("port", "1433")
+database = params.get("database")
connection_string = (
-f"mssql+pyodbc://{db_cfg['username']}:{db_cfg['password']}@{db_cfg['host']}:{db_cfg['port']}/{db_cfg['database']}"
+f"mssql+pyodbc://{username}:{password}@{host}:{port}/{database}"
"?driver=ODBC+Driver+17+for+SQL+Server"
)
-print("Connection string configurata da variabili d'ambiente.")
+print("Connection string letta correttamente")
# ========================= # =========================
# CONNESSIONE AL DB # CONNESSIONE AL DB
@@ -241,85 +234,6 @@ df = pd.read_excel(
dtype={'Codice Titolo': str} dtype={'Codice Titolo': str}
) )
# =========================
# VALIDAZIONE DATI RENDIMENTI
# =========================
def _gap_ranges_missing(series: pd.Series, all_dates: pd.DatetimeIndex, max_entries: int = 3):
"""Restituisce fino a max_entries intervalli di date mancanti (start, end, len)."""
missing_idx = series[series.isna()].index
if missing_idx.empty:
return []
positions = [all_dates.get_loc(ts) for ts in missing_idx if ts in all_dates]
if not positions:
return []
ranges = []
start_pos = positions[0]
end_pos = positions[0]
for pos in positions[1:]:
if pos == end_pos + 1:
end_pos = pos
else:
ranges.append((all_dates[start_pos], all_dates[end_pos], end_pos - start_pos + 1))
start_pos = end_pos = pos
ranges.append((all_dates[start_pos], all_dates[end_pos], end_pos - start_pos + 1))
return ranges[:max_entries]
def preprocess_returns(temp_df: pd.DataFrame, isin: str, all_dates: pd.DatetimeIndex,
gap_ffill_limit: int = GAP_FFILL_LIMIT_DAYS) -> pd.Series:
"""Pulizia/validazione serie rendimenti (tipi, duplicati, buchi, niente fillna(0))."""
required_cols = {"Px_Date", "RendimentoGiornaliero"}
missing_cols = required_cols.difference(temp_df.columns)
if missing_cols:
print(f"[{isin}] Colonne mancanti {missing_cols}, asset ignorato.")
return pd.Series(dtype=float)
df_local = temp_df.copy()
df_local["Px_Date"] = pd.to_datetime(df_local["Px_Date"], errors="coerce").dt.normalize()
df_local["RendimentoGiornaliero"] = pd.to_numeric(df_local["RendimentoGiornaliero"], errors="coerce")
before_rows = len(df_local)
df_local = df_local.dropna(subset=["Px_Date", "RendimentoGiornaliero"])
dropped_na = before_rows - len(df_local)
dup_count = df_local.duplicated(subset=["Px_Date"]).sum()
if dup_count > 0:
print(f"[{isin}] Drop duplicati su Px_Date: {dup_count}")
df_local = df_local.drop_duplicates(subset=["Px_Date"], keep="last").sort_values("Px_Date")
if df_local.empty:
print(f"[{isin}] Nessuna riga valida dopo cleaning, asset ignorato.")
return pd.Series(dtype=float)
df_local["RendimentoGiornaliero"] = df_local["RendimentoGiornaliero"] / 100.0
series = df_local.set_index("Px_Date")["RendimentoGiornaliero"].reindex(all_dates)
missing_before_fill = int(series.isna().sum())
if missing_before_fill > 0:
gaps = _gap_ranges_missing(series, all_dates, max_entries=3)
if gaps:
gaps_str = "; ".join([f"{g[0].date()}->{g[1].date()} ({g[2]} gg)" for g in gaps])
print(f"[{isin}] Date mancanti prima del fill (prime): {gaps_str}")
series_ffill = series.ffill(limit=gap_ffill_limit)
first_valid = series_ffill.first_valid_index()
last_valid = series_ffill.last_valid_index()
if first_valid is None or last_valid is None:
print(f"[{isin}] Serie vuota dopo forward-fill, asset ignorato.")
return pd.Series(dtype=float)
series_ffill = series_ffill.loc[first_valid:last_valid]
residual_missing = int(series_ffill.isna().sum())
if residual_missing > 0:
print(f"[{isin}] {residual_missing} valori mancanti non coperti dal forward-fill (limite {gap_ffill_limit}), righe scartate.")
series_ffill = series_ffill.dropna()
coverage_days = series_ffill.shape[0]
if dropped_na > 0 or missing_before_fill > 0:
print(f"[{isin}] Righe totali: {before_rows}, drop NA: {dropped_na}, copertura finale: {coverage_days} giorni.")
return series_ffill
# ========================= # =========================
# SERIE STORICHE RENDIMENTI # SERIE STORICHE RENDIMENTI
# ========================= # =========================
@@ -338,77 +252,35 @@ for isin in df['ISIN'].unique():
if temp_df.empty: if temp_df.empty:
print(f"Nessun dato recuperato per {isin}, skipping...") print(f"Nessun dato recuperato per {isin}, skipping...")
continue continue
-clean_series = preprocess_returns(temp_df, isin, all_dates, gap_ffill_limit=GAP_FFILL_LIMIT_DAYS)
-if clean_series.empty:
-print(f"Nessun dato valido per {isin} dopo la validazione, asset ignorato.")
-continue
-final_df[isin] = clean_series.reindex(all_dates)
+temp_df['Px_Date'] = pd.to_datetime(temp_df['Px_Date'], format='%Y-%m-%d', errors='coerce').dt.normalize()
+temp_df = temp_df.dropna(subset=['Px_Date'])
+temp_df.set_index('Px_Date', inplace=True)
+temp_df['RendimentoGiornaliero'] = temp_df['RendimentoGiornaliero'] / 100
+final_df[isin] = temp_df['RendimentoGiornaliero'].reindex(all_dates)
isin_from_db.add(isin)
-non_null = int(final_df[isin].count())
-missing_left = int(final_df[isin].isna().sum())
-print(f"Dati recuperati per {isin}: {non_null} righe valide, mancanti residui: {missing_left}.")
+print(f"Dati recuperati per {isin}: {final_df[isin].count()} righe di dati non-null prelevate.")
except SQLAlchemyError as e:
print(f"Errore durante l'esecuzione della stored procedure per {isin}:", e)
-final_df = final_df.loc[:, final_df.notna().any()] # elimina asset senza dati utili
-if final_df.shape[1] == 0:
-print("Nessun ISIN valido dopo la validazione dei rendimenti, uscita.")
-sys.exit(1)
-final_df = final_df.dropna(how='all')
+final_df.fillna(0, inplace=True)
# -------- H_min sempre su 5 anni (21 gg = 1 mese) -------- # -------- H_min sempre su 5 anni (21 gg = 1 mese) --------
five_year_df = final_df.loc[end_date - pd.DateOffset(years=5): end_date] five_year_df = final_df.loc[end_date - pd.DateOffset(years=5): end_date]
# ========================= # =========================
-# CONFIGURAZIONE OBIETTIVI (da config.yaml con fallback)
+# CONFIGURAZIONE OBIETTIVI
# =========================
-DEFAULT_VOL_TARGETS = [
-{"years": 5, "target_vol": 0.06, "name": "VAR3_5Y"},
-{"years": 1, "target_vol": 0.12, "name": "VAR6_1Y"},
-{"years": 3, "target_vol": 0.12, "name": "VAR6_3Y"},
-{"years": 5, "target_vol": 0.12, "name": "VAR6_5Y"},
-{"years": 5, "target_vol": 0.18, "name": "VAR9_5Y"},
-]
-DEFAULT_ASSET_CLASS_LIMITS = {
-'Azionari': 0.75, 'Obbligazionari': 0.75,
-'Metalli Preziosi': 0.20, 'Materie Prime': 0.05,
-'Immobiliare': 0.05, 'Criptovalute': 0.05, 'Monetari': 0.10
-}
+volatility_targets = {
+# (1, 0.06): 'VAR3_1Y',
+# (3, 0.06): 'VAR3_3Y',
+(5, 0.06): 'VAR3_5Y',
+(1, 0.12): 'VAR6_1Y',
+(3, 0.12): 'VAR6_3Y',
+(5, 0.12): 'VAR6_5Y',
+# (1, 0.18): 'VAR9_1Y',
+# (3, 0.18): 'VAR9_3Y',
+(5, 0.18): 'VAR9_5Y'
+}
def load_targets_and_limits(config_file: str, profile: str = "default"):
"""Legge target di volatilità e limiti asset class dal file di configurazione."""
cfg = {}
try:
with open(config_file, "r", encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
except FileNotFoundError:
cfg = {}
vt_cfg = cfg.get("volatility_targets", {})
vt_list = []
if isinstance(vt_cfg, dict):
vt_list = vt_cfg.get(profile) or vt_cfg.get("default") or []
elif isinstance(vt_cfg, list):
vt_list = vt_cfg
if not vt_list:
vt_list = DEFAULT_VOL_TARGETS
volatility_targets_local = {
(int(item["years"]), float(item["target_vol"])): item["name"]
for item in vt_list
if "years" in item and "target_vol" in item and "name" in item
}
asset_limits_cfg = cfg.get("asset_class_limits") or {}
if not asset_limits_cfg:
asset_limits_cfg = DEFAULT_ASSET_CLASS_LIMITS
asset_class_limits_local = {k: float(v) for k, v in asset_limits_cfg.items()}
return volatility_targets_local, asset_class_limits_local
CONFIG_PROFILE = os.getenv("CONFIG_PROFILE", "default")
volatility_targets, asset_class_limits_cfg = load_targets_and_limits(CONFIG_FILE, CONFIG_PROFILE)
days_per_year = 252 days_per_year = 252
riskfree_rate = 0.02 riskfree_rate = 0.02
@@ -502,11 +374,16 @@ for (years, target_vol), name in volatility_targets.items():
ef.add_constraint(lambda w, idxs=idxs, maxw=maxw: sum(w[i] for i in idxs) <= maxw) ef.add_constraint(lambda w, idxs=idxs, maxw=maxw: sum(w[i] for i in idxs) <= maxw)
# Vincoli per Asset Class
-for ac, maxw in asset_class_limits_cfg.items():
+asset_class_limits = {
+'Azionari': 0.75, 'Obbligazionari': 0.75,
+'Metalli Preziosi': 0.20, 'Materie Prime': 0.05,
+'Immobiliare': 0.05, 'Criptovalute': 0.05, 'Monetari': 0.1
+}
+for ac, maxw in asset_class_limits.items():
isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist()
idxs = [period_df.columns.get_loc(isin) for isin in isin_list if isin in period_df.columns]
if idxs:
-ef.add_constraint(lambda w, idxs=idxs, maxw=maxw: w[idxs].sum() <= maxw)
+ef.add_constraint(lambda w, idxs=idxs, maxw=maxw: sum(w[i] for i in idxs) <= maxw)
# ---------- Risoluzione ---------- # ---------- Risoluzione ----------
try: try:
@@ -552,7 +429,7 @@ for (years, target_vol), name in volatility_targets.items():
print(f"File {output_file_path} saved successfully.") print(f"File {output_file_path} saved successfully.")
# --- Pie chart asset allocation: salva in Output senza mostrare --- # --- Pie chart asset allocation: salva in Output senza mostrare ---
-asset_allocations = {asset: 0 for asset in asset_class_limits_cfg}
+asset_allocations = {asset: 0 for asset in ['Azionari', 'Obbligazionari', 'Metalli Preziosi', 'Materie Prime', 'Immobiliare', 'Criptovalute', 'Monetari']}
for isin, weight in weights.items(): for isin, weight in weights.items():
r_sel = df.loc[df['ISIN'] == isin] r_sel = df.loc[df['ISIN'] == isin]
if r_sel.empty: if r_sel.empty:

View File

@@ -1,31 +0,0 @@
# Optimizer - Quick setup
## Prerequisites
- Python 3.10+ with pip
- ODBC Driver 17 for SQL Server installed
## Credentials configuration
1. Copy `.env.example` to `.env`.
2. Set the variables (see the example after this list):
   - `DB_USERNAME`
   - `DB_PASSWORD`
   - `DB_HOST`
   - `DB_PORT` (default 1433)
   - `DB_NAME`
3. Keep `.env` out of version control (already ignored).
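For reference, a minimal `.env` sketch with placeholder values (adapt them to your environment; only `DB_NAME` below reflects the database name used by the scripts):
```
DB_USERNAME=readonly_user
DB_PASSWORD=changeme
DB_HOST=192.0.2.10
DB_PORT=1433
DB_NAME=FirstSolutionDB
```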
## Installing dependencies
```
pip install -r requirements.txt
```
## Portfolio configuration (config.yaml)
- Variable parameters (volatility targets, portfolio naming, per-Asset-Class limits) live in `config.yaml`.
- Profiles:
  - `default`: used by all the main scripts (v2.6, 2.5.2, Lite).
  - `uk`: used by default by `20240929 Ottimizzatore Versione 2.2 UK.py` (or set `CONFIG_PROFILE=uk`).
- To change targets/limits, edit `config.yaml` without touching the code; if the file is missing or incomplete, the built-in defaults are used. A sketch of the expected layout follows this list.
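An illustrative `config.yaml` matching the schema read by `load_targets_and_limits` (the values simply mirror the built-in defaults shown in the scripts; adjust as needed):
```
volatility_targets:
  default:
    - {years: 5, target_vol: 0.06, name: VAR3_5Y}
    - {years: 1, target_vol: 0.12, name: VAR6_1Y}
    - {years: 3, target_vol: 0.12, name: VAR6_3Y}
    - {years: 5, target_vol: 0.12, name: VAR6_5Y}
    - {years: 5, target_vol: 0.18, name: VAR9_5Y}
  uk:
    - {years: 5, target_vol: 0.06, name: VAR3_GBP}
asset_class_limits:
  Azionari: 0.75
  Obbligazionari: 0.75
  Metalli Preziosi: 0.20
  Materie Prime: 0.05
  Immobiliare: 0.05
  Criptovalute: 0.05
  Monetari: 0.10
```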
## Running
Run the desired script (e.g. `python "Sviluppo/20251022 Ottimizzatore Versione 2.6.py"`).
Make sure `.env` is present or that the variables are exported in the process; an example invocation with an explicit profile is shown below.
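For example, to force the `uk` profile on the main script (illustrative; `CONFIG_PROFILE` selects the profile, path as in the example above):
```
CONFIG_PROFILE=uk python "Sviluppo/20251022 Ottimizzatore Versione 2.6.py"
```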

View File

@@ -13,8 +13,6 @@ import os
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from dotenv import load_dotenv
import yaml
from sqlalchemy import create_engine, text from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
@@ -27,12 +25,10 @@ from pypfopt.exceptions import OptimizationError
OUTPUT_DIR = "Output" OUTPUT_DIR = "Output"
INPUT_DIR = "Input" INPUT_DIR = "Input"
PLOT_DIR = "Plot" PLOT_DIR = "Plot"
CONFIG_FILE = "config.yaml"
os.makedirs(OUTPUT_DIR, exist_ok=True) os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(INPUT_DIR, exist_ok=True) os.makedirs(INPUT_DIR, exist_ok=True)
os.makedirs(PLOT_DIR, exist_ok=True) os.makedirs(PLOT_DIR, exist_ok=True)
load_dotenv()
def excel_path(filename: str) -> str: def excel_path(filename: str) -> str:
"""Percorso completo per i file Excel di output.""" """Percorso completo per i file Excel di output."""
@@ -47,60 +43,23 @@ optimized_weights_phase2 = pd.DataFrame()
summary_data_phase2 = [] summary_data_phase2 = []
# ========================= # =========================
-# CONFIGURAZIONE OBIETTIVI (da config.yaml con fallback)
+# CONFIGURAZIONE OBIETTIVI
# =========================
-DEFAULT_VOL_TARGETS = [
-{"years": 5, "target_vol": 0.06, "name": "VAR3_5Y"},
-{"years": 1, "target_vol": 0.12, "name": "VAR6_1Y"},
-{"years": 3, "target_vol": 0.12, "name": "VAR6_3Y"},
-{"years": 5, "target_vol": 0.12, "name": "VAR6_5Y"},
-{"years": 5, "target_vol": 0.18, "name": "VAR9_5Y"},
-]
-DEFAULT_ASSET_CLASS_LIMITS = {
-'Azionari': 0.75, 'Obbligazionari': 0.75,
-'Metalli Preziosi': 0.20, 'Materie Prime': 0.05,
-'Immobiliare': 0.05, 'Criptovalute': 0.05, 'Monetari': 0.10
-}
+volatility_targets = {
+# (1, 0.06): 'VAR3_1Y',
+# (3, 0.06): 'VAR3_3Y',
+(5, 0.06): 'VAR3_5Y',
+(1, 0.12): 'VAR6_1Y',
+(3, 0.12): 'VAR6_3Y',
+(5, 0.12): 'VAR6_5Y',
+# (1, 0.18): 'VAR9_1Y',
+# (3, 0.18): 'VAR9_3Y',
+(5, 0.18): 'VAR9_5Y'
+}
def load_targets_and_limits(config_file: str, profile: str = "default"):
"""Legge target di volatilità e limiti asset class dal file di configurazione."""
cfg = {}
try:
with open(config_file, "r", encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
except FileNotFoundError:
cfg = {}
vt_cfg = cfg.get("volatility_targets", {})
vt_list = []
if isinstance(vt_cfg, dict):
vt_list = vt_cfg.get(profile) or vt_cfg.get("default") or []
elif isinstance(vt_cfg, list):
vt_list = vt_cfg
if not vt_list:
vt_list = DEFAULT_VOL_TARGETS
volatility_targets_local = {
(int(item["years"]), float(item["target_vol"])): item["name"]
for item in vt_list
if "years" in item and "target_vol" in item and "name" in item
}
asset_limits_cfg = cfg.get("asset_class_limits") or {}
if not asset_limits_cfg:
asset_limits_cfg = DEFAULT_ASSET_CLASS_LIMITS
asset_class_limits_local = {k: float(v) for k, v in asset_limits_cfg.items()}
return volatility_targets_local, asset_class_limits_local
CONFIG_PROFILE = os.getenv("CONFIG_PROFILE", "default")
volatility_targets, asset_class_limits_cfg = load_targets_and_limits(CONFIG_FILE, CONFIG_PROFILE)
days_per_year = 252 days_per_year = 252
riskfree_rate = 0.02 riskfree_rate = 0.02
mu_ph2_floor = 0.9 mu_ph2_floor = 0.9
GAP_FFILL_LIMIT_DAYS = 5 # forward-fill limit per buchi di calendario
# --------------------------------- # ---------------------------------
# Utility per R^2 sull'equity line
@@ -356,85 +315,6 @@ df = pd.read_excel(
dtype={'Codice Titolo': str} dtype={'Codice Titolo': str}
) )
# =========================
# VALIDAZIONE DATI RENDIMENTI
# =========================
def _gap_ranges_missing(series: pd.Series, all_dates: pd.DatetimeIndex, max_entries: int = 3):
"""Restituisce fino a max_entries intervalli di date mancanti (start, end, len)."""
missing_idx = series[series.isna()].index
if missing_idx.empty:
return []
positions = [all_dates.get_loc(ts) for ts in missing_idx if ts in all_dates]
if not positions:
return []
ranges = []
start_pos = positions[0]
end_pos = positions[0]
for pos in positions[1:]:
if pos == end_pos + 1:
end_pos = pos
else:
ranges.append((all_dates[start_pos], all_dates[end_pos], end_pos - start_pos + 1))
start_pos = end_pos = pos
ranges.append((all_dates[start_pos], all_dates[end_pos], end_pos - start_pos + 1))
return ranges[:max_entries]
def preprocess_returns(temp_df: pd.DataFrame, isin: str, all_dates: pd.DatetimeIndex,
gap_ffill_limit: int = GAP_FFILL_LIMIT_DAYS) -> pd.Series:
"""Pulizia/validazione serie rendimenti (tipi, duplicati, buchi, niente fillna(0))."""
required_cols = {"Px_Date", "RendimentoGiornaliero"}
missing_cols = required_cols.difference(temp_df.columns)
if missing_cols:
print(f"[{isin}] Colonne mancanti {missing_cols}, asset ignorato.")
return pd.Series(dtype=float)
df_local = temp_df.copy()
df_local["Px_Date"] = pd.to_datetime(df_local["Px_Date"], errors="coerce").dt.normalize()
df_local["RendimentoGiornaliero"] = pd.to_numeric(df_local["RendimentoGiornaliero"], errors="coerce")
before_rows = len(df_local)
df_local = df_local.dropna(subset=["Px_Date", "RendimentoGiornaliero"])
dropped_na = before_rows - len(df_local)
dup_count = df_local.duplicated(subset=["Px_Date"]).sum()
if dup_count > 0:
print(f"[{isin}] Drop duplicati su Px_Date: {dup_count}")
df_local = df_local.drop_duplicates(subset=["Px_Date"], keep="last").sort_values("Px_Date")
if df_local.empty:
print(f"[{isin}] Nessuna riga valida dopo cleaning, asset ignorato.")
return pd.Series(dtype=float)
df_local["RendimentoGiornaliero"] = df_local["RendimentoGiornaliero"] / 100.0
series = df_local.set_index("Px_Date")["RendimentoGiornaliero"].reindex(all_dates)
missing_before_fill = int(series.isna().sum())
if missing_before_fill > 0:
gaps = _gap_ranges_missing(series, all_dates, max_entries=3)
if gaps:
gaps_str = "; ".join([f"{g[0].date()}->{g[1].date()} ({g[2]} gg)" for g in gaps])
print(f"[{isin}] Date mancanti prima del fill (prime): {gaps_str}")
series_ffill = series.ffill(limit=gap_ffill_limit)
first_valid = series_ffill.first_valid_index()
last_valid = series_ffill.last_valid_index()
if first_valid is None or last_valid is None:
print(f"[{isin}] Serie vuota dopo forward-fill, asset ignorato.")
return pd.Series(dtype=float)
series_ffill = series_ffill.loc[first_valid:last_valid]
residual_missing = int(series_ffill.isna().sum())
if residual_missing > 0:
print(f"[{isin}] {residual_missing} valori mancanti non coperti dal forward-fill (limite {gap_ffill_limit}), righe scartate.")
series_ffill = series_ffill.dropna()
coverage_days = series_ffill.shape[0]
if dropped_na > 0 or missing_before_fill > 0:
print(f"[{isin}] Righe totali: {before_rows}, drop NA: {dropped_na}, copertura finale: {coverage_days} giorni.")
return series_ffill
# ========================= # =========================
# SERIE STORICHE RENDIMENTI # SERIE STORICHE RENDIMENTI
# ========================= # =========================
@@ -453,23 +333,17 @@ for isin in df['ISIN'].unique():
if temp_df.empty: if temp_df.empty:
print(f"Nessun dato recuperato per {isin}, skipping...") print(f"Nessun dato recuperato per {isin}, skipping...")
continue continue
-clean_series = preprocess_returns(temp_df, isin, all_dates, gap_ffill_limit=GAP_FFILL_LIMIT_DAYS)
-if clean_series.empty:
-print(f"Nessun dato valido per {isin} dopo la validazione, asset ignorato.")
-continue
-final_df[isin] = clean_series.reindex(all_dates)
+temp_df['Px_Date'] = pd.to_datetime(temp_df['Px_Date'], format='%Y-%m-%d', errors='coerce').dt.normalize()
+temp_df = temp_df.dropna(subset=['Px_Date'])
+temp_df.set_index('Px_Date', inplace=True)
+temp_df['RendimentoGiornaliero'] = temp_df['RendimentoGiornaliero'] / 100
+final_df[isin] = temp_df['RendimentoGiornaliero'].reindex(all_dates)
isin_from_db.add(isin)
-non_null = int(final_df[isin].count())
-missing_left = int(final_df[isin].isna().sum())
-print(f"Dati recuperati per {isin}: {non_null} righe valide, mancanti residui: {missing_left}.")
+print(f"Dati recuperati per {isin}: {final_df[isin].count()} righe di dati non-null prelevate.")
except SQLAlchemyError as e:
print(f"Errore durante l'esecuzione della stored procedure per {isin}:", e)
-final_df = final_df.loc[:, final_df.notna().any()] # elimina asset senza dati utili
-if final_df.shape[1] == 0:
-print("Nessun ISIN valido dopo la validazione dei rendimenti, uscita.")
-sys.exit(1)
-final_df = final_df.dropna(how='all')
+final_df.fillna(0, inplace=True)
# -------- H_min sempre su 5 anni (21 gg = 1 mese) -------- # -------- H_min sempre su 5 anni (21 gg = 1 mese) --------
five_year_df = final_df.loc[end_date - pd.DateOffset(years=5): end_date] five_year_df = final_df.loc[end_date - pd.DateOffset(years=5): end_date]
@@ -564,7 +438,12 @@ for (years, target_vol), name in volatility_targets.items():
ef.add_constraint(lambda w, idxs=idxs, maxw=maxw: sum(w[i] for i in idxs) <= maxw) ef.add_constraint(lambda w, idxs=idxs, maxw=maxw: sum(w[i] for i in idxs) <= maxw)
# Vincoli per Asset Class
-for ac, maxw in asset_class_limits_cfg.items():
+asset_class_limits = {
+'Azionari': 0.75, 'Obbligazionari': 0.75,
+'Metalli Preziosi': 0.20, 'Materie Prime': 0.05,
+'Immobiliare': 0.05, 'Criptovalute': 0.05, 'Monetari': 0.1
+}
+for ac, maxw in asset_class_limits.items():
isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist() isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist()
idxs = [period_df.columns.get_loc(isin) for isin in isin_list if isin in period_df.columns] idxs = [period_df.columns.get_loc(isin) for isin in isin_list if isin in period_df.columns]
if idxs: if idxs:
@@ -605,16 +484,13 @@ for (years, target_vol), name in volatility_targets.items():
results_rows.append(row) results_rows.append(row)
results_full_df = pd.DataFrame(results_rows, columns=template_cols) results_full_df = pd.DataFrame(results_rows, columns=template_cols)
-if results_full_df.empty:
-output_df = template_df.iloc[0:0].copy()
-else:
-output_df = results_full_df.reindex(columns=template_cols)
+output_df = pd.concat([template_df.iloc[0:0], results_full_df], ignore_index=True)
output_file_path = excel_path(f'PTFOPT{name}.xlsx')
output_df.to_excel(output_file_path, index=False)
print(f"File {output_file_path} saved successfully.")
# --- Pie chart asset allocation (se ci sono pesi > 0) ---
-asset_allocations = {asset: 0 for asset in asset_class_limits_cfg}
+asset_allocations = {asset: 0 for asset in asset_class_limits}
for isin, weight in weights.items(): for isin, weight in weights.items():
r_sel = df.loc[df['ISIN'] == isin] r_sel = df.loc[df['ISIN'] == isin]
if r_sel.empty: if r_sel.empty:
@@ -831,7 +707,12 @@ if cp is not None:
if idxs: if idxs:
ef_h.add_constraint(lambda w, idxs=idxs, maxw=maxw: cp.sum(w[idxs]) <= maxw) ef_h.add_constraint(lambda w, idxs=idxs, maxw=maxw: cp.sum(w[idxs]) <= maxw)
-for ac, maxw in asset_class_limits_cfg.items():
+asset_class_limits = {
+'Azionari': 0.75, 'Obbligazionari': 0.75,
+'Metalli Preziosi': 0.20, 'Materie Prime': 0.05,
+'Immobiliare': 0.05, 'Criptovalute': 0.05, 'Monetari': 0.1
+}
+for ac, maxw in asset_class_limits.items():
isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist() isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist()
idxs = [period_df_p.columns.get_loc(isin) for isin in isin_list if isin in period_df_p.columns] idxs = [period_df_p.columns.get_loc(isin) for isin in isin_list if isin in period_df_p.columns]
if idxs: if idxs:
@@ -992,12 +873,9 @@ else:
row['peso'] = float(weight * 99) # allineato a Fase 1 row['peso'] = float(weight * 99) # allineato a Fase 1
results_rows.append(row) results_rows.append(row)
# Prepara il foglio con l'intestazione del template + righe risultato
results_full_df = pd.DataFrame(results_rows, columns=template_cols)
-if results_full_df.empty:
-output_df = template_df.iloc[0:0].copy()
-else:
-output_df = results_full_df.reindex(columns=template_cols)
+output_df = pd.concat([template_df.iloc[0:0], results_full_df], ignore_index=True)
# NOME FILE: identico al naming di Fase 1 # NOME FILE: identico al naming di Fase 1
output_file_path = excel_path(f'PTFOPT{name}_PH2.xlsx') output_file_path = excel_path(f'PTFOPT{name}_PH2.xlsx')

View File

@@ -15,6 +15,7 @@ import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from dotenv import load_dotenv from dotenv import load_dotenv
import yaml import yaml
import logging
from sqlalchemy import create_engine, text from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
@@ -33,6 +34,8 @@ os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(INPUT_DIR, exist_ok=True) os.makedirs(INPUT_DIR, exist_ok=True)
os.makedirs(PLOT_DIR, exist_ok=True) os.makedirs(PLOT_DIR, exist_ok=True)
load_dotenv() load_dotenv()
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
def excel_path(filename: str) -> str: def excel_path(filename: str) -> str:
"""Percorso completo per i file Excel di output.""" """Percorso completo per i file Excel di output."""
@@ -47,7 +50,7 @@ optimized_weights_phase1_heal = pd.DataFrame()
summary_data_phase1_heal = [] summary_data_phase1_heal = []
# ========================= # =========================
-# CONFIGURAZIONE OBIETTIVI (da config.yaml con fallback)
+# CONFIGURAZIONE OBIETTIVI (config esterna + fallback) E VALIDAZIONE
# ========================= # =========================
DEFAULT_VOL_TARGETS = [ DEFAULT_VOL_TARGETS = [
{"years": 5, "target_vol": 0.06, "name": "VAR3_5Y"}, {"years": 5, "target_vol": 0.06, "name": "VAR3_5Y"},
@@ -101,7 +104,30 @@ days_per_year = 252
riskfree_rate = 0.02 riskfree_rate = 0.02
mu_heal_floor = 0.85 mu_heal_floor = 0.85
GAP_FFILL_LIMIT_DAYS = 5 # forward-fill limit per buchi di calendario
def validate_universe(df_universe: pd.DataFrame):
required_cols = ['ISIN', 'Nome', 'Categoria', 'Asset Class']
missing_cols = [c for c in required_cols if c not in df_universe.columns]
if missing_cols:
logger.error("Colonne mancanti nel file universo: %s", ", ".join(missing_cols))
sys.exit(1)
dup_isin = df_universe['ISIN'][df_universe['ISIN'].duplicated()].unique().tolist()
if dup_isin:
logger.warning("ISIN duplicati nel file universo: %s", dup_isin)
empty_isin = df_universe['ISIN'].isna().sum()
if empty_isin:
logger.warning("Righe con ISIN mancante nel file universo: %d", int(empty_isin))
def validate_returns_frame(df_returns: pd.DataFrame, threshold: float = 0.2):
if df_returns.empty:
logger.error("Nessun dato di rendimento recuperato: final_df vuoto.")
sys.exit(1)
na_ratio = df_returns.isna().mean()
high_na = na_ratio[na_ratio > threshold]
if not high_na.empty:
logger.warning("Colonne con >%.0f%% di NaN prima del fill: %s",
threshold * 100,
", ".join([f"{c} ({v:.0%})" for c, v in high_na.items()]))
# --------------------------------- # ---------------------------------
# Utility per R^2 sull'equity line
@@ -310,28 +336,27 @@ def portfolio_path_metrics(period_df: pd.DataFrame,
"Hmin_100m_5Y": hmin_5y_months "Hmin_100m_5Y": hmin_5y_months
} }
-def load_db_config():
-"""Recupera i parametri di connessione dal set di variabili d'ambiente."""
-required_keys = ["DB_USERNAME", "DB_PASSWORD", "DB_HOST", "DB_NAME"]
-missing = [k for k in required_keys if not os.getenv(k)]
-if missing:
-raise RuntimeError(f"Variabili d'ambiente mancanti per il DB: {', '.join(missing)}")
-return {
-"username": os.getenv("DB_USERNAME"),
-"password": os.getenv("DB_PASSWORD"),
-"host": os.getenv("DB_HOST"),
-"port": os.getenv("DB_PORT", "1433"),
-"database": os.getenv("DB_NAME"),
-}
-db_cfg = load_db_config()
+# --- Lettura parametri dal file connection.txt ---
+params = {}
+with open("connection.txt", "r") as f:
+for line in f:
+line = line.strip()
+if line and not line.startswith("#"):
+key, value = line.split("=", 1)
+params[key.strip()] = value.strip()
+username = params.get("username")
+password = params.get("password")
+host = params.get("host")
+port = params.get("port", "1433")
+database = params.get("database")
connection_string = (
-f"mssql+pyodbc://{db_cfg['username']}:{db_cfg['password']}@{db_cfg['host']}:{db_cfg['port']}/{db_cfg['database']}"
+f"mssql+pyodbc://{username}:{password}@{host}:{port}/{database}"
"?driver=ODBC+Driver+17+for+SQL+Server"
)
-print("Connection string configurata da variabili d'ambiente.")
+print("Connection string letta correttamente")
# ========================= # =========================
# CONNESSIONE AL DB # CONNESSIONE AL DB
@@ -357,85 +382,7 @@ df = pd.read_excel(
usecols=['ISIN', 'Nome', 'Categoria', 'Asset Class', 'PesoMax', 'PesoFisso', 'Codice Titolo'], usecols=['ISIN', 'Nome', 'Categoria', 'Asset Class', 'PesoMax', 'PesoFisso', 'Codice Titolo'],
dtype={'Codice Titolo': str} dtype={'Codice Titolo': str}
) )
validate_universe(df)
# =========================
# VALIDAZIONE DATI RENDIMENTI
# =========================
def _gap_ranges_missing(series: pd.Series, all_dates: pd.DatetimeIndex, max_entries: int = 3):
"""Restituisce fino a max_entries intervalli di date mancanti (start, end, len)."""
missing_idx = series[series.isna()].index
if missing_idx.empty:
return []
positions = [all_dates.get_loc(ts) for ts in missing_idx if ts in all_dates]
if not positions:
return []
ranges = []
start_pos = positions[0]
end_pos = positions[0]
for pos in positions[1:]:
if pos == end_pos + 1:
end_pos = pos
else:
ranges.append((all_dates[start_pos], all_dates[end_pos], end_pos - start_pos + 1))
start_pos = end_pos = pos
ranges.append((all_dates[start_pos], all_dates[end_pos], end_pos - start_pos + 1))
return ranges[:max_entries]
def preprocess_returns(temp_df: pd.DataFrame, isin: str, all_dates: pd.DatetimeIndex,
gap_ffill_limit: int = GAP_FFILL_LIMIT_DAYS) -> pd.Series:
"""Pulizia/validazione serie rendimenti (tipi, duplicati, buchi, niente fillna(0))."""
required_cols = {"Px_Date", "RendimentoGiornaliero"}
missing_cols = required_cols.difference(temp_df.columns)
if missing_cols:
print(f"[{isin}] Colonne mancanti {missing_cols}, asset ignorato.")
return pd.Series(dtype=float)
df_local = temp_df.copy()
df_local["Px_Date"] = pd.to_datetime(df_local["Px_Date"], errors="coerce").dt.normalize()
df_local["RendimentoGiornaliero"] = pd.to_numeric(df_local["RendimentoGiornaliero"], errors="coerce")
before_rows = len(df_local)
df_local = df_local.dropna(subset=["Px_Date", "RendimentoGiornaliero"])
dropped_na = before_rows - len(df_local)
dup_count = df_local.duplicated(subset=["Px_Date"]).sum()
if dup_count > 0:
print(f"[{isin}] Drop duplicati su Px_Date: {dup_count}")
df_local = df_local.drop_duplicates(subset=["Px_Date"], keep="last").sort_values("Px_Date")
if df_local.empty:
print(f"[{isin}] Nessuna riga valida dopo cleaning, asset ignorato.")
return pd.Series(dtype=float)
df_local["RendimentoGiornaliero"] = df_local["RendimentoGiornaliero"] / 100.0
series = df_local.set_index("Px_Date")["RendimentoGiornaliero"].reindex(all_dates)
missing_before_fill = int(series.isna().sum())
if missing_before_fill > 0:
gaps = _gap_ranges_missing(series, all_dates, max_entries=3)
if gaps:
gaps_str = "; ".join([f"{g[0].date()}->{g[1].date()} ({g[2]} gg)" for g in gaps])
print(f"[{isin}] Date mancanti prima del fill (prime): {gaps_str}")
series_ffill = series.ffill(limit=gap_ffill_limit)
first_valid = series_ffill.first_valid_index()
last_valid = series_ffill.last_valid_index()
if first_valid is None or last_valid is None:
print(f"[{isin}] Serie vuota dopo forward-fill, asset ignorato.")
return pd.Series(dtype=float)
series_ffill = series_ffill.loc[first_valid:last_valid]
residual_missing = int(series_ffill.isna().sum())
if residual_missing > 0:
print(f"[{isin}] {residual_missing} valori mancanti non coperti dal forward-fill (limite {gap_ffill_limit}), righe scartate.")
series_ffill = series_ffill.dropna()
coverage_days = series_ffill.shape[0]
if dropped_na > 0 or missing_before_fill > 0:
print(f"[{isin}] Righe totali: {before_rows}, drop NA: {dropped_na}, copertura finale: {coverage_days} giorni.")
return series_ffill
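Editor's note: the removed `preprocess_returns` relies on `ffill(limit=...)` followed by `dropna()` instead of `fillna(0)`. A toy illustration of how the 5-day limit only bridges short gaps (made-up returns, assuming pandas is available):

```python
import pandas as pd

# Made-up daily returns: one 2-day gap and one 6-day gap (illustration only).
dates = pd.date_range("2024-01-01", periods=12, freq="D")
returns = pd.Series(
    [0.001, None, None, 0.002, 0.003, None, None, None, None, None, None, 0.004],
    index=dates,
)

filled = returns.ffill(limit=5)   # 2-day gap fully bridged; only 5 of the 6 missing days are filled
cleaned = filled.dropna()         # the residual missing day is dropped, never set to 0
print(int(returns.isna().sum()), int(filled.isna().sum()), len(cleaned))  # 8 1 11
```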
# ========================= # =========================
# SERIE STORICHE RENDIMENTI # SERIE STORICHE RENDIMENTI
@@ -455,23 +402,18 @@ for isin in df['ISIN'].unique():
if temp_df.empty: if temp_df.empty:
print(f"Nessun dato recuperato per {isin}, skipping...") print(f"Nessun dato recuperato per {isin}, skipping...")
continue continue
clean_series = preprocess_returns(temp_df, isin, all_dates, gap_ffill_limit=GAP_FFILL_LIMIT_DAYS) temp_df['Px_Date'] = pd.to_datetime(temp_df['Px_Date'], format='%Y-%m-%d', errors='coerce').dt.normalize()
if clean_series.empty: temp_df = temp_df.dropna(subset=['Px_Date'])
print(f"Nessun dato valido per {isin} dopo la validazione, asset ignorato.") temp_df.set_index('Px_Date', inplace=True)
continue temp_df['RendimentoGiornaliero'] = temp_df['RendimentoGiornaliero'] / 100
final_df[isin] = clean_series.reindex(all_dates) final_df[isin] = temp_df['RendimentoGiornaliero'].reindex(all_dates)
isin_from_db.add(isin) isin_from_db.add(isin)
non_null = int(final_df[isin].count()) print(f"Dati recuperati per {isin}: {final_df[isin].count()} righe di dati non-null prelevate.")
missing_left = int(final_df[isin].isna().sum())
print(f"Dati recuperati per {isin}: {non_null} righe valide, mancanti residui: {missing_left}.")
except SQLAlchemyError as e: except SQLAlchemyError as e:
print(f"Errore durante l'esecuzione della stored procedure per {isin}:", e) print(f"Errore durante l'esecuzione della stored procedure per {isin}:", e)
final_df = final_df.loc[:, final_df.notna().any()] # elimina asset senza dati utili validate_returns_frame(final_df)
if final_df.shape[1] == 0: final_df.fillna(0, inplace=True)
print("Nessun ISIN valido dopo la validazione dei rendimenti, uscita.")
sys.exit(1)
final_df = final_df.dropna(how='all')
# -------- H_min sempre su 5 anni (21 gg = 1 mese) -------- # -------- H_min sempre su 5 anni (21 gg = 1 mese) --------
five_year_df = final_df.loc[end_date - pd.DateOffset(years=5): end_date] five_year_df = final_df.loc[end_date - pd.DateOffset(years=5): end_date]
@@ -566,7 +508,12 @@ for (years, target_vol), name in volatility_targets.items():
ef.add_constraint(lambda w, idxs=idxs, maxw=maxw: sum(w[i] for i in idxs) <= maxw) ef.add_constraint(lambda w, idxs=idxs, maxw=maxw: sum(w[i] for i in idxs) <= maxw)
# Vincoli per Asset Class # Vincoli per Asset Class
for ac, maxw in asset_class_limits_cfg.items(): asset_class_limits = {
'Azionari': 0.75, 'Obbligazionari': 0.75,
'Metalli Preziosi': 0.20, 'Materie Prime': 0.05,
'Immobiliare': 0.05, 'Criptovalute': 0.05, 'Monetari': 0.1
}
for ac, maxw in asset_class_limits.items():
isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist() isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist()
idxs = [period_df.columns.get_loc(isin) for isin in isin_list if isin in period_df.columns] idxs = [period_df.columns.get_loc(isin) for isin in isin_list if isin in period_df.columns]
if idxs: if idxs:
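Editor's note on the constraint loops in this hunk (and in the cvxpy variant further down): the lambdas capture `idxs` and `maxw` as default arguments. Without that, Python's late binding would make every constraint read only the values from the last loop iteration. A tiny standalone illustration:

```python
# Binding loop variables at definition time via default arguments.
caps = []
for maxw in (0.75, 0.20, 0.05):
    caps.append(lambda w, maxw=maxw: w <= maxw)

print([cap(0.5) for cap in caps])  # [True, False, False]

# Without the default argument, every lambda reads the final maxw (0.05):
late = [lambda w: w <= maxw for maxw in (0.75, 0.20, 0.05)]
print([cap(0.5) for cap in late])  # [False, False, False]
```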
@@ -607,16 +554,13 @@ for (years, target_vol), name in volatility_targets.items():
results_rows.append(row) results_rows.append(row)
results_full_df = pd.DataFrame(results_rows, columns=template_cols) results_full_df = pd.DataFrame(results_rows, columns=template_cols)
if results_full_df.empty: output_df = pd.concat([template_df.iloc[0:0], results_full_df], ignore_index=True)
output_df = template_df.iloc[0:0].copy()
else:
output_df = results_full_df.reindex(columns=template_cols)
output_file_path = excel_path(f'PTFOPT{name}.xlsx') output_file_path = excel_path(f'PTFOPT{name}.xlsx')
output_df.to_excel(output_file_path, index=False) output_df.to_excel(output_file_path, index=False)
print(f"File {output_file_path} saved successfully.") print(f"File {output_file_path} saved successfully.")
# --- Pie chart asset allocation (se ci sono pesi > 0) --- # --- Pie chart asset allocation (se ci sono pesi > 0) ---
asset_allocations = {asset: 0 for asset in asset_class_limits_cfg} asset_allocations = {asset: 0 for asset in asset_class_limits}
for isin, weight in weights.items(): for isin, weight in weights.items():
r_sel = df.loc[df['ISIN'] == isin] r_sel = df.loc[df['ISIN'] == isin]
if r_sel.empty: if r_sel.empty:
@@ -833,7 +777,12 @@ if cp is not None:
if idxs: if idxs:
ef_h.add_constraint(lambda w, idxs=idxs, maxw=maxw: cp.sum(w[idxs]) <= maxw) ef_h.add_constraint(lambda w, idxs=idxs, maxw=maxw: cp.sum(w[idxs]) <= maxw)
for ac, maxw in asset_class_limits_cfg.items(): asset_class_limits = {
'Azionari': 0.75, 'Obbligazionari': 0.75,
'Metalli Preziosi': 0.20, 'Materie Prime': 0.05,
'Immobiliare': 0.05, 'Criptovalute': 0.05, 'Monetari': 0.1
}
for ac, maxw in asset_class_limits.items():
isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist() isin_list = df[df['Asset Class'] == ac]['ISIN'].tolist()
idxs = [period_df_p.columns.get_loc(isin) for isin in isin_list if isin in period_df_p.columns] idxs = [period_df_p.columns.get_loc(isin) for isin in isin_list if isin in period_df_p.columns]
if idxs: if idxs:
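Editor's note for context: the `asset_class_limits_cfg` and `volatility_targets` names used in the removed branch come from an external config file (see the notes file below). A plausible fragment and a way to load it, sketched under assumptions: the key names mirror the variables above, while the numeric values and the `TARGET_A` entry are purely illustrative.

```python
import yaml  # PyYAML assumed available

# Illustrative config fragment; keys mirror the removed branch, values are examples.
CONFIG_SNIPPET = """
volatility_targets:
  - {years: 5, target_vol: 0.07, name: TARGET_A}
asset_class_limits:
  Azionari: 0.75
  Obbligazionari: 0.75
  Metalli Preziosi: 0.20
"""

cfg = yaml.safe_load(CONFIG_SNIPPET)
volatility_targets = {(item["years"], item["target_vol"]): item["name"]
                      for item in cfg["volatility_targets"]}
asset_class_limits_cfg = cfg["asset_class_limits"]
print(volatility_targets, asset_class_limits_cfg)
```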

View File

@@ -28,18 +28,12 @@
- CPU: covariances and EfficientFrontier solvers for every duration/volatility combination. - CPU: covariances and EfficientFrontier solvers for every duration/volatility combination.
- File IO: multiple Excel files and plots generated per portfolio. - File IO: multiple Excel files and plots generated per portfolio.
## Improvement progress ## Improvement directions (guidelines, not implemented)
- [x] Basic modularization and function reuse (data fetch, metrics, optimizer, export) across 2.6/2.5.x/Lite. - Security: remove/secure `connection.txt`, use environment variables or a secret store; keep credentials out of the repo.
- [x] External configuration: targets, constraints and naming in `config.yaml`; centralized constants. - Configuration: extract targets/constraints into a config file (yaml/json); centralize constants.
- [x] DB credential security: removed direct use of `connection.txt`, now read from `.env`/environment variables in the MSSQL code. - Dependency management: add a requirements/lockfile and a setup script.
- [ ] Data validation on inputs/returns (in progress). - Architecture: modularize (data fetch, metrics, optimizer, export), reuse across versions, remove duplicates.
- [ ] Performance/caching/parallelism: to enable after validation. - Data: input validation, missing-data handling other than fillna(0), warning logs for gaps/missing ISINs.
- [ ] Testing/logging/structured observability: to add once the data schema stabilizes. - Performance: batch fetch/caching, optional parallelization or memoization of covariances/metrics.
- Testing/quality: introduce tests for metrics and constraints, structured logging, checks on output overwrites.
## Detailed plan: Data validation (item 4) - Observability: on-screen summary report/structured logs rather than bare prints.
- Types and schema: coerce dates to datetime and returns to numeric, raise an error if required columns are missing; sort by date.
- Duplicates: drop duplicates on (ISIN, date) and log a warning with the discarded count.
- Missing periods: detect calendar gaps per ISIN and log a warning with the range; optionally reindex to business days plus a limited forward-fill.
- Missing values: no `fillna(0)` on returns; drop leading/trailing NAs, limited forward-fill (e.g. 5 days) for internal gaps, otherwise drop and log.
- Missing ISINs: if the stored procedure returns no data or the series is empty, log a warning and skip the asset in the optimization.
- Report: per-profile/per-ISIN summary with discarded rows, detected gaps and applied fills; printed to stdout and written to a file in `Output/` for audit (see the sketch below).
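The reporting step in the last bullet is described but not shown anywhere in this diff. A minimal sketch, assuming pandas with an Excel writer (e.g. openpyxl) and the existing `Output/` folder, of what such an audit summary could look like; the function name and column names are made up for illustration.

```python
import os
import pandas as pd

def write_validation_report(stats, output_dir="Output", filename="validation_report.xlsx"):
    """Persist per-ISIN cleaning stats (discarded rows, gaps, applied fills) for audit."""
    cols = ["ISIN", "rows_total", "rows_dropped_na", "gaps_found", "values_ffilled", "coverage_days"]
    report_df = pd.DataFrame(stats, columns=cols)
    print(report_df.to_string(index=False))    # summary on stdout
    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, filename)
    report_df.to_excel(path, index=False)      # audit copy under Output/
    return path

# Made-up example row:
write_validation_report([{"ISIN": "XX0000000000", "rows_total": 1300, "rows_dropped_na": 4,
                          "gaps_found": 2, "values_ffilled": 6, "coverage_days": 1290}])
```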