Support embargo parameter when building pattern library

2025-11-17 15:16:40 +01:00
parent 5f17a80044
commit cb2c735ca9
1 changed files with 24 additions and 2 deletions
--- a/shared_utils.py
+++ b/shared_utils.py
@@ -73,14 +73,36 @@ def z_norm(arr: np.ndarray) -> Optional[np.ndarray]:
    return (arr - mu) / (sd + 1e-12)


-def build_pattern_library(ret_series: pd.Series, wp: int, ha: int) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
+def build_pattern_library(
+    ret_series: pd.Series,
+    wp: int,
+    ha: int,
+    embargo: Optional[int] = None,
+) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
+    """Create the normalized pattern windows and their realized outcomes.
+
+    Args:
+        ret_series: Series of returns (ordered oldest→latest).
+        wp: Window length for the pattern.
+        ha: Holding horizon used to compute the outcome.
+        embargo: Optional number of most-recent observations to exclude when
+            building the library (useful to avoid leakage when reusing the
+            same series for inference).
+    """
    x = ret_series.dropna().values
    n = len(x)
    if n < wp + ha + 10:
        return None, None
+    embargo = int(embargo or 0)
+    usable_n = n - max(0, embargo)
+    if usable_n <= wp + ha:
+        return None, None
    wins: List[np.ndarray] = []
    outs: List[float] = []
-    for t in range(0, n - wp - ha):
+    last_start = usable_n - wp - ha
+    if last_start <= 0:
+        return None, None
+    for t in range(0, last_start + 1):
        win = x[t : t + wp]
        winzn = z_norm(win)
        if winzn is None: