feat: Lifetime PD (50yr) - Belkin & Suchower + Vasicek model

- Belkin & Suchower (1998) credit cycle index (Zt) estimation via WLS
- Vasicek single-factor conditional PD/TM model
- Macro-Zt OLS regression with stepwise variable selection
- 3-scenario (boom/neutral/recession) 50yr PD projection
- Statistical validation suite (ADF, Ljung-Box, R2, ARCH)
- BOK ECOS API integration with fallback data
- Visualization module (7 chart types)
- Detailed theoretical methodology in docs/methodology.md
2026-03-10 21:57:34 +09:00
commit 3a9374c61a
39 changed files with 4671 additions and 0 deletions
--- a/models/vasicek.py
+++ b/models/vasicek.py
@@ -0,0 +1,218 @@
+"""
+Vasicek 단일팩터 모델 기반 조건부 PD 및 전이행렬 모듈
+
+핵심 공식:
+  PD_PIT(Z) = Φ( (Φ⁻¹(PD_TTC) - √ρ · Z) / √(1-ρ) )
+
+이 모듈은 Belkin & Suchower의 임계값 방식 대신, 
+Vasicek 공식을 직접 적용하는 간편 버전도 제공합니다.
+
+참고문헌:
+- Vasicek, O. (2002). "The Distribution of Loan Portfolio Value"
+- Basel Committee (2005). "An Explanatory Note on the Basel II IRB Risk Weight Functions"
+- Merton, R.C. (1974). "On the Pricing of Corporate Debt"
+"""
+
+import numpy as np
+from scipy.stats import norm
+from typing import Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def conditional_pd(pd_ttc: float, z: float, rho: float) -> float:
+    """
+    Return the point-in-time (PIT) PD implied by the Vasicek single-factor model.
+
+    PD_PIT(Z) = Φ( (Φ⁻¹(PD_TTC) - √ρ · Z) / √(1-ρ) )
+
+    PD_TTC is the unconditional PD, i.e. the average of PD_PIT(Z) over
+    Z ~ N(0, 1).  It is *not* the value at Z = 0: for ρ > 0 and
+    PD_TTC < 0.5, PD_PIT(0) lies below PD_TTC.
+
+    Parameters
+    ----------
+    pd_ttc : float  - through-the-cycle PD (<= 0 returns 0.0, >= 1 returns 1.0)
+    z : float       - systematic factor (Z > 0: boom, Z < 0: recession)
+    rho : float     - asset correlation, 0 <= ρ < 1 (ρ = 0 gives PD_PIT = PD_TTC)
+
+    Returns
+    -------
+    float : PIT PD in [0, 1]
+
+    Raises
+    ------
+    ValueError : if rho is outside [0, 1)
+
+    Examples
+    --------
+    >>> round(conditional_pd(0.02, 0, 0.20), 4)    # neutral year
+    0.0108
+    >>> round(conditional_pd(0.02, -2, 0.20), 3)   # recession raises PD
+    0.097
+    >>> round(conditional_pd(0.02, 2, 0.20), 5)    # boom lowers PD
+    0.00049
+    """
+    # rho >= 1 divides by zero and rho < 0 takes the square root of a negative
+    # number; both used to leak NaN/inf into downstream projections.
+    if not 0.0 <= rho < 1.0:
+        raise ValueError(f"rho must satisfy 0 <= rho < 1, got {rho!r}")
+
+    # Degenerate PDs have no finite normal quantile; return them unchanged.
+    if pd_ttc <= 0:
+        return 0.0
+    if pd_ttc >= 1:
+        return 1.0
+
+    shifted_threshold = norm.ppf(pd_ttc) - np.sqrt(rho) * z
+    pd_pit = norm.cdf(shifted_threshold / np.sqrt(1.0 - rho))
+
+    return float(np.clip(pd_pit, 0.0, 1.0))
+
+
+def conditional_pd_array(pd_ttc_array: np.ndarray, z: float, rho: float) -> np.ndarray:
+    """
+    Vectorised Vasicek formula: map an array of TTC PDs to PIT PDs.
+
+    PD_PIT(Z) = Φ( (Φ⁻¹(PD_TTC) - √ρ · Z) / √(1-ρ) )
+
+    Boundary inputs follow the scalar ``conditional_pd`` convention: a TTC PD
+    of 0 (or below) maps to exactly 0.0 and a TTC PD of 1 (or above) maps to
+    exactly 1.0, whatever the value of Z.
+
+    Parameters
+    ----------
+    pd_ttc_array : array-like  - TTC PDs (e.g. one per rating grade)
+    z : float                  - systematic factor (Z > 0: boom, Z < 0: recession)
+    rho : float                - asset correlation, 0 <= ρ < 1
+
+    Returns
+    -------
+    np.ndarray : PIT PDs in [0, 1], same shape as the input
+
+    Raises
+    ------
+    ValueError : if rho is outside [0, 1)
+    """
+    if not 0.0 <= rho < 1.0:
+        raise ValueError(f"rho must satisfy 0 <= rho < 1, got {rho!r}")
+
+    # Clip to the closed unit interval only: Φ⁻¹(0) = -inf and Φ⁻¹(1) = +inf
+    # flow through Φ as exact 0 and 1.  A tighter clip (e.g. 1e-10) would give
+    # risk-free grades a positive PD and defaulted grades a PD below one.
+    pd_ttc = np.clip(np.asarray(pd_ttc_array, dtype=float), 0.0, 1.0)
+
+    shifted_threshold = norm.ppf(pd_ttc) - np.sqrt(rho) * z
+    pd_pit = norm.cdf(shifted_threshold / np.sqrt(1.0 - rho))
+
+    return np.clip(pd_pit, 0.0, 1.0)
+
+
+def conditional_transition_matrix(
+    ttc_tm: np.ndarray,
+    z: float,
+    rho: float
+) -> np.ndarray:
+    """
+    Build the Z-conditional transition matrix from a TTC matrix via thresholds.
+
+    For every non-default row i the cumulative TTC probabilities
+    C_ij = Σ_{k<=j} P_ik are turned into thresholds d_ij = Φ⁻¹(C_ij).
+    Default is the last column, so it occupies the upper tail of the latent
+    standard-normal score and a favourable Z has to *raise* the cumulative
+    probabilities of the columns before it:
+
+        C_ij(Z) = Φ( (d_ij + √ρ · Z) / √(1-ρ) )
+
+    With this sign the default column equals the Vasicek formula
+    Φ( (Φ⁻¹(PD_TTC) - √ρ · Z) / √(1-ρ) ): Z > 0 (boom) lowers PD and
+    Z < 0 (recession) raises it.  Subtracting √ρ · Z from the thresholds
+    would invert that convention.
+
+    Parameters
+    ----------
+    ttc_tm : np.ndarray  - N×N TTC transition matrix, default state last
+    z : float            - systematic factor (Z > 0: boom, Z < 0: recession)
+    rho : float          - asset correlation, 0 <= ρ < 1
+
+    Returns
+    -------
+    np.ndarray : N×N conditional transition matrix.  Non-default rows are
+        renormalised to sum to one; the last row is always written as the
+        absorbing default state, whatever the input's last row contains.
+
+    Raises
+    ------
+    ValueError : if ttc_tm is not a non-empty square matrix or rho is
+        outside [0, 1)
+    """
+    tm = np.asarray(ttc_tm, dtype=float)
+    if tm.ndim != 2 or tm.shape[0] != tm.shape[1] or tm.shape[0] == 0:
+        raise ValueError(
+            f"ttc_tm must be a non-empty square matrix, got shape {tm.shape}"
+        )
+    if not 0.0 <= rho < 1.0:
+        raise ValueError(f"rho must satisfy 0 <= rho < 1, got {rho!r}")
+
+    n = tm.shape[0]
+    cond_tm = np.zeros((n, n))
+
+    if n > 1:
+        # Cumulative TTC probabilities of the first N-1 columns.  Clipping to
+        # [0, 1] only absorbs floating-point overshoot; exact 0 and 1 map to
+        # -inf / +inf thresholds so impossible transitions stay impossible.
+        cum_ttc = np.clip(np.cumsum(tm[:-1, :-1], axis=1), 0.0, 1.0)
+        thresholds = norm.ppf(cum_ttc)
+
+        # Conditional cumulative probabilities; the last column is always 1
+        # because its threshold is +inf.
+        cum_cond = np.ones((n - 1, n))
+        cum_cond[:, :-1] = norm.cdf(
+            (thresholds + np.sqrt(rho) * z) / np.sqrt(1.0 - rho)
+        )
+
+        # Cell probabilities are differences of adjacent cumulative values.
+        # Negative differences (non-monotone input) are floored at zero.
+        cell_probs = np.maximum(np.diff(cum_cond, axis=1, prepend=0.0), 0.0)
+
+        # Renormalise each row; rows summing to zero are left untouched.
+        row_sums = cell_probs.sum(axis=1, keepdims=True)
+        np.divide(cell_probs, row_sums, out=cell_probs, where=row_sums > 0)
+
+        cond_tm[:-1] = cell_probs
+
+    # Default row: absorbing state.
+    cond_tm[-1, -1] = 1.0
+
+    return cond_tm
+
+
+def multi_period_pd(
+    annual_tm: np.ndarray,
+    horizon: int,
+    initial_grade_idx: Optional[int] = None
+) -> np.ndarray:
+    """
+    Compute multi-year cumulative PDs from a time-homogeneous annual matrix.
+
+    The cumulative PD after t years is the default (last) column of
+    annual_tm ** t.  Only cumulative PDs are returned; marginal PDs can be
+    obtained by differencing the result along the year axis.
+
+    Parameters
+    ----------
+    annual_tm : np.ndarray   - N×N one-year transition matrix, default state last
+    horizon : int            - number of years to project (>= 0)
+    initial_grade_idx : int  - row of a single starting grade (None: all
+                               non-default grades)
+
+    Returns
+    -------
+    np.ndarray
+        shape (horizon, N-1): cumulative PD per year and non-default grade, or
+        shape (horizon,): cumulative PD per year for the selected grade.
+        horizon = 0 yields an empty array with the same trailing shape.
+
+    Raises
+    ------
+    ValueError : if annual_tm is not a non-empty square matrix or horizon is
+        negative
+    """
+    tm = np.asarray(annual_tm, dtype=float)
+    if tm.ndim != 2 or tm.shape[0] != tm.shape[1] or tm.shape[0] == 0:
+        raise ValueError(
+            f"annual_tm must be a non-empty square matrix, got shape {tm.shape}"
+        )
+    if horizon < 0:
+        raise ValueError(f"horizon must be non-negative, got {horizon!r}")
+
+    n = tm.shape[0]
+
+    # Pre-allocating fixes the output shape even when horizon == 0.
+    if initial_grade_idx is None:
+        cumulative_pds = np.empty((horizon, n - 1))
+    else:
+        cumulative_pds = np.empty(horizon)
+
+    # Only the default column of tm ** t is needed.  Since
+    # tm ** t = tm @ tm ** (t-1), that column obeys v_t = tm @ v_{t-1} with
+    # v_0 the unit vector of the default state, so a matrix-vector product
+    # replaces the full matrix-matrix product each year.
+    default_column = np.zeros(n)
+    default_column[-1] = 1.0
+
+    for year in range(horizon):
+        default_column = tm @ default_column
+        if initial_grade_idx is None:
+            cumulative_pds[year] = default_column[:-1]
+        else:
+            cumulative_pds[year] = default_column[initial_grade_idx]
+
+    return cumulative_pds
+
+
+def marginal_pd_from_cumulative(cumulative_pds: np.ndarray) -> np.ndarray:
+    """
+    Difference cumulative PDs along the first axis to get marginal PDs.
+
+    Marginal PD(t) = Cumulative PD(t) - Cumulative PD(t-1), with the value
+    before the first period taken as zero.  Negative differences are floored
+    at 0.0.
+    """
+    # The value prepended for "period 0" must match the input's rank: a plain
+    # scalar for a 1-D series, a row of zeros for a 2-D (period × grade) table.
+    if cumulative_pds.ndim == 1:
+        period_zero = 0.0
+    else:
+        period_zero = np.zeros((1, cumulative_pds.shape[1]))
+
+    increments = np.diff(cumulative_pds, axis=0, prepend=period_zero)
+    return np.maximum(increments, 0.0)
+
+
+def survival_probability(cumulative_pds: np.ndarray) -> np.ndarray:
+    """
+    Return the survival probability, i.e. the complement of the cumulative PD.
+
+    Survival(t) = 1 - Cumulative PD(t), applied element-wise.
+    """
+    survival = 1.0 - cumulative_pds
+    return survival
+
+
+def annualized_pd(cumulative_pd: float, horizon: int) -> float:
+    """
+    Convert a cumulative PD over ``horizon`` years into a constant annual PD.
+
+    AnnualizedPD = 1 - (1 - CumulativePD)^(1/horizon)
+
+    Parameters
+    ----------
+    cumulative_pd : float  - cumulative PD over the whole horizon
+    horizon : int          - horizon length in years (> 0)
+
+    Returns
+    -------
+    float : annual PD that compounds to ``cumulative_pd``; 1.0 when
+        ``cumulative_pd`` is 1 or more
+
+    Raises
+    ------
+    ValueError : if horizon is not positive
+    """
+    # horizon == 0 would divide by zero and a negative horizon has no meaning.
+    if horizon <= 0:
+        raise ValueError(f"horizon must be positive, got {horizon!r}")
+    if cumulative_pd >= 1.0:
+        return 1.0
+
+    # 1 - (1 - p)^(1/h) rewritten as -expm1(log1p(-p) / h).  The direct form
+    # rounds 1 - p to 1.0 for very small p and returns exactly 0.
+    return float(-np.expm1(np.log1p(-cumulative_pd) / horizon))
+
+
+def worst_case_pd(pd_ttc: float, rho: float, confidence: float = 0.999) -> float:
+    """
+    Return the Basel II IRB style worst-case PD at a given confidence level.
+
+    WCPD = Φ( (Φ⁻¹(PD) + √ρ · Φ⁻¹(confidence)) / √(1-ρ) )
+
+    This is the Vasicek conditional PD evaluated at the adverse systematic
+    factor Z = -Φ⁻¹(confidence).
+
+    Parameters
+    ----------
+    pd_ttc : float      - through-the-cycle PD (<= 0 returns 0.0, >= 1 returns 1.0)
+    rho : float         - asset correlation, 0 <= ρ < 1
+    confidence : float  - confidence level, 0 < confidence < 1 (default 99.9%)
+
+    Returns
+    -------
+    float : worst-case PD in [0, 1]
+
+    Raises
+    ------
+    ValueError : if rho is outside [0, 1) or confidence is outside (0, 1)
+    """
+    if not 0.0 <= rho < 1.0:
+        raise ValueError(f"rho must satisfy 0 <= rho < 1, got {rho!r}")
+    if not 0.0 < confidence < 1.0:
+        raise ValueError(
+            f"confidence must satisfy 0 < confidence < 1, got {confidence!r}"
+        )
+
+    # Degenerate PDs have no finite normal quantile; without the upper guard a
+    # PD above 1 would come back as NaN.
+    if pd_ttc <= 0:
+        return 0.0
+    if pd_ttc >= 1:
+        return 1.0
+
+    stressed_threshold = norm.ppf(pd_ttc) + np.sqrt(rho) * norm.ppf(confidence)
+    wcpd = norm.cdf(stressed_threshold / np.sqrt(1.0 - rho))
+
+    return float(np.clip(wcpd, 0.0, 1.0))