# LifetimePD/models/vasicek.py

"""
Vasicek 단일팩터 모델 기반 조건부 PD 및 전이행렬 모듈

핵심 공식:
  PD_PIT(Z) = Φ( (Φ⁻¹(PD_TTC) - √ρ · Z) / √(1-ρ) )

이 모듈은 Belkin & Suchower의 임계값 방식 대신,
Vasicek 공식을 직접 적용하는 간편 버전도 제공합니다.

참고문헌:
- Vasicek, O. (2002). "The Distribution of Loan Portfolio Value"
- Basel Committee (2005). "An Explanatory Note on the Basel II IRB Risk Weight Functions"
- Merton, R.C. (1974). "On the Pricing of Corporate Debt"
"""

import numpy as np
from scipy.stats import norm
from typing import Optional
import logging

logger = logging.getLogger(__name__)


def conditional_pd(pd_ttc: float, z: float, rho: float) -> float:
    """
    Compute the point-in-time (PIT) PD with the Vasicek single-factor formula.

    PD_PIT(Z) = Φ( (Φ⁻¹(PD_TTC) - √ρ · Z) / √(1-ρ) )

    Note that PD_PIT(0) is *not* equal to PD_TTC when ρ > 0: Z = 0 is the
    median state of the systematic factor, and the formula then evaluates to
    Φ(Φ⁻¹(PD_TTC) / √(1-ρ)), which is below PD_TTC for PD_TTC < 0.5.
    It is the expectation of PD_PIT(Z) over Z ~ N(0, 1) that equals PD_TTC.

    Parameters
    ----------
    pd_ttc : float
        Through-the-cycle probability of default. Values <= 0 return 0.0 and
        values >= 1 return 1.0 without evaluating the formula.
    z : float
        Systematic factor (Z > 0: benign economy, Z < 0: downturn).
    rho : float
        Asset correlation, 0 <= rho < 1.

    Returns
    -------
    float
        Point-in-time probability of default, in [0, 1].

    Raises
    ------
    ValueError
        If ``rho`` is outside [0, 1) (the formula would otherwise divide by
        zero or silently produce NaN).

    Examples
    --------
    >>> round(conditional_pd(0.02, 0, 0.20), 4)   # median state of the factor
    0.0108
    >>> round(conditional_pd(0.02, -2, 0.20), 3)  # downturn: PD rises
    0.097
    >>> round(conditional_pd(0.02, 2, 0.20), 4)   # benign state: PD falls
    0.0005
    """
    # `not (a <= rho < b)` also rejects NaN, which would otherwise propagate.
    if not 0.0 <= rho < 1.0:
        raise ValueError(f"rho must satisfy 0 <= rho < 1, got {rho!r}")

    # Degenerate PDs are fixed points of the formula (Φ⁻¹ is ±inf there).
    if pd_ttc <= 0:
        return 0.0
    if pd_ttc >= 1:
        return 1.0

    sqrt_rho = np.sqrt(rho)
    sqrt_1_rho = np.sqrt(1.0 - rho)

    numerator = norm.ppf(pd_ttc) - sqrt_rho * z
    pd_pit = norm.cdf(numerator / sqrt_1_rho)

    return float(np.clip(pd_pit, 0.0, 1.0))


def conditional_pd_array(pd_ttc_array: np.ndarray, z: float, rho: float) -> np.ndarray:
    """
    Vectorized Vasicek formula: map an array of TTC PDs to PIT PDs.

    PD_PIT(Z) = Φ( (Φ⁻¹(PD_TTC) - √ρ · Z) / √(1-ρ) )

    Parameters
    ----------
    pd_ttc_array : np.ndarray
        TTC PDs (e.g. one per rating grade). Any array-like is accepted.
    z : float
        Systematic factor (Z > 0: benign economy, Z < 0: downturn).
    rho : float
        Asset correlation, 0 <= rho < 1.

    Returns
    -------
    np.ndarray
        PIT PDs with the same shape as the input, each in [0, 1].
        Entries with TTC PD <= 0 map to exactly 0.0 and entries with
        TTC PD >= 1 map to exactly 1.0, matching the scalar
        ``conditional_pd`` guards.

    Raises
    ------
    ValueError
        If ``rho`` is outside [0, 1).
    """
    # `not (a <= rho < b)` also rejects NaN.
    if not 0.0 <= rho < 1.0:
        raise ValueError(f"rho must satisfy 0 <= rho < 1, got {rho!r}")

    pd_ttc = np.asarray(pd_ttc_array, dtype=float)

    # Keep Φ⁻¹ finite for the interior computation.
    pd_ttc_clipped = np.clip(pd_ttc, 1e-10, 1.0 - 1e-10)

    sqrt_rho = np.sqrt(rho)
    sqrt_1_rho = np.sqrt(1.0 - rho)

    numerator = norm.ppf(pd_ttc_clipped) - sqrt_rho * z
    pd_pit = norm.cdf(numerator / sqrt_1_rho)

    # Degenerate PDs must not move with Z: without these overrides the 1e-10
    # clip turns a zero PD into a visibly positive PD under severe stress.
    pd_pit = np.where(pd_ttc <= 0.0, 0.0, pd_pit)
    pd_pit = np.where(pd_ttc >= 1.0, 1.0, pd_pit)

    return np.clip(pd_pit, 0.0, 1.0)


def conditional_transition_matrix(
    ttc_tm: np.ndarray,
    z: float,
    rho: float
) -> np.ndarray:
    """
    Build the Z-conditional transition matrix with the threshold approach.

    For every non-default row, the cumulative TTC transition probabilities
    (accumulated from the first column) are mapped to standard-normal
    thresholds d_j = Φ⁻¹(cum_j). The conditional cumulative probability of
    ending in column j or a column before it is then

        P(col <= j | Z) = Φ( (d_j + √ρ · Z) / √(1-ρ) )

    The sign of the Z term follows from the column ordering used here: the
    last column is default, so columns are accumulated from the non-default
    side and a positive Z (benign economy) must move probability mass toward
    the earlier columns. With this sign the default column equals
    Φ( (Φ⁻¹(PD_TTC) - √ρ · Z) / √(1-ρ) ), i.e. the Vasicek conditional PD.

    Parameters
    ----------
    ttc_tm : np.ndarray
        N×N TTC transition matrix; the last row/column is the default state.
    z : float
        Systematic factor (Z > 0: benign economy, Z < 0: downturn).
    rho : float
        Asset correlation, 0 <= rho < 1.

    Returns
    -------
    np.ndarray
        N×N conditional transition matrix. Each non-default row is
        renormalized to sum to 1; the last row is always the absorbing
        default row (0, ..., 0, 1) regardless of the input's last row.

    Raises
    ------
    ValueError
        If ``ttc_tm`` is not a non-empty square 2-D array, or ``rho`` is
        outside [0, 1).
    """
    tm = np.asarray(ttc_tm, dtype=float)
    if tm.ndim != 2 or tm.shape[0] != tm.shape[1] or tm.shape[0] == 0:
        raise ValueError(
            f"ttc_tm must be a non-empty square 2-D array, got shape {tm.shape}"
        )
    # `not (a <= rho < b)` also rejects NaN.
    if not 0.0 <= rho < 1.0:
        raise ValueError(f"rho must satisfy 0 <= rho < 1, got {rho!r}")

    n = tm.shape[0]

    # Default row: absorbing state.
    cond_tm = np.zeros((n, n))
    cond_tm[-1, -1] = 1.0
    if n == 1:
        return cond_tm

    sqrt_rho = np.sqrt(rho)
    sqrt_1_rho = np.sqrt(1.0 - rho)

    # Thresholds from cumulative probabilities of the first N-1 columns of
    # each non-default row; the clip keeps Φ⁻¹ finite. The last column's
    # upper bound is +inf, handled below as a cumulative probability of 1.
    cum_prob = np.cumsum(tm[:-1, :-1], axis=1)
    thresholds = norm.ppf(np.clip(cum_prob, 1e-10, 1.0 - 1e-10))

    # Conditional cumulative probabilities given Z.
    cond_cum = norm.cdf((thresholds + sqrt_rho * z) / sqrt_1_rho)

    # Bin probabilities are differences of consecutive cumulative values,
    # bracketed by 0 on the left and 1 on the right.
    edges = np.concatenate(
        [np.zeros((n - 1, 1)), cond_cum, np.ones((n - 1, 1))],
        axis=1,
    )
    # Non-monotone cumulative inputs could yield negative bins; floor at 0.
    probs = np.maximum(np.diff(edges, axis=1), 0.0)

    # Renormalize each row that has positive mass.
    row_sums = probs.sum(axis=1)
    positive = row_sums > 0
    probs[positive] /= row_sums[positive, np.newaxis]

    cond_tm[:-1] = probs
    return cond_tm


def multi_period_pd(
    annual_tm: np.ndarray,
    horizon: int,
    initial_grade_idx: Optional[int] = None
) -> np.ndarray:
    """
    Cumulative PD term structure from repeated products of a 1-year matrix.

    The t-year matrix is obtained by multiplying the running product by
    ``annual_tm`` once per year; the last column of that product is read as
    the cumulative default probability after t years.

    Parameters
    ----------
    annual_tm : np.ndarray
        One-year transition matrix (last column = default).
    horizon : int
        Number of yearly steps to project.
    initial_grade_idx : int, optional
        Row index of a single starting grade. When None, all rows except the
        last (default) row are reported.

    Returns
    -------
    np.ndarray
        Shape (horizon, N-1) with one column per non-default starting grade,
        or shape (horizon,) when ``initial_grade_idx`` is given.
    """
    dim = annual_tm.shape[0]
    all_grades = initial_grade_idx is None

    running_product = np.eye(dim)
    term_structure = []
    for _ in range(horizon):
        running_product = running_product @ annual_tm
        default_column = running_product[:, -1]
        if all_grades:
            # Drop the default row itself; copy so later products cannot
            # alias the stored slice.
            term_structure.append(default_column[:-1].copy())
        else:
            term_structure.append(default_column[initial_grade_idx])

    return np.array(term_structure)


def marginal_pd_from_cumulative(cumulative_pds: np.ndarray) -> np.ndarray:
    """
    Derive marginal (per-period) PDs from cumulative PDs.

    Marginal PD(t) = Cumulative PD(t) - Cumulative PD(t-1), with
    Cumulative PD(0) taken as 0 so the first marginal PD equals the first
    cumulative PD.

    Parameters
    ----------
    cumulative_pds : np.ndarray
        Cumulative PDs with time along axis 0, e.g. shape (horizon,) or
        (horizon, n_grades). Any array-like with at least one dimension is
        accepted; trailing axes are treated independently.

    Returns
    -------
    np.ndarray
        Marginal PDs with the same shape as the input. Negative differences
        (a non-monotone cumulative input) are floored at 0.
    """
    cumulative = np.asarray(cumulative_pds, dtype=float)

    # A scalar `prepend` is broadcast by np.diff to length 1 along `axis` and
    # to the input's shape on every other axis, so one call covers 1-D, 2-D
    # and higher-dimensional inputs alike.
    marginal = np.diff(cumulative, axis=0, prepend=0.0)

    return np.maximum(marginal, 0.0)


def survival_probability(cumulative_pds: np.ndarray) -> np.ndarray:
    """
    Convert cumulative PDs to survival probabilities.

    Survival(t) = 1 - Cumulative PD(t), applied element-wise.
    """
    survival = 1.0 - cumulative_pds
    return survival


def annualized_pd(cumulative_pd: float, horizon: int) -> float:
    """
    Convert a cumulative PD over ``horizon`` years into an annualized PD.

    AnnualizedPD = 1 - (1 - CumulativePD)^(1/horizon)

    A cumulative PD of 1 or more is returned as 1.0 without evaluating the
    formula.
    """
    certain_default = cumulative_pd >= 1.0
    if certain_default:
        return 1.0

    survival_to_horizon = 1.0 - cumulative_pd
    survival_per_year = survival_to_horizon ** (1.0 / horizon)
    return 1.0 - survival_per_year


def worst_case_pd(pd_ttc: float, rho: float, confidence: float = 0.999) -> float:
    """
    Basel II IRB-style worst-case PD at a given confidence level.

    WCPD = Φ( (Φ⁻¹(PD) + √ρ · Φ⁻¹(confidence)) / √(1-ρ) )

    This is the Vasicek conditional PD evaluated at the systematic-factor
    value Z = -Φ⁻¹(confidence).

    Parameters
    ----------
    pd_ttc : float
        Through-the-cycle probability of default. Values <= 0 return 0.0 and
        values >= 1 return 1.0 without evaluating the formula.
    rho : float
        Asset correlation, 0 <= rho < 1.
    confidence : float, default 0.999
        Confidence level, within [0, 1].

    Returns
    -------
    float
        Worst-case PD at the requested confidence level.

    Raises
    ------
    ValueError
        If ``rho`` is outside [0, 1) or ``confidence`` is outside [0, 1]
        (either would otherwise silently produce NaN).
    """
    # Chained comparisons negate to True for NaN, so NaN is rejected too.
    if not 0.0 <= rho < 1.0:
        raise ValueError(f"rho must satisfy 0 <= rho < 1, got {rho!r}")
    if not 0.0 <= confidence <= 1.0:
        raise ValueError(
            f"confidence must satisfy 0 <= confidence <= 1, got {confidence!r}"
        )

    # Degenerate PDs are fixed points; guarding the upper end as well avoids
    # NaN from Φ⁻¹ when pd_ttc exceeds 1.
    if pd_ttc <= 0:
        return 0.0
    if pd_ttc >= 1:
        return 1.0

    sqrt_rho = np.sqrt(rho)
    sqrt_1_rho = np.sqrt(1.0 - rho)

    numerator = norm.ppf(pd_ttc) + sqrt_rho * norm.ppf(confidence)
    return float(norm.cdf(numerator / sqrt_1_rho))