feat: Lifetime PD (50yr) - Belkin & Suchower + Vasicek model

- Belkin & Suchower (1998) credit cycle index (Zt) estimation via WLS
- Vasicek single-factor conditional PD/TM model
- Macro-Zt OLS regression with stepwise variable selection
- 3-scenario (boom/neutral/recession) 50yr PD projection
- Statistical validation suite (ADF, Ljung-Box, R2, ARCH)
- BOK ECOS API integration with fallback data
- Visualization module (7 chart types)
- Detailed theoretical methodology in docs/methodology.md
This commit is contained in:
Variet Agent
2026-03-10 21:57:34 +09:00
commit 3a9374c61a
39 changed files with 4671 additions and 0 deletions

307
models/macro_model.py Normal file
View File

@@ -0,0 +1,307 @@
"""
거시경제 변수 ↔ Zt 연계 통계모형
Zt(신용사이클 인덱스)를 거시경제변수로 설명하는 회귀모형을 구축하고,
미래 거시 시나리오에 따른 Zt 전망을 생성합니다.
모형:
Z_t = β₀ + β₁·GDP_growth + β₂·Unemployment + β₃·Base_Rate
+ β₄·CD_Rate + β₅·CPI_growth + β₆·Leading_Index + ε_t
방법론 참고:
- IMF (2021). "IFRS 9 and CECL Compatible Estimation for Top-Down Solvency Stress Testing"
- ECB (2019). "Scenario Design for IFRS 9 Expected Credit Loss Estimation"
- Fed (2022). "Dodd-Frank Act Stress Test Methodology"
"""
import logging
import warnings
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from statsmodels.stats.diagnostic import het_breuschpagan, acorr_ljungbox
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.stattools import durbin_watson
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Suppress FutureWarning messages. NOTE: this filter is installed process-wide
# at import time, not only for code in this module.
warnings.filterwarnings("ignore", category=FutureWarning)
class MacroZtModel:
    """
    Regression model that explains the credit cycle index Zt with macro variables.

    Features:
        - Multiple linear regression (statsmodels OLS with an intercept)
        - Variable selection: all variables, or forward stepwise on AIC/BIC
        - Residual diagnostics (Durbin-Watson, Ljung-Box, Breusch-Pagan)
        - VIF multicollinearity check
        - Zt prediction for macro scenarios

    State populated by ``fit``:
        model         : the statsmodels OLS model object
        result        : the fitted statsmodels results object
        selected_vars : names of the regressors used, in design-matrix order
        scaler_params : per-variable {"mean", "std"} used for standardization
    """

    def __init__(self):
        self.model = None
        self.result = None
        self.selected_vars: Optional[List[str]] = None
        # Standardization parameters, keyed by column name.
        self.scaler_params: Dict[str, Dict[str, float]] = {}

    def fit(
        self,
        zt_series: pd.Series,
        macro_data: pd.DataFrame,
        method: str = "stepwise_aic",
        standardize: bool = True
    ) -> "MacroZtModel":
        """
        Fit the regression Zt ~ macro variables.

        Parameters
        ----------
        zt_series : pd.Series
            Zt estimates; the index is matched against ``macro_data.index``.
            Missing (NaN) Zt values are excluded from the sample.
        macro_data : pd.DataFrame
            Macro variables, one column per variable.
        method : str
            Variable selection method:
            - "all": use every usable variable
            - "stepwise_aic": forward stepwise on AIC
            - "stepwise_bic": forward stepwise on BIC
            Any other value falls back to using every usable variable.
        standardize : bool
            Whether to z-score the macro variables before fitting.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If fewer than 5 observations are shared by both inputs.
        """
        # A missing Zt cannot be a regression target; drop those rows before
        # counting the usable sample.
        zt_clean = zt_series.dropna()
        common_years = sorted(set(zt_clean.index) & set(macro_data.index))
        if len(common_years) < 5:
            raise ValueError(f"공통 데이터 포인트가 부족합니다: {len(common_years)}")

        y = zt_clean.loc[common_years].values.astype(float)
        X = macro_data.loc[common_years].copy()

        # Fill gaps forward then backward; drop columns that are still missing.
        X = X.ffill().bfill().dropna(axis=1)

        # Zero-variance columns are removed whether or not we standardize: they
        # are collinear with the intercept, and leaving one in would make the
        # design matrix disagree with the one predict() builds.
        scaler_params: Dict[str, Dict[str, float]] = {}
        usable_cols = []
        dropped_cols = []
        for col in X.columns:
            mean = X[col].mean()
            std = X[col].std()
            if not std > 0:  # also true when std is NaN
                dropped_cols.append(col)
                continue
            usable_cols.append(col)
            if standardize:
                scaler_params[col] = {"mean": mean, "std": std}
                X[col] = (X[col] - mean) / std
        if dropped_cols:
            logger.warning("분산이 0인 변수 제외: %s", dropped_cols)
        X = X[usable_cols]

        # Variable selection
        if method.startswith("stepwise"):
            criterion = "aic" if "aic" in method else "bic"
            selected = self._stepwise_selection(y, X, criterion)
        else:
            if method != "all":
                logger.warning("알 수 없는 변수 선택 방법 '%s'. 전체 변수 사용.", method)
            selected = list(X.columns)
        if not selected:
            logger.warning("변수 선택 결과 선택된 변수가 없습니다. 전체 변수 사용.")
            selected = list(X.columns)

        # Always add the intercept explicitly so that params line up with
        # ["const"] + selected_vars and with the matrix built in predict().
        design = sm.add_constant(X[selected].values, has_constant="add")
        model = sm.OLS(y, design)
        result = model.fit()

        # Commit state only after a successful fit, so a failed call never
        # leaves stale scaling parameters paired with an older model.
        self.selected_vars = selected
        self.scaler_params = scaler_params
        self.model = model
        self.result = result

        logger.info(f"회귀모형 적합 완료: 선택변수 = {self.selected_vars}")
        logger.info(f"  R² = {self.result.rsquared:.4f}, "
                    f"Adj.R² = {self.result.rsquared_adj:.4f}, "
                    f"AIC = {self.result.aic:.2f}")
        return self

    def _stepwise_selection(
        self,
        y: np.ndarray,
        X: pd.DataFrame,
        criterion: str = "aic"
    ) -> List[str]:
        """
        Forward stepwise variable selection.

        Starting from no regressors, repeatedly add the candidate that gives
        the lowest information criterion, and stop as soon as no candidate
        improves on the current score.

        Parameters
        ----------
        y : np.ndarray
            Regression target.
        X : pd.DataFrame
            Candidate regressors (one column each).
        criterion : str
            "aic" uses AIC; any other value uses BIC.

        Returns
        -------
        List[str] : selected column names in the order they were added.
        """
        remaining = list(X.columns)
        selected: List[str] = []
        current_score = np.inf
        # Intercept plus regressors must leave at least one residual degree of
        # freedom; a saturated fit makes the information criterion meaningless.
        max_vars = len(y) - 2

        while remaining and len(selected) < max_vars:
            scores = {}
            for var in remaining:
                trial_vars = selected + [var]
                X_trial = sm.add_constant(X[trial_vars].values, has_constant="add")
                try:
                    trial = sm.OLS(y, X_trial).fit()
                except Exception as exc:
                    # Best effort: a candidate that cannot be fitted is skipped.
                    logger.debug("후보 변수 %s 적합 실패: %s", var, exc)
                    continue
                score = trial.aic if criterion == "aic" else trial.bic
                # A NaN/inf score cannot be ranked against the others.
                if np.isfinite(score):
                    scores[var] = score

            if not scores:
                break

            best_var = min(scores, key=scores.get)
            best_score = scores[best_var]
            if best_score >= current_score:
                break

            selected.append(best_var)
            remaining.remove(best_var)
            current_score = best_score
            logger.debug(f"  + {best_var} ({criterion.upper()} = {best_score:.2f})")

        return selected

    def predict(self, macro_scenario: pd.DataFrame) -> np.ndarray:
        """
        Predict Zt for a macro scenario.

        Parameters
        ----------
        macro_scenario : pd.DataFrame
            Must contain every column in ``selected_vars``; values are given
            on the original (unstandardized) scale.

        Returns
        -------
        np.ndarray : predicted Zt values, one per scenario row.

        Raises
        ------
        ValueError
            If the model has not been fitted.
        KeyError
            If a selected variable is missing from ``macro_scenario``.
        """
        if self.result is None:
            raise ValueError("모형이 적합되지 않았습니다. fit()을 먼저 실행하세요.")

        missing = [v for v in self.selected_vars if v not in macro_scenario.columns]
        if missing:
            raise KeyError(f"시나리오에 필요한 변수가 없습니다: {missing}")

        X = macro_scenario[self.selected_vars].copy()

        # Apply the same standardization that was used on the training data.
        for col in X.columns:
            params = self.scaler_params.get(col)
            if params is not None:
                X[col] = (X[col] - params["mean"]) / params["std"]

        # has_constant="add" forces the intercept column even when a scenario
        # column happens to be constant (e.g. a single-row scenario).
        X_const = sm.add_constant(X.values, has_constant="add")
        return self.result.predict(X_const)

    def diagnostics(self) -> Dict[str, Any]:
        """
        Return regression diagnostics.

        Returns
        -------
        dict
            Empty if the model has not been fitted. Otherwise contains:
            - r_squared, adj_r_squared
            - f_stat, f_pvalue
            - aic, bic
            - n_obs, selected_vars
            - durbin_watson
            - ljung_box_stat, ljung_box_pvalue (lag 5; NaN if the test fails)
            - breusch_pagan_stat, breusch_pagan_pvalue (NaN if the test fails)
            - vif (dict per design column, including "const"; {} on failure)
            - coefficients (DataFrame of estimates, std. errors, t and p values)
        """
        if self.result is None:
            return {}

        result = self.result
        var_names = ["const"] + list(self.selected_vars)

        diag = {
            "r_squared": result.rsquared,
            "adj_r_squared": result.rsquared_adj,
            "f_stat": result.fvalue,
            "f_pvalue": result.f_pvalue,
            "aic": result.aic,
            "bic": result.bic,
            "n_obs": int(result.nobs),
            "selected_vars": self.selected_vars,
        }

        # Durbin-Watson
        residuals = result.resid
        diag["durbin_watson"] = durbin_watson(residuals)

        # Ljung-Box (autocorrelation test). Both keys are always present so
        # callers do not need to special-case a failed test.
        try:
            lb_result = acorr_ljungbox(residuals, lags=[5], return_df=True)
            diag["ljung_box_stat"] = lb_result["lb_stat"].values[0]
            diag["ljung_box_pvalue"] = lb_result["lb_pvalue"].values[0]
        except Exception as exc:
            logger.warning("Ljung-Box 검정 실패: %s", exc)
            diag["ljung_box_stat"] = np.nan
            diag["ljung_box_pvalue"] = np.nan

        # Breusch-Pagan (heteroskedasticity test)
        try:
            bp_stat, bp_pvalue, _, _ = het_breuschpagan(
                residuals, result.model.exog
            )
            diag["breusch_pagan_stat"] = bp_stat
            diag["breusch_pagan_pvalue"] = bp_pvalue
        except Exception as exc:
            logger.warning("Breusch-Pagan 검정 실패: %s", exc)
            diag["breusch_pagan_stat"] = np.nan
            diag["breusch_pagan_pvalue"] = np.nan

        # VIF (multicollinearity), one entry per design-matrix column
        try:
            exog = result.model.exog
            diag["vif"] = {
                name: variance_inflation_factor(exog, idx)
                for idx, name in enumerate(var_names)
            }
        except Exception as exc:
            logger.warning("VIF 계산 실패: %s", exc)
            diag["vif"] = {}

        # Coefficient summary
        coef_df = pd.DataFrame({
            "변수": var_names,
            "계수": result.params,
            "표준오차": result.bse,
            "t값": result.tvalues,
            "p값": result.pvalues,
        })
        diag["coefficients"] = coef_df
        return diag

    def summary(self) -> str:
        """Return the statsmodels summary as text, or a notice if unfitted."""
        if self.result is None:
            return "모형이 적합되지 않았습니다."
        return str(self.result.summary())

    def residual_series(self) -> np.ndarray:
        """Return the regression residuals, or an empty array if unfitted."""
        if self.result is None:
            return np.array([])
        return self.result.resid
def build_macro_zt_model(
    zt_dict: Dict[int, float],
    macro_df: pd.DataFrame,
    method: str = "stepwise_aic"
) -> MacroZtModel:
    """
    Convenience wrapper: build and fit a MacroZtModel from a Zt mapping.

    Parameters
    ----------
    zt_dict : Dict[int, float]
        Mapping of year to Zt value.
    macro_df : pd.DataFrame
        Macro variables, indexed by year, one column per variable.
    method : str
        Variable selection method, passed through to ``MacroZtModel.fit``.

    Returns
    -------
    MacroZtModel : the fitted model.
    """
    # Wrap the mapping as a named series whose index is labelled "YEAR".
    credit_cycle = pd.Series(data=zt_dict, name="Zt")
    credit_cycle.index.name = "YEAR"

    # fit() returns the model instance itself, so it can be returned directly.
    return MacroZtModel().fit(credit_cycle, macro_df, method=method)