feat: Lifetime PD (50yr) - Belkin & Suchower + Vasicek model

- Belkin & Suchower (1998) credit cycle index (Zt) estimation via WLS
- Vasicek single-factor conditional PD/TM model
- Macro-Zt OLS regression with stepwise variable selection
- 3-scenario (boom/neutral/recession) 50yr PD projection
- Statistical validation suite (ADF, Ljung-Box, R2, ARCH)
- BOK ECOS API integration with fallback data
- Visualization module (7 chart types)
- Detailed theoretical methodology in docs/methodology.md
2026-03-10 21:57:34 +09:00
commit 3a9374c61a
39 changed files with 4671 additions and 0 deletions
--- a/main.py
+++ b/main.py
@@ -0,0 +1,304 @@
+"""
+Lifetime PD (50년) 메인 실행 파일
+
+전체 파이프라인:
+1. 데이터 로딩 (전이행렬 + 거시경제변수)
+2. Belkin & Suchower Zt 추정
+3. 거시연계 회귀모형 구축
+4. 시나리오별 Zt 경로 생성
+5. 50년 Lifetime PD 산출
+6. 통계적 검증
+7. 시각화 및 리포트
+
+사용법:
+    python main.py
+    python main.py --horizon 30
+    python main.py --no-api           # ECOS API 호출 없이 fallback 데이터 사용
+    python main.py --estimate-rho     # 자산상관계수 동시 추정
+"""
+
+import sys
+import io
+
+# Work around Windows consoles that default to CP949: force UTF-8 on both
+# standard streams so the Korean text and symbols printed below never raise
+# UnicodeEncodeError (unencodable characters are replaced instead).
+#
+# Each stream is checked on its own, case-insensitively.  reconfigure() is
+# preferred because it switches the encoding in place and keeps the stream's
+# current buffering mode; swapping in a fresh TextIOWrapper would make the
+# console block-buffered and let print() output drift out of order relative
+# to logging.  Streams that are None or lack both reconfigure() and .buffer
+# are left untouched rather than crashing at import time.
+for _stream_name in ("stdout", "stderr"):
+    _stream = getattr(sys, _stream_name)
+    _encoding = (getattr(_stream, "encoding", None) or "").lower()
+    if _encoding in ("utf-8", "utf8"):
+        continue
+    if hasattr(_stream, "reconfigure"):
+        _stream.reconfigure(encoding="utf-8", errors="replace")
+    elif hasattr(_stream, "buffer"):
+        # Fallback for stream objects without reconfigure(): rewrap the
+        # underlying binary buffer, keeping line buffering enabled.
+        setattr(
+            sys,
+            _stream_name,
+            io.TextIOWrapper(
+                _stream.buffer,
+                encoding="utf-8",
+                errors="replace",
+                line_buffering=True,
+            ),
+        )
+
+import argparse
+import logging
+import yaml
+import numpy as np
+import pandas as pd
+from pathlib import Path
+from tabulate import tabulate
+
+# 프로젝트 모듈
+from data.transition_matrices import (
+    load_transition_matrices, compute_ttc_matrix,
+    get_default_rates, display_matrix, RATING_GRADES
+)
+from data.macro_data import load_macro_data, _fallback_macro_data
+from models.credit_cycle import estimate_zt_series, estimate_rho_and_zt
+from models.vasicek import conditional_pd, worst_case_pd
+from models.macro_model import build_macro_zt_model
+from scenarios.scenario_engine import ScenarioEngine, load_config
+from projection.lifetime_pd import LifetimePDEngine, compute_ecl_weights
+from validation.statistical_tests import run_full_validation
+from output.visualizer import generate_all_plots
+
+# Logging setup: module-level logger plus a root configuration that emits
+# INFO and above with a time-only timestamp.
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    datefmt="%H:%M:%S",
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    level=logging.INFO,
+)
+
+
+def _positive_int(value):
+    """argparse ``type`` callback: parse *value* as an integer >= 1."""
+    try:
+        number = int(value)
+    except ValueError:
+        raise argparse.ArgumentTypeError(f"정수가 아닙니다: {value!r}") from None
+    if number < 1:
+        raise argparse.ArgumentTypeError(f"1 이상의 정수여야 합니다: {value!r}")
+    return number
+
+
+def parse_args(argv=None):
+    """Parse the command-line options of the lifetime-PD script.
+
+    Args:
+        argv: Optional list of argument strings.  When ``None`` (the
+            default) argparse reads ``sys.argv[1:]``, so existing
+            ``parse_args()`` callers are unaffected.
+
+    Returns:
+        argparse.Namespace with ``config`` (str), ``horizon`` (int or None),
+        ``no_api`` (bool), ``estimate_rho`` (bool) and ``output``
+        (str or None).
+
+    Raises:
+        SystemExit: Raised by argparse on invalid input, including a
+            ``--horizon`` that is not an integer >= 1.
+    """
+    parser = argparse.ArgumentParser(description="Lifetime PD (50년) 산출 시스템")
+    parser.add_argument("--config", default="config.yaml", help="설정 파일 경로")
+    # A zero or negative horizon is rejected here instead of being silently
+    # replaced by the config value later on.
+    parser.add_argument("--horizon", type=_positive_int, default=None, help="예측 기간 (기본: config 값)")
+    parser.add_argument("--no-api", action="store_true", help="ECOS API 미사용 (fallback 데이터)")
+    parser.add_argument("--estimate-rho", action="store_true", help="자산상관계수 동시 추정")
+    parser.add_argument("--output", default=None, help="결과 저장 디렉토리")
+    return parser.parse_args(argv)
+
+
+def _print_section(title):
+    """Print a 70-column ``=`` banner with *title* between the two rules."""
+    print("\n" + "=" * 70)
+    print(f"  {title}")
+    print("=" * 70)
+
+
+def main():
+    """Run the full lifetime-PD pipeline and write the results to disk.
+
+    Stages, in order: load the configuration, load transition matrices and
+    macro data, estimate the credit-cycle index Zt, fit the macro-Zt
+    regression, generate scenario Zt paths, compute lifetime PDs, run the
+    statistical validation, generate plots, and save CSV outputs.
+
+    NOTE(review): the Zt paths are generated without passing ``horizon``;
+    confirm that ScenarioEngine yields paths at least ``horizon`` long when
+    ``--horizon`` overrides the configured value.
+
+    Returns:
+        int: Process exit code -- 0 on success, 2 when the resolved
+        projection horizon is smaller than one year.
+    """
+    args = parse_args()
+
+    # ================================================================
+    # 0. Load configuration
+    # ================================================================
+    print("=" * 70)
+    print("  Lifetime PD (50년) - 미래 경기 반영 부도율 산출 시스템")
+    print("=" * 70)
+
+    config = load_config(args.config)
+    model_config = config.get("model", {})
+    rho = model_config.get("rho", 0.20)
+
+    conv_config = config.get("convergence", {})
+    # Test against None (not truthiness) so an explicit command-line value is
+    # never silently swapped for the config default.
+    if args.horizon is not None:
+        horizon = args.horizon
+    else:
+        horizon = conv_config.get("total_horizon", 50)
+    if horizon < 1:
+        logger.error("예측 기간은 1년 이상이어야 합니다 (horizon=%s)", horizon)
+        return 2
+
+    output_dir = args.output or config.get("output", {}).get("save_dir", "results")
+    Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+    print(f"\n  ρ (자산상관계수) = {rho}")
+    print(f"  예측 기간 = {horizon}년")
+    print(f"  결과 저장 = {output_dir}/")
+
+    # ================================================================
+    # 1. Load data
+    # ================================================================
+    _print_section("[1/7] 데이터 로딩")
+
+    # Transition matrices (built-in data set) and their long-run average.
+    logger.info("전이행렬 로딩 중 (내장 데이터)...")
+    transition_matrices = load_transition_matrices("builtin")
+    ttc_matrix = compute_ttc_matrix(transition_matrices)
+
+    print(f"\n  전이행렬: {len(transition_matrices)}개 연도 ({min(transition_matrices.keys())}~{max(transition_matrices.keys())})")
+    print(display_matrix(ttc_matrix, "TTC 전이행렬 (장기 평균)"))
+
+    # Macroeconomic variables: bundled fallback data or the regular loader.
+    if args.no_api:
+        logger.info("Fallback 거시경제 데이터 사용")
+        macro_data = _fallback_macro_data()
+    else:
+        macro_data = load_macro_data(args.config)
+
+    print(f"\n  거시변수: {len(macro_data)}개 연도, {len(macro_data.columns)}개 변수")
+    print(f"  변수: {', '.join(macro_data.columns)}")
+    print(macro_data.tail(5).to_string())
+
+    # ================================================================
+    # 2. Belkin & Suchower Zt estimation
+    # ================================================================
+    _print_section("[2/7] 신용사이클 인덱스 (Zt) 추정")
+
+    if args.estimate_rho:
+        # Joint estimation replaces the configured rho for all later stages.
+        logger.info("ρ와 Zt 동시 추정 중...")
+        rho, zt_dict = estimate_rho_and_zt(transition_matrices, ttc_matrix)
+        print(f"\n  추정된 ρ = {rho:.4f}")
+    else:
+        zt_dict = estimate_zt_series(transition_matrices, ttc_matrix, rho)
+
+    zt_series = pd.Series(zt_dict, name="Zt")
+    zt_series.index.name = "YEAR"
+
+    print(f"\n  Zt 통계: μ={zt_series.mean():.4f}, σ={zt_series.std():.4f}")
+    print(f"  최소: {zt_series.min():.4f} ({zt_series.idxmin()})")
+    print(f"  최대: {zt_series.max():.4f} ({zt_series.idxmax()})")
+
+    zt_df = pd.DataFrame({"Year": zt_series.index, "Zt": zt_series.values})
+    print("\n" + tabulate(zt_df, headers="keys", tablefmt="simple", floatfmt=".4f"))
+
+    # ================================================================
+    # 3. Macro-linked regression model
+    # ================================================================
+    _print_section("[3/7] 거시연계 회귀모형 (Zt ~ 거시변수)")
+
+    macro_model = build_macro_zt_model(zt_dict, macro_data, method="stepwise_aic")
+
+    print(f"\n  선택된 변수: {macro_model.selected_vars}")
+    print(macro_model.summary())
+
+    diag = macro_model.diagnostics()
+    print(f"\n  R² = {diag['r_squared']:.4f}")
+    print(f"  Adj. R² = {diag['adj_r_squared']:.4f}")
+    print(f"  AIC = {diag['aic']:.2f}")
+    print(f"  DW = {diag['durbin_watson']:.3f}")
+
+    # ================================================================
+    # 4. Scenario generation
+    # ================================================================
+    _print_section("[4/7] 시나리오 생성 (호황/중립/불황)")
+
+    # Base year shared by scenario generation and plotting.
+    base_year = 2025
+    scenario_engine = ScenarioEngine(config)
+
+    # Macro scenarios
+    macro_scenarios = scenario_engine.generate_default_macro_scenarios(
+        macro_data, base_year=base_year, forecast_years=5
+    )
+
+    # Zt paths
+    z_paths = scenario_engine.generate_z_paths(
+        zt_dict, macro_model, macro_scenarios, base_year=base_year
+    )
+
+    weights = scenario_engine.get_scenario_weights()
+    print(f"\n  시나리오 가중치: {weights}")
+
+    for name, path in z_paths.items():
+        display = scenario_engine.get_display_name(name)
+        path_len = len(path)
+        print(f"\n  {display}:")
+        print(f"    Zt[1-5] = {path[:5].round(3)}")
+        # Paths shorter than 10 points have no 10th value to report.
+        if path_len >= 10:
+            print(f"    Zt[10]  = {path[9]:.3f}")
+        # Label the final value with the real path length rather than a
+        # fixed 50, so the line stays truthful for other horizons.
+        print(f"    Zt[{path_len}]  = {path[-1]:.3f}")
+
+    # ================================================================
+    # 5. Lifetime PD computation
+    # ================================================================
+    _print_section("[5/7] 50년 Lifetime PD 산출")
+
+    pd_engine = LifetimePDEngine(ttc_matrix, rho)
+    pd_results = pd_engine.compute_all_scenarios(z_paths, weights, horizon)
+
+    # Weighted-average cumulative PD table (scaled to percent for display).
+    print("\n  === 가중평균 누적 PD (%) ===")
+    cum_table = pd_engine.format_pd_table(pd_results)
+    print(tabulate(cum_table * 100, headers="keys", tablefmt="simple", floatfmt=".3f"))
+
+    # Per-scenario cumulative PD tables.
+    for scenario in z_paths:
+        display = scenario_engine.get_display_name(scenario)
+        print(f"\n  === {display} 누적 PD (%) ===")
+        s_table = pd_engine.format_pd_table(pd_results, scenario=scenario)
+        print(tabulate(s_table * 100, headers="keys", tablefmt="simple", floatfmt=".3f"))
+
+    # Vasicek worst-case comparison.  The TTC PDs are read from the last
+    # column of the TTC matrix, excluding its final row.
+    print("\n  === Basel II Worst-Case PD (99.9% VaR) ===")
+    ttc_pds = ttc_matrix[:-1, -1]
+    for i, grade in enumerate(RATING_GRADES[:-1]):
+        wc = worst_case_pd(ttc_pds[i], rho)
+        print(f"    {grade}: TTC={ttc_pds[i]*100:.3f}% → WC={wc*100:.3f}%")
+
+    # ================================================================
+    # 6. Statistical validation
+    # ================================================================
+    _print_section("[6/7] 통계적 검증")
+
+    # macro_model was already dereferenced above, so it cannot be None here;
+    # the guard is kept in case the model object can evaluate falsy.
+    # TODO(review): confirm and simplify.
+    reg_result = macro_model.result if macro_model else None
+    validation_df = run_full_validation(
+        zt_series.values,
+        reg_result,
+        pd_results,
+        list(RATING_GRADES[:-1])
+    )
+
+    print("\n" + tabulate(validation_df, headers="keys", tablefmt="grid"))
+
+    # ================================================================
+    # 7. Visualization
+    # ================================================================
+    _print_section("[7/7] 시각화 및 리포트 생성")
+
+    generate_all_plots(
+        zt_history=zt_dict,
+        z_paths=z_paths,
+        zt_series_pd=zt_series,
+        macro_data=macro_data,
+        pd_results=pd_results,
+        ttc_matrix=ttc_matrix,
+        validation_df=validation_df,
+        output_dir=output_dir,
+        base_year=base_year
+    )
+
+    # ================================================================
+    # Save results (CSV)
+    # ================================================================
+    out_path = Path(output_dir)
+
+    # Zt time series
+    zt_series.to_csv(out_path / "zt_series.csv")
+
+    # Macroeconomic data
+    macro_data.to_csv(out_path / "macro_data.csv")
+
+    # PD tables: one pair of files per scenario, plus the scenario=None case,
+    # which is saved under the "weighted" label.
+    for scenario in [*z_paths, None]:
+        label = scenario if scenario else "weighted"
+        cum_df = pd_engine.format_pd_table(
+            pd_results,
+            years=list(range(1, horizon + 1)),
+            scenario=scenario
+        )
+        cum_df.to_csv(out_path / f"cumulative_pd_{label}.csv")
+
+        marg_df = pd_engine.format_marginal_pd_table(
+            pd_results,
+            years=list(range(1, horizon + 1)),
+            scenario=scenario
+        )
+        marg_df.to_csv(out_path / f"marginal_pd_{label}.csv")
+
+    # Validation results
+    validation_df.to_csv(out_path / "validation_results.csv", index=False)
+
+    # ================================================================
+    # Done
+    # ================================================================
+    _print_section("✅ 완료!")
+    print(f"\n  결과 파일:{output_dir}/")
+    print("    - 차트: 01~07_*.png")
+    print("    - 데이터: zt_series.csv, macro_data.csv")
+    print("    - PD: cumulative_pd_*.csv, marginal_pd_*.csv")
+    print("    - 검증: validation_results.csv")
+    print()
+
+    return 0
+
+
+# Script entry point: run the pipeline and use its return value as the
+# process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())