Files
LifetimePD/main.py
Variet Agent 8af743e6f3 feat(data): parse 3-agency PDF transition matrices to CSV #task-290
- New: data/parse_pdf_matrices.py (KR/NICE/SCI PDF parser)
  - KR: text-based parser (space-separated numbers + dashes)
  - NICE: text-based parser (clean numeric format)
  - SCI: pdfplumber table extraction (column-position-aware)
  - WR redistribution, B이하→B mapping, CCC extrapolation from PD patterns
- Modified: data/transition_matrices.py (added source='real' loader)
- Modified: config.yaml (data.transition_source: 'real')
- Modified: main.py (reads transition source from config)
- Output: 112 CSV files (KR/NICE/SCI/AVG × 28 years)
2026-03-11 01:07:27 +09:00

309 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Lifetime PD (50년) 메인 실행 파일
전체 파이프라인:
1. 데이터 로딩 (전이행렬 + 거시경제변수)
2. Belkin & Suchower Zt 추정
3. 거시연계 회귀모형 구축
4. 시나리오별 Zt 경로 생성
5. 50년 Lifetime PD 산출
6. 통계적 검증
7. 시각화 및 리포트
사용법:
python main.py
python main.py --horizon 30
python main.py --no-api # ECOS API 호출 없이 fallback 데이터 사용
python main.py --estimate-rho # 자산상관계수 동시 추정
"""
import sys
import io
# Work around consoles that default to a legacy code page (e.g. CP949 on
# Windows): make sure both standard output streams encode as UTF-8 and replace
# unencodable characters instead of raising.
# Each stream is inspected on its own, the encoding name is compared
# case-insensitively, and missing streams (``None``) are skipped.
for _name in ("stdout", "stderr"):
    _stream = getattr(sys, _name, None)
    _enc = (getattr(_stream, "encoding", None) or "").lower()
    if _enc in ("utf-8", "utf8"):
        continue
    if hasattr(_stream, "reconfigure"):
        # In-place switch: keeps the same stream object and its buffering mode.
        _stream.reconfigure(encoding="utf-8", errors="replace")
    elif hasattr(_stream, "buffer"):
        # Fallback for stream objects without reconfigure(): rewrap the raw buffer.
        setattr(
            sys,
            _name,
            io.TextIOWrapper(_stream.buffer, encoding="utf-8", errors="replace"),
        )
del _name, _stream, _enc
import argparse
import logging
import yaml
import numpy as np
import pandas as pd
from pathlib import Path
from tabulate import tabulate
# 프로젝트 모듈
from data.transition_matrices import (
load_transition_matrices, compute_ttc_matrix,
get_default_rates, display_matrix, RATING_GRADES
)
from data.macro_data import load_macro_data, _fallback_macro_data
from models.credit_cycle import estimate_zt_series, estimate_rho_and_zt
from models.vasicek import conditional_pd, worst_case_pd
from models.macro_model import build_macro_zt_model
from scenarios.scenario_engine import ScenarioEngine, load_config
from projection.lifetime_pd import LifetimePDEngine, compute_ecl_weights
from validation.statistical_tests import run_full_validation
from output.visualizer import generate_all_plots
# Logging setup: module-level logger plus a root configuration that emits
# INFO and above as "HH:MM:SS [LEVEL] message".
logger = logging.getLogger(__name__)
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S",
    level=logging.INFO,
)
def _positive_int(text):
    """argparse ``type=`` converter that accepts only integers >= 1."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {value}")
    return value


def parse_args(argv=None):
    """Parse the command-line options of the Lifetime PD runner.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, which is what the original
            zero-argument call did.

    Returns:
        argparse.Namespace with the attributes ``config`` (str, default
        ``"config.yaml"``), ``horizon`` (positive int or ``None``),
        ``no_api`` (bool), ``estimate_rho`` (bool) and ``output`` (str or
        ``None``).

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including a
            ``--horizon`` value that is not a positive integer.
    """
    parser = argparse.ArgumentParser(description="Lifetime PD (50년) 산출 시스템")
    parser.add_argument("--config", default="config.yaml", help="설정 파일 경로")
    # A zero or negative horizon is rejected here instead of being silently
    # replaced by the config default (0) or passed through (negative).
    parser.add_argument("--horizon", type=_positive_int, default=None, help="예측 기간 (기본: config 값)")
    parser.add_argument("--no-api", action="store_true", help="ECOS API 미사용 (fallback 데이터)")
    parser.add_argument("--estimate-rho", action="store_true", help="자산상관계수 동시 추정")
    parser.add_argument("--output", default=None, help="결과 저장 디렉토리")
    return parser.parse_args(argv)
def main():
    """Run the whole Lifetime PD pipeline and return a process exit code.

    Stages, in order: load the configuration; load transition matrices and
    macro data; estimate the credit-cycle index Zt; fit the macro-linked
    regression; generate scenario Zt paths; compute lifetime PDs; run the
    statistical validation; produce plots; write the result CSV files into
    the output directory.

    Returns:
        int: ``0`` on completion. Exceptions raised by any stage propagate
        to the caller.
    """
    args = parse_args()

    # ================================================================
    # 0. Load configuration
    # ================================================================
    print("=" * 70)
    print(" Lifetime PD (50년) - 미래 경기 반영 부도율 산출 시스템")
    print("=" * 70)
    config = load_config(args.config)
    model_config = config.get("model", {})
    rho = model_config.get("rho", 0.20)
    conv_config = config.get("convergence", {})
    # Command-line values take precedence over the config file.
    horizon = args.horizon or conv_config.get("total_horizon", 50)
    output_dir = args.output or config.get("output", {}).get("save_dir", "results")
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    # Single definition of the base year used by scenario generation and plots.
    # NOTE(review): still a fixed value; presumably it should follow the data
    # or the config — confirm with the model owners.
    base_year = 2025
    print(f"\n ρ (자산상관계수) = {rho}")
    print(f" 예측 기간 = {horizon}")
    print(f" 결과 저장 = {output_dir}/")

    # ================================================================
    # 1. Load data
    # ================================================================
    print("\n" + "=" * 70)
    print(" [1/7] 데이터 로딩")
    print("=" * 70)
    # Transition matrices
    data_config = config.get("data", {})
    tm_source = data_config.get("transition_source", "builtin")
    tm_dir = data_config.get("transition_dir", None)
    logger.info("전이행렬 로딩 중 (source=%s)...", tm_source)
    transition_matrices = load_transition_matrices(tm_source, data_dir=tm_dir)
    ttc_matrix = compute_ttc_matrix(transition_matrices)
    # NOTE(review): the result is not used below; the call is kept in case it
    # validates the matrices — confirm before removing.
    default_rates = get_default_rates(transition_matrices)
    first_year, last_year = min(transition_matrices), max(transition_matrices)
    print(
        f"\n 전이행렬: {len(transition_matrices)}개 연도 ({first_year}~{last_year})"
        f" [source={tm_source}]"
    )
    print(display_matrix(ttc_matrix, "TTC 전이행렬 (장기 평균)"))
    # Macroeconomic variables
    if args.no_api:
        logger.info("Fallback 거시경제 데이터 사용")
        macro_data = _fallback_macro_data()
    else:
        macro_data = load_macro_data(args.config)
    print(f"\n 거시변수: {len(macro_data)}개 연도, {len(macro_data.columns)}개 변수")
    print(f" 변수: {', '.join(macro_data.columns)}")
    print(macro_data.tail(5).to_string())

    # ================================================================
    # 2. Belkin & Suchower Zt estimation
    # ================================================================
    print("\n" + "=" * 70)
    print(" [2/7] 신용사이클 인덱스 (Zt) 추정")
    print("=" * 70)
    if args.estimate_rho:
        # Joint estimation overrides the configured rho for all later stages.
        logger.info("ρ와 Zt 동시 추정 중...")
        rho, zt_dict = estimate_rho_and_zt(transition_matrices, ttc_matrix)
        print(f"\n 추정된 ρ = {rho:.4f}")
    else:
        zt_dict = estimate_zt_series(transition_matrices, ttc_matrix, rho)
    zt_series = pd.Series(zt_dict, name="Zt")
    zt_series.index.name = "YEAR"
    print(f"\n Zt 통계: μ={zt_series.mean():.4f}, σ={zt_series.std():.4f}")
    print(f" 최소: {zt_series.min():.4f} ({zt_series.idxmin()})")
    print(f" 최대: {zt_series.max():.4f} ({zt_series.idxmax()})")
    zt_df = pd.DataFrame({"Year": zt_series.index, "Zt": zt_series.values})
    print("\n" + tabulate(zt_df, headers="keys", tablefmt="simple", floatfmt=".4f"))

    # ================================================================
    # 3. Macro-linked regression model
    # ================================================================
    print("\n" + "=" * 70)
    print(" [3/7] 거시연계 회귀모형 (Zt ~ 거시변수)")
    print("=" * 70)
    macro_model = build_macro_zt_model(zt_dict, macro_data, method="stepwise_aic")
    print(f"\n 선택된 변수: {macro_model.selected_vars}")
    print(macro_model.summary())
    diag = macro_model.diagnostics()
    print(f"\n R² = {diag['r_squared']:.4f}")
    print(f" Adj. R² = {diag['adj_r_squared']:.4f}")
    print(f" AIC = {diag['aic']:.2f}")
    print(f" DW = {diag['durbin_watson']:.3f}")

    # ================================================================
    # 4. Scenario generation
    # ================================================================
    print("\n" + "=" * 70)
    print(" [4/7] 시나리오 생성 (호황/중립/불황)")
    print("=" * 70)
    scenario_engine = ScenarioEngine(config)
    # Macro scenarios
    macro_scenarios = scenario_engine.generate_default_macro_scenarios(
        macro_data, base_year=base_year, forecast_years=5
    )
    # Zt paths
    z_paths = scenario_engine.generate_z_paths(
        zt_dict, macro_model, macro_scenarios, base_year=base_year
    )
    weights = scenario_engine.get_scenario_weights()
    print(f"\n 시나리오 가중치: {weights}")
    for name, path in z_paths.items():
        display = scenario_engine.get_display_name(name)
        path_len = len(path)
        print(f"\n {display}:")
        print(f" Zt[1-5] = {path[:5].round(3)}")
        # Only index positions that exist, and label the last point with the
        # real path length instead of a fixed "50".
        if path_len >= 10:
            print(f" Zt[10] = {path[9]:.3f}")
        if path_len > 0:
            print(f" Zt[{path_len}] = {path[-1]:.3f}")

    # ================================================================
    # 5. Lifetime PD computation
    # ================================================================
    print("\n" + "=" * 70)
    # The header reflects the horizon actually in use (identical text for 50).
    print(f" [5/7] {horizon}년 Lifetime PD 산출")
    print("=" * 70)
    pd_engine = LifetimePDEngine(ttc_matrix, rho)
    pd_results = pd_engine.compute_all_scenarios(z_paths, weights, horizon)
    # Cumulative PD table
    print("\n === 가중평균 누적 PD (%) ===")
    cum_table = pd_engine.format_pd_table(pd_results)
    print(tabulate(cum_table * 100, headers="keys", tablefmt="simple", floatfmt=".3f"))
    # Per-scenario comparison
    for scenario in z_paths:
        display = scenario_engine.get_display_name(scenario)
        print(f"\n === {display} 누적 PD (%) ===")
        s_table = pd_engine.format_pd_table(pd_results, scenario=scenario)
        print(tabulate(s_table * 100, headers="keys", tablefmt="simple", floatfmt=".3f"))
    # Vasicek worst-case comparison: last column of the TTC matrix, excluding
    # its final row, paired with every rating grade except the last one.
    print("\n === Basel II Worst-Case PD (99.9% VaR) ===")
    ttc_pds = ttc_matrix[:-1, -1]
    for i, grade in enumerate(RATING_GRADES[:-1]):
        ttc_pd = ttc_pds[i]
        wc = worst_case_pd(ttc_pd, rho)
        print(f" {grade}: TTC={ttc_pd*100:.3f}% → WC={wc*100:.3f}%")

    # ================================================================
    # 6. Statistical validation
    # ================================================================
    print("\n" + "=" * 70)
    print(" [6/7] 통계적 검증")
    print("=" * 70)
    # macro_model has already been dereferenced in stage 3, so no None guard.
    reg_result = macro_model.result
    validation_df = run_full_validation(
        zt_series.values,
        reg_result,
        pd_results,
        list(RATING_GRADES[:-1])
    )
    print("\n" + tabulate(validation_df, headers="keys", tablefmt="grid"))

    # ================================================================
    # 7. Visualization
    # ================================================================
    print("\n" + "=" * 70)
    print(" [7/7] 시각화 및 리포트 생성")
    print("=" * 70)
    generate_all_plots(
        zt_history=zt_dict,
        z_paths=z_paths,
        zt_series_pd=zt_series,
        macro_data=macro_data,
        pd_results=pd_results,
        ttc_matrix=ttc_matrix,
        validation_df=validation_df,
        output_dir=output_dir,
        base_year=base_year
    )

    # ================================================================
    # Save results (CSV)
    # ================================================================
    out_path = Path(output_dir)
    # Zt time series
    zt_series.to_csv(out_path / "zt_series.csv")
    # Macroeconomic data
    macro_data.to_csv(out_path / "macro_data.csv")
    # PD tables: one pair per scenario, plus the weighted result (scenario=None)
    all_years = list(range(1, horizon + 1))
    for scenario in [*z_paths, None]:
        label = scenario or "weighted"
        cum_df = pd_engine.format_pd_table(
            pd_results,
            years=all_years,
            scenario=scenario
        )
        cum_df.to_csv(out_path / f"cumulative_pd_{label}.csv")
        marg_df = pd_engine.format_marginal_pd_table(
            pd_results,
            years=all_years,
            scenario=scenario
        )
        marg_df.to_csv(out_path / f"marginal_pd_{label}.csv")
    # Validation results
    validation_df.to_csv(out_path / "validation_results.csv", index=False)

    # ================================================================
    # Done
    # ================================================================
    print("\n" + "=" * 70)
    print(" ✅ 완료!")
    print("=" * 70)
    print(f"\n 결과 파일:{output_dir}/")
    print(" - 차트: 01~07_*.png")
    print(" - 데이터: zt_series.csv, macro_data.csv")
    print(" - PD: cumulative_pd_*.csv, marginal_pd_*.csv")
    print(" - 검증: validation_results.csv")
    print()
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())