Files
LifetimePD/main.py
Variet Agent d1ddf06e5d feat(model): KAP YTM PD floor integration, expanded 226-var search, ADF fix (AIC->BIC), Model#2 with 6-test diagnostics
- Replace hardcoded DEFAULT_PD_FLOORS with build_complete_pd_floor_table() (KAP bond YTM)
- Fix ADF test: autolag='AIC' -> 'BIC' for small sample (N=26) robustness
- Expand variable search: 40 -> 226 vars (log/diff/return/lag2), 1.9M combos
- Select Model #2: HOUSING_PRICE + CREDIT_SPREAD_LAG1 + CURRENT_ACCOUNT_R
- Add 6-test diagnostics table to AR1 sheet (ADF/LB/DW/BP/ARCH/Shapiro)
- Add Korean variable names for transformed variables
- Generate report v7 with full diagnostics
2026-03-12 00:06:23 +09:00

374 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Lifetime PD (50년) 메인 실행 파일
전체 파이프라인:
1. 데이터 로딩 (전이행렬 + 거시경제변수)
2. Belkin & Suchower Zt 추정
3. 거시연계 회귀모형 구축
4. 시나리오별 Zt 경로 생성
5. 50년 Lifetime PD 산출
6. 통계적 검증
7. 시각화 및 리포트
사용법:
python main.py
python main.py --horizon 30
python main.py --no-api # ECOS API 호출 없이 fallback 데이터 사용
python main.py --estimate-rho # 자산상관계수 동시 추정
"""
import sys
import io
def ensure_utf8_stream(stream):
    """Return *stream* set up to emit UTF-8, replacing unencodable characters.

    The stream is returned untouched when it already reports a UTF-8
    encoding, or when it offers neither ``reconfigure`` nor ``buffer``
    (for example ``None`` or a custom redirected object).

    Args:
        stream: A text stream such as ``sys.stdout``; may be ``None``.

    Returns:
        The same object (reconfigured in place when possible), or a new
        ``io.TextIOWrapper`` around ``stream.buffer`` as a fallback.
    """
    encoding = getattr(stream, "encoding", None) or ""
    # Encoding names are case-insensitive and may be spelled "UTF-8",
    # "utf8" or "utf_8"; normalise before comparing.
    if encoding.lower().replace("_", "-") in ("utf-8", "utf8"):
        return stream

    reconfigure = getattr(stream, "reconfigure", None)
    if reconfigure is not None:
        # In-place reconfiguration keeps every setting that is not passed
        # explicitly (e.g. line buffering), unlike building a new wrapper.
        reconfigure(encoding="utf-8", errors="replace")
        return stream

    buffer = getattr(stream, "buffer", None)
    if buffer is not None:
        # Fallback for text streams that expose a binary buffer but have
        # no reconfigure() method.
        return io.TextIOWrapper(buffer, encoding="utf-8", errors="replace")

    return stream


# Work around Windows CP949 console encoding problems: force UTF-8 output.
# Each stream is checked on its own, so a UTF-8 stdout no longer hides a
# non-UTF-8 stderr (and vice versa).
sys.stdout = ensure_utf8_stream(sys.stdout)
sys.stderr = ensure_utf8_stream(sys.stderr)
import argparse
import logging
import yaml
import numpy as np
import pandas as pd
from pathlib import Path
from tabulate import tabulate
# 프로젝트 모듈
from data.transition_matrices import (
load_transition_matrices, compute_ttc_matrix,
get_default_rates, display_matrix, RATING_GRADES, RATING_GRADES_8
)
from data.ccc_interpolator import expand_to_8x8
from data.macro_data import load_macro_data, _fallback_macro_data, compute_derived_features
from models.credit_cycle import estimate_zt_series, estimate_rho_and_zt
from models.vasicek import conditional_pd, worst_case_pd
from models.macro_model import build_macro_zt_model
from scenarios.scenario_engine import ScenarioEngine, load_config
from projection.lifetime_pd import LifetimePDEngine, compute_ecl_weights
from validation.statistical_tests import run_full_validation
from output.visualizer import generate_all_plots
# Logging setup: INFO level, records rendered as "HH:MM:SS [LEVEL] message".
_LOG_OPTIONS = {
    "level": logging.INFO,
    "format": "%(asctime)s [%(levelname)s] %(message)s",
    "datefmt": "%H:%M:%S",
}
logging.basicConfig(**_LOG_OPTIONS)

# Module-level logger used by the pipeline steps below.
logger = logging.getLogger(__name__)
def _positive_int(text):
    """argparse ``type=`` converter that accepts only integers >= 1."""
    value = int(text)  # a ValueError here is reported by argparse as a usage error
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {text!r}"
        )
    return value


def parse_args(argv=None):
    """Parse the command-line options of the Lifetime PD pipeline.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is the behaviour
            of calling ``parse_args()`` with no arguments.

    Returns:
        argparse.Namespace with the attributes ``config`` (str),
        ``horizon`` (int or None), ``no_api`` (bool), ``estimate_rho``
        (bool) and ``output`` (str or None).

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including a
            ``--horizon`` value below 1.
    """
    parser = argparse.ArgumentParser(description="Lifetime PD (50년) 산출 시스템")
    parser.add_argument("--config", default="config.yaml", help="설정 파일 경로")
    # Zero or negative horizons are rejected here; a value of 0 would
    # otherwise be indistinguishable from "not given" in a truthiness test.
    parser.add_argument("--horizon", type=_positive_int, default=None,
                        help="예측 기간 (기본: config 값)")
    parser.add_argument("--no-api", action="store_true",
                        help="ECOS API 미사용 (fallback 데이터)")
    parser.add_argument("--estimate-rho", action="store_true",
                        help="자산상관계수 동시 추정")
    parser.add_argument("--output", default=None, help="결과 저장 디렉토리")
    return parser.parse_args(argv)
def _print_banner(title, leading_blank=True):
    """Print *title* framed by two 70-character '=' rules."""
    rule = "=" * 70
    print(("\n" if leading_blank else "") + rule)
    print(title)
    print(rule)


def _load_macro_inputs(args):
    """Return the macro-variable DataFrame, honouring ``--no-api``."""
    if not args.no_api:
        return load_macro_data(args.config)

    logger.info("Fallback 거시경제 데이터 사용")
    macro_data = _fallback_macro_data()
    # Best effort: also merge the ECOS fallback data set. The import is kept
    # inside the try so that any failure only logs a warning and the run
    # continues with the basic fallback data.
    try:
        from data.ecos_fetcher import load_macro_data as load_ecos_macro
        ecos_data = load_ecos_macro()
        if ecos_data is not None and not ecos_data.empty:
            macro_data = pd.concat([macro_data, ecos_data], axis=1)
            # On a column-name clash the first occurrence wins.
            macro_data = macro_data.loc[:, ~macro_data.columns.duplicated()]
            logger.info("ECOS fallback 병합 완료: %d개 변수", len(macro_data.columns))
    except Exception as e:
        logger.warning("ECOS fallback 병합 실패: %s", e)
    return macro_data


def _attach_derived_features(macro_scenarios, macro_data):
    """Add derived features to every scenario frame of *macro_scenarios* in place."""
    for sname, sdf in list(macro_scenarios.items()):
        # Derived features are computed on history + forecast together;
        # on overlapping index values the scenario row replaces the
        # historical one.
        combined = pd.concat([macro_data, sdf])
        combined = combined[~combined.index.duplicated(keep='last')]
        combined = combined.sort_index()
        feat = compute_derived_features(combined)
        # Keep only the rows that belong to the scenario's own index.
        feat_forecast = feat.loc[feat.index.intersection(sdf.index)]
        if not feat_forecast.empty:
            merged = pd.concat([sdf, feat_forecast], axis=1)
            macro_scenarios[sname] = merged.loc[:, ~merged.columns.duplicated()]


def _save_csv_outputs(out_path, zt_series, macro_data, pd_engine, pd_results,
                      scenario_names, horizon, validation_df):
    """Write the Zt series, macro data, PD tables and validation table as CSV."""
    zt_series.to_csv(out_path / "zt_series.csv")
    macro_data.to_csv(out_path / "macro_data.csv")

    years = list(range(1, horizon + 1))
    # ``None`` is passed for the extra "weighted" table.
    for scenario in list(scenario_names) + [None]:
        label = scenario if scenario else "weighted"
        cum_df = pd_engine.format_pd_table(pd_results, years=years, scenario=scenario)
        cum_df.to_csv(out_path / f"cumulative_pd_{label}.csv")
        marg_df = pd_engine.format_marginal_pd_table(pd_results, years=years, scenario=scenario)
        marg_df.to_csv(out_path / f"marginal_pd_{label}.csv")

    validation_df.to_csv(out_path / "validation_results.csv", index=False)


def main():
    """Run the Lifetime PD pipeline end to end and return the exit code.

    Steps, in order: load configuration; load transition matrices and apply
    the PD floor; load macro data; estimate the Zt series (optionally
    together with rho); fit the macro-linked Zt model; build scenario Zt
    paths; compute lifetime PDs; run validation; produce plots; write CSVs.

    Returns:
        int: always 0; failures propagate as exceptions.

    Raises:
        ValueError: if the resolved horizon is below 1, or no transition
            matrix falls inside the history window.
    """
    args = parse_args()

    # History window used to filter transition matrices; ``base_year`` is
    # also the anchor passed to the scenario engine and the plots.
    history_start, base_year = 2000, 2025

    # ================================================================
    # 0. Configuration
    # ================================================================
    _print_banner(" Lifetime PD (50년) - 미래 경기 반영 부도율 산출 시스템", leading_blank=False)
    config = load_config(args.config)
    model_config = config.get("model", {})
    rho = model_config.get("rho", 0.20)
    conv_config = config.get("convergence", {})
    # Explicit None test: a truthiness test would treat ``--horizon 0`` as
    # "not given" and silently fall back to the configured value.
    if args.horizon is not None:
        horizon = args.horizon
    else:
        horizon = conv_config.get("total_horizon", 50)
    if horizon < 1:
        raise ValueError(f"horizon must be a positive number of years, got {horizon}")
    output_dir = args.output or config.get("output", {}).get("save_dir", "results")
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    print(f"\n ρ (자산상관계수) = {rho}")
    print(f" 예측 기간 = {horizon}")
    print(f" 결과 저장 = {output_dir}/")

    # ================================================================
    # 1. Data loading
    # ================================================================
    _print_banner(" [1/7] 데이터 로딩")
    # Transition matrices
    data_config = config.get("data", {})
    tm_source = data_config.get("transition_source", "builtin")
    tm_dir = data_config.get("transition_dir", None)
    logger.info("전이행렬 로딩 중 (source=%s)...", tm_source)
    transition_matrices_all = load_transition_matrices(tm_source, data_dir=tm_dir)
    # Keep only the years inside the history window.
    transition_matrices_raw = {
        year: matrix
        for year, matrix in transition_matrices_all.items()
        if history_start <= year <= base_year
    }
    if not transition_matrices_raw:
        # Fail here with a clear message instead of later inside min()/max().
        raise ValueError(
            f"no transition matrices between {history_start} and {base_year} "
            f"(source={tm_source})"
        )
    # Apply the PD floor (market-implied PD based on KAP bond YTM, per the
    # original comment). Imported locally, as in the original code.
    from data.pd_floor import apply_pd_floor_to_matrices, build_complete_pd_floor_table
    # Only the first of the three returned tables is used here.
    pd_floors_broad, _, _ = build_complete_pd_floor_table()
    transition_matrices = apply_pd_floor_to_matrices(transition_matrices_raw, pd_floors_broad)
    ttc_matrix = compute_ttc_matrix(transition_matrices)
    # NOTE(review): ``default_rates`` is not used later in this function;
    # the call is kept in case it has side effects - confirm and remove.
    default_rates = get_default_rates(transition_matrices)
    print(f"\n 전이행렬: {len(transition_matrices)}개 연도 ({min(transition_matrices)}~{max(transition_matrices)})"
          f" [source={tm_source}]")
    print(f" PD 플로어 (KAP 채권 YTM 기반): AAA={pd_floors_broad['AAA']*10000:.0f}bp, AA={pd_floors_broad['AA']*10000:.0f}bp, "
          f"A={pd_floors_broad['A']*10000:.0f}bp, BBB={pd_floors_broad['BBB']*10000:.0f}bp")
    print(display_matrix(ttc_matrix, "TTC 전이행렬 (KAP PD Floor 적용 후 장기 평균)"))

    # Macro variables
    macro_data = _load_macro_inputs(args)
    print(f"\n 거시변수: {len(macro_data)}개 연도, {len(macro_data.columns)}개 변수")
    print(f" 변수: {', '.join(macro_data.columns)}")
    print(macro_data.tail(5).to_string())
    # Derived features (per the original comment: corporate-bond log
    # returns, term/credit spreads).
    derived_features = compute_derived_features(macro_data)
    if not derived_features.empty:
        print(f"\n 파생변수: {', '.join(derived_features.columns)}")
        print(derived_features.tail(5).to_string())

    # ================================================================
    # 2. Belkin & Suchower Zt estimation
    # ================================================================
    _print_banner(" [2/7] 신용사이클 인덱스 (Zt) 추정")
    if args.estimate_rho:
        logger.info("ρ와 Zt 동시 추정 중...")
        # The jointly estimated rho replaces the configured one from here on.
        rho, zt_dict = estimate_rho_and_zt(transition_matrices, ttc_matrix)
        print(f"\n 추정된 ρ = {rho:.4f}")
    else:
        zt_dict = estimate_zt_series(transition_matrices, ttc_matrix, rho)
    zt_series = pd.Series(zt_dict, name="Zt")
    zt_series.index.name = "YEAR"
    print(f"\n Zt 통계: μ={zt_series.mean():.4f}, σ={zt_series.std():.4f}")
    print(f" 최소: {zt_series.min():.4f} ({zt_series.idxmin()})")
    print(f" 최대: {zt_series.max():.4f} ({zt_series.idxmax()})")
    zt_df = pd.DataFrame({"Year": zt_series.index, "Zt": zt_series.values})
    print("\n" + tabulate(zt_df, headers="keys", tablefmt="simple", floatfmt=".4f"))

    # ================================================================
    # 3. Macro-linked regression model (Zt ~ macro variables)
    # ================================================================
    _print_banner(" [3/7] 거시연계 회귀모형 (Zt ~ 거시변수)")
    # Regress on raw + derived columns when derived features exist.
    if not derived_features.empty:
        model_input = pd.concat([macro_data, derived_features], axis=1)
        model_input = model_input.loc[:, ~model_input.columns.duplicated()]
    else:
        model_input = macro_data
    forced_vars = model_config.get("macro_vars", None)
    macro_method = model_config.get("macro_method", "ar1_macro")
    macro_model = build_macro_zt_model(zt_dict, model_input, method=macro_method,
                                       forced_vars=forced_vars)
    print(f"\n 선택된 변수: {macro_model.selected_vars}")
    if macro_model.is_ar1:
        import math
        phi = macro_model.ar1_phi
        # Half-life ln(2)/|ln|phi|| is only defined for 0 < |phi| < 1;
        # otherwise it is reported as infinite.
        if 0 < abs(phi) < 1:
            half_life = math.log(2) / abs(math.log(abs(phi)))
        else:
            half_life = float('inf')
        print(f" [AR(1)+Macro] φ = {phi:.4f} (반감기 = {half_life:.1f}년)")
        print(f" c = {macro_model.ar1_const:.4f}")
        for var, beta in macro_model.ar1_beta.items():
            print(f" β({var}) = {beta:+.6f}")
    print(macro_model.summary())
    diag = macro_model.diagnostics()
    print(f"\n R² = {diag['r_squared']:.4f}")
    print(f" Adj. R² = {diag['adj_r_squared']:.4f}")
    print(f" AIC = {diag['aic']:.2f}")
    print(f" DW = {diag['durbin_watson']:.3f}")

    # ================================================================
    # 4. Scenario generation
    # ================================================================
    _print_banner(" [4/7] 시나리오 생성 (호황/중립/불황)")
    scenario_engine = ScenarioEngine(config)
    macro_scenarios = scenario_engine.generate_default_macro_scenarios(
        macro_data, base_year=base_year, forecast_years=5
    )
    if not derived_features.empty:
        _attach_derived_features(macro_scenarios, macro_data)
    z_paths = scenario_engine.generate_z_paths(
        zt_dict, macro_model, macro_scenarios, base_year=base_year
    )
    weights = scenario_engine.get_scenario_weights()
    print(f"\n 시나리오 가중치: {weights}")
    for name, path in z_paths.items():
        display = scenario_engine.get_display_name(name)
        print(f"\n {display}:")
        print(f" Zt[1-5] = {path[:5].round(3)}")
        # Guard the fixed-position read so a path shorter than 10 points
        # does not raise IndexError.
        if len(path) >= 10:
            print(f" Zt[10] = {path[9]:.3f}")
        # Label the last point with its real position instead of a fixed 50.
        if len(path) > 0:
            print(f" Zt[{len(path)}] = {path[-1]:.3f}")

    # ================================================================
    # 5. Lifetime PD projection
    # ================================================================
    _print_banner(f" [5/7] {horizon}년 Lifetime PD 산출")
    # 7x7 TTC -> 8x8 TTC (CCC interpolation)
    ttc_8x8 = expand_to_8x8(ttc_matrix)
    print("\n 7x7 TTC -> 8x8 TTC (CCC interpolated)")
    print(display_matrix(ttc_8x8, "TTC 8x8 (CCC interpolated)"))
    pd_engine = LifetimePDEngine(ttc_8x8, rho, rating_grades=RATING_GRADES_8)
    pd_results = pd_engine.compute_all_scenarios(z_paths, weights, horizon)
    # Cumulative PD table (weighted)
    print("\n === 가중평균 누적 PD (%) ===")
    cum_table = pd_engine.format_pd_table(pd_results)
    print(tabulate(cum_table * 100, headers="keys", tablefmt="simple", floatfmt=".3f"))
    # Per-scenario cumulative PD tables
    for scenario in z_paths:
        display = scenario_engine.get_display_name(scenario)
        print(f"\n === {display} 누적 PD (%) ===")
        s_table = pd_engine.format_pd_table(pd_results, scenario=scenario)
        print(tabulate(s_table * 100, headers="keys", tablefmt="simple", floatfmt=".3f"))
    # Vasicek worst-case comparison
    print("\n === Basel II Worst-Case PD (99.9% VaR) ===")
    # Last column of every row except the last one.
    ttc_pds = ttc_8x8[:-1, -1]
    for i, grade in enumerate(RATING_GRADES_8[:-1]):
        wc = worst_case_pd(ttc_pds[i], rho)
        print(f" {grade}: TTC={ttc_pds[i]*100:.3f}% → WC={wc*100:.3f}%")

    # ================================================================
    # 6. Statistical validation
    # ================================================================
    _print_banner(" [6/7] 통계적 검증")
    reg_result = macro_model.result if macro_model else None
    # NOTE(review): validation is given RATING_GRADES while the PD engine
    # was built with RATING_GRADES_8 - confirm this difference is intended.
    validation_df = run_full_validation(
        zt_series.values,
        reg_result,
        pd_results,
        list(RATING_GRADES[:-1])
    )
    print("\n" + tabulate(validation_df, headers="keys", tablefmt="grid"))

    # ================================================================
    # 7. Visualisation
    # ================================================================
    _print_banner(" [7/7] 시각화 및 리포트 생성")
    generate_all_plots(
        zt_history=zt_dict,
        z_paths=z_paths,
        zt_series_pd=zt_series,
        macro_data=macro_data,
        pd_results=pd_results,
        ttc_matrix=ttc_matrix,
        validation_df=validation_df,
        output_dir=output_dir,
        base_year=base_year
    )

    # ================================================================
    # Save results (CSV)
    # ================================================================
    _save_csv_outputs(Path(output_dir), zt_series, macro_data, pd_engine,
                      pd_results, z_paths, horizon, validation_df)

    # ================================================================
    # Done
    # ================================================================
    _print_banner(" ✅ 완료!")
    print(f"\n 결과 파일:{output_dir}/")
    print(" - 차트: 01~07_*.png")
    print(" - 데이터: zt_series.csv, macro_data.csv")
    print(" - PD: cumulative_pd_*.csv, marginal_pd_*.csv")
    print(" - 검증: validation_results.csv")
    print()
    return 0
# Script entry point: run the pipeline and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)