diff --git a/data/macro_analysis.py b/data/macro_analysis.py new file mode 100644 index 0000000..b407e12 --- /dev/null +++ b/data/macro_analysis.py @@ -0,0 +1,503 @@ +""" +거시경제변수 포괄 탐색 및 Zt 회귀 최적화 + +ECOS API에서 30+ 후보변수 수집 → 6종 변환 → Zt 상관분석 → 최적 3변수 선택 + +사용법: + python data/macro_analysis.py # fallback 데이터로 빠른 분석 + python data/macro_analysis.py --fetch-ecos # ECOS API 실시간 수집 +""" + +import sys +import io +import re +import argparse +import itertools +import numpy as np +import pandas as pd +import warnings +from pathlib import Path +from typing import Dict, List, Tuple, Optional + +# Windows CP949 +if sys.stdout.encoding != 'utf-8': + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace') + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace') + +warnings.filterwarnings("ignore") + +import statsmodels.api as sm +from scipy import stats + +BASE_DIR = Path(__file__).parent.parent + +# ============================================================ +# 1. 
def _ecos_rows_to_annual(rows, ttype: str, start: int, end: int) -> pd.Series:
    """Turn raw ECOS ``row`` records into a clean annual series indexed by year.

    Parameters
    ----------
    rows : list of dict
        Records from ``data["StatisticSearch"]["row"]``; each must carry the
        ``TIME`` and ``DATA_VALUE`` keys.
    ttype : str
        ``"monthly_avg"`` averages the rows of each year (year = first four
        characters of ``TIME``), ``"level_to_pct"`` converts levels into
        year-over-year percentage changes, anything else keeps the level.
    start, end : int
        Inclusive year window applied to the result.
    """
    frame = pd.DataFrame(rows)
    values = pd.to_numeric(frame["DATA_VALUE"], errors="coerce")

    if ttype == "monthly_avg":
        years = frame["TIME"].str[:4].astype(int)
        series = values.groupby(years).mean()
    else:
        series = pd.Series(values.to_numpy(), index=frame["TIME"].astype(int))
        # Clean BEFORE differencing: a duplicated or missing year would
        # otherwise be treated as "the previous year" by pct_change.
        series = series[~series.index.duplicated(keep="first")]
        series = series.dropna().sort_index()
        if ttype == "level_to_pct":
            series = series.pct_change(fill_method=None) * 100
            # A zero level in the previous year yields +/-inf, not NaN.
            series = series.replace([np.inf, -np.inf], np.nan)

    series = series.dropna()
    return series.loc[(series.index >= start) & (series.index <= end)]


def fetch_all_ecos(api_key: str, start: int = 1997, end: int = 2025) -> pd.DataFrame:
    """Download every series listed in ``ECOS_CANDIDATES`` from the ECOS API.

    Each candidate is fetched on its own; a candidate that fails (network
    error, HTTP error, malformed payload) or that has fewer than 15 annual
    observations is reported on stdout and skipped, so one bad series never
    aborts the whole collection.

    Parameters
    ----------
    api_key : str
        ECOS API key; it is embedded in the request URL.
    start, end : int
        Inclusive year range of the returned data.

    Returns
    -------
    pd.DataFrame
        One column per successfully fetched variable, indexed by ``YEAR``
        and sorted ascending. Empty when nothing could be fetched.
    """
    import requests
    import time

    base_url = "https://ecos.bok.or.kr/api"
    results = {}

    for name, stat_code, period, item_code, ttype in ECOS_CANDIDATES:
        print(f" Fetching {name} ({stat_code}/{item_code})...", end=' ')

        # A year-over-year rate for `start` needs the level of `start - 1`;
        # without it the first requested year is always lost.
        first_year = start - 1 if ttype == "level_to_pct" else start
        if period == "M":
            s_date = f"{first_year}01"
            e_date = f"{end}12"
        else:
            s_date = str(first_year)
            e_date = str(end)

        # "1/500" asks for rows 1..500 of the result set.
        url = (f"{base_url}/StatisticSearch/"
               f"{api_key}/json/kr/1/500/"
               f"{stat_code}/{period}/{s_date}/{e_date}/"
               f"{item_code}/?/?")

        try:
            resp = requests.get(url, timeout=30)
            resp.raise_for_status()
            data = resp.json()

            if "StatisticSearch" not in data:
                msg = data.get("RESULT", {}).get("MESSAGE", "no data")
                print(f"SKIP ({msg[:30]})")
            else:
                series = _ecos_rows_to_annual(
                    data["StatisticSearch"]["row"], ttype, start, end
                )
                if len(series) >= 15:
                    results[name] = series
                    print(f"OK ({len(series)} obs)")
                else:
                    print(f"SKIP ({len(series)} obs)")
        except (requests.RequestException, ValueError, KeyError,
                TypeError, AttributeError) as e:
            # Best effort per series: report and move on to the next one.
            print(f"ERROR ({str(e)[:30]})")

        # Pause between requests, whatever the outcome of this one.
        time.sleep(0.3)

    if not results:
        return pd.DataFrame()

    df = pd.DataFrame(results)
    df.index.name = "YEAR"
    return df.sort_index()
def load_fallback_extended() -> pd.DataFrame:
    """Return the built-in extended fallback data set (no API call needed).

    The table is hard-coded: one entry per year from 2000 to 2025, each
    holding the same 15 variables. ``GOVT_5Y`` is listed in
    ``ECOS_CANDIDATES`` but has no fallback column here.

    Returns
    -------
    pd.DataFrame
        Indexed by ``YEAR`` with one column per variable.
    """
    # Outer keys are years, inner keys are variable names.
    # NOTE(review): the 2024 and 2025 rows contain round figures (e.g.
    # EXPORT 660000 / 650000) and look like estimates rather than published
    # values; confirm before relying on them.
    data = {
        2000: {"GDP_GROWTH": 8.9, "UNEMPLOYMENT": 4.4, "BASE_RATE": 5.25, "CD_RATE": 7.09, "CPI": 2.3, "LEADING_IDX": 101.2,
               "GOVT_3Y": 8.35, "CORP_AA": 9.35, "CORP_BBB": 11.90, "EXCHANGE_RATE": 1131, "EXPORT": 172268, "IMPORT": 160481, "M2": 651.8, "IPI": 102.5, "CSI": 101.0},
        2001: {"GDP_GROWTH": 4.5, "UNEMPLOYMENT": 4.0, "BASE_RATE": 4.00, "CD_RATE": 5.34, "CPI": 4.1, "LEADING_IDX": 99.5,
               "GOVT_3Y": 6.70, "CORP_AA": 8.12, "CORP_BBB": 11.27, "EXCHANGE_RATE": 1291, "EXPORT": 150439, "IMPORT": 141098, "M2": 736.5, "IPI": 99.5, "CSI": 96.5},
        2002: {"GDP_GROWTH": 7.4, "UNEMPLOYMENT": 3.3, "BASE_RATE": 4.25, "CD_RATE": 4.99, "CPI": 2.8, "LEADING_IDX": 102.3,
               "GOVT_3Y": 6.06, "CORP_AA": 7.02, "CORP_BBB": 9.75, "EXCHANGE_RATE": 1251, "EXPORT": 162471, "IMPORT": 152126, "M2": 816.3, "IPI": 108.5, "CSI": 105.0},
        2003: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.6, "BASE_RATE": 3.75, "CD_RATE": 4.24, "CPI": 3.5, "LEADING_IDX": 98.8,
               "GOVT_3Y": 4.93, "CORP_AA": 5.70, "CORP_BBB": 8.97, "EXCHANGE_RATE": 1192, "EXPORT": 193817, "IMPORT": 178827, "M2": 879.2, "IPI": 109.8, "CSI": 96.0},
        2004: {"GDP_GROWTH": 4.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 3.25, "CD_RATE": 3.77, "CPI": 3.6, "LEADING_IDX": 100.5,
               "GOVT_3Y": 4.11, "CORP_AA": 4.72, "CORP_BBB": 7.53, "EXCHANGE_RATE": 1145, "EXPORT": 253845, "IMPORT": 224463, "M2": 935.3, "IPI": 119.2, "CSI": 97.0},
        2005: {"GDP_GROWTH": 3.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 3.75, "CD_RATE": 3.81, "CPI": 2.8, "LEADING_IDX": 101.8,
               "GOVT_3Y": 4.27, "CORP_AA": 4.68, "CORP_BBB": 6.51, "EXCHANGE_RATE": 1024, "EXPORT": 284419, "IMPORT": 261238, "M2": 1002.7, "IPI": 126.0, "CSI": 100.5},
        2006: {"GDP_GROWTH": 5.2, "UNEMPLOYMENT": 3.5, "BASE_RATE": 4.50, "CD_RATE": 4.72, "CPI": 2.2, "LEADING_IDX": 102.5,
               "GOVT_3Y": 4.83, "CORP_AA": 5.25, "CORP_BBB": 7.08, "EXCHANGE_RATE": 955, "EXPORT": 325465, "IMPORT": 309383, "M2": 1089.9, "IPI": 136.0, "CSI": 106.0},
        2007: {"GDP_GROWTH": 5.5, "UNEMPLOYMENT": 3.2, "BASE_RATE": 5.00, "CD_RATE": 5.36, "CPI": 2.5, "LEADING_IDX": 103.1,
               "GOVT_3Y": 5.23, "CORP_AA": 5.70, "CORP_BBB": 7.44, "EXCHANGE_RATE": 929, "EXPORT": 371489, "IMPORT": 356846, "M2": 1181.6, "IPI": 144.5, "CSI": 108.5},
        2008: {"GDP_GROWTH": 2.8, "UNEMPLOYMENT": 3.2, "BASE_RATE": 3.00, "CD_RATE": 5.70, "CPI": 4.7, "LEADING_IDX": 96.5,
               "GOVT_3Y": 5.27, "CORP_AA": 7.02, "CORP_BBB": 10.73, "EXCHANGE_RATE": 1103, "EXPORT": 422007, "IMPORT": 435275, "M2": 1263.2, "IPI": 148.2, "CSI": 86.0},
        2009: {"GDP_GROWTH": 0.8, "UNEMPLOYMENT": 3.6, "BASE_RATE": 2.00, "CD_RATE": 2.63, "CPI": 2.8, "LEADING_IDX": 98.2,
               "GOVT_3Y": 4.04, "CORP_AA": 5.80, "CORP_BBB": 9.24, "EXCHANGE_RATE": 1276, "EXPORT": 363534, "IMPORT": 323085, "M2": 1404.4, "IPI": 140.0, "CSI": 85.0},
        2010: {"GDP_GROWTH": 6.8, "UNEMPLOYMENT": 3.7, "BASE_RATE": 2.50, "CD_RATE": 2.80, "CPI": 2.9, "LEADING_IDX": 103.0,
               "GOVT_3Y": 3.72, "CORP_AA": 4.66, "CORP_BBB": 7.98, "EXCHANGE_RATE": 1156, "EXPORT": 466384, "IMPORT": 425212, "M2": 1504.3, "IPI": 161.5, "CSI": 107.0},
        2011: {"GDP_GROWTH": 3.7, "UNEMPLOYMENT": 3.4, "BASE_RATE": 3.25, "CD_RATE": 3.55, "CPI": 4.0, "LEADING_IDX": 101.2,
               "GOVT_3Y": 3.62, "CORP_AA": 4.41, "CORP_BBB": 7.75, "EXCHANGE_RATE": 1108, "EXPORT": 555214, "IMPORT": 524413, "M2": 1586.5, "IPI": 168.0, "CSI": 100.0},
        2012: {"GDP_GROWTH": 2.4, "UNEMPLOYMENT": 3.2, "BASE_RATE": 2.75, "CD_RATE": 3.13, "CPI": 2.2, "LEADING_IDX": 100.3,
               "GOVT_3Y": 3.13, "CORP_AA": 3.76, "CORP_BBB": 6.56, "EXCHANGE_RATE": 1127, "EXPORT": 547870, "IMPORT": 519584, "M2": 1673.5, "IPI": 168.2, "CSI": 100.5},
        2013: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.1, "BASE_RATE": 2.50, "CD_RATE": 2.72, "CPI": 1.3, "LEADING_IDX": 100.8,
               "GOVT_3Y": 2.79, "CORP_AA": 3.19, "CORP_BBB": 5.87, "EXCHANGE_RATE": 1095, "EXPORT": 559632, "IMPORT": 515586, "M2": 1756.2, "IPI": 168.8, "CSI": 103.0},
        2014: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.5, "BASE_RATE": 2.00, "CD_RATE": 2.36, "CPI": 1.3, "LEADING_IDX": 101.0,
               "GOVT_3Y": 2.56, "CORP_AA": 2.99, "CORP_BBB": 5.22, "EXCHANGE_RATE": 1053, "EXPORT": 572665, "IMPORT": 525515, "M2": 1871.0, "IPI": 168.5, "CSI": 104.0},
        2015: {"GDP_GROWTH": 2.8, "UNEMPLOYMENT": 3.6, "BASE_RATE": 1.50, "CD_RATE": 1.72, "CPI": 0.7, "LEADING_IDX": 100.5,
               "GOVT_3Y": 1.80, "CORP_AA": 2.18, "CORP_BBB": 4.61, "EXCHANGE_RATE": 1131, "EXPORT": 526757, "IMPORT": 436499, "M2": 2010.0, "IPI": 168.0, "CSI": 103.5},
        2016: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.25, "CD_RATE": 1.48, "CPI": 1.0, "LEADING_IDX": 99.8,
               "GOVT_3Y": 1.44, "CORP_AA": 1.88, "CORP_BBB": 4.60, "EXCHANGE_RATE": 1161, "EXPORT": 495426, "IMPORT": 406193, "M2": 2151.1, "IPI": 168.5, "CSI": 100.0},
        2017: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.50, "CD_RATE": 1.52, "CPI": 1.9, "LEADING_IDX": 101.5,
               "GOVT_3Y": 1.80, "CORP_AA": 2.28, "CORP_BBB": 4.83, "EXCHANGE_RATE": 1131, "EXPORT": 573694, "IMPORT": 478478, "M2": 2347.2, "IPI": 174.2, "CSI": 105.0},
        2018: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.8, "BASE_RATE": 1.75, "CD_RATE": 1.85, "CPI": 1.5, "LEADING_IDX": 100.8,
               "GOVT_3Y": 2.10, "CORP_AA": 2.67, "CORP_BBB": 5.41, "EXCHANGE_RATE": 1100, "EXPORT": 604860, "IMPORT": 535202, "M2": 2508.9, "IPI": 178.0, "CSI": 102.0},
        2019: {"GDP_GROWTH": 2.2, "UNEMPLOYMENT": 3.8, "BASE_RATE": 1.25, "CD_RATE": 1.63, "CPI": 0.4, "LEADING_IDX": 99.3,
               "GOVT_3Y": 1.50, "CORP_AA": 1.93, "CORP_BBB": 4.52, "EXCHANGE_RATE": 1166, "EXPORT": 542233, "IMPORT": 503343, "M2": 2694.0, "IPI": 175.5, "CSI": 97.0},
        2020: {"GDP_GROWTH": -0.7, "UNEMPLOYMENT": 4.0, "BASE_RATE": 0.50, "CD_RATE": 0.76, "CPI": 0.5, "LEADING_IDX": 97.0,
               "GOVT_3Y": 0.98, "CORP_AA": 2.03, "CORP_BBB": 5.25, "EXCHANGE_RATE": 1180, "EXPORT": 512498, "IMPORT": 467633, "M2": 3070.2, "IPI": 170.0, "CSI": 90.0},
        2021: {"GDP_GROWTH": 4.3, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.00, "CD_RATE": 1.09, "CPI": 2.5, "LEADING_IDX": 102.8,
               "GOVT_3Y": 1.43, "CORP_AA": 2.26, "CORP_BBB": 5.64, "EXCHANGE_RATE": 1144, "EXPORT": 644400, "IMPORT": 615093, "M2": 3415.8, "IPI": 183.0, "CSI": 106.0},
        2022: {"GDP_GROWTH": 2.6, "UNEMPLOYMENT": 2.9, "BASE_RATE": 3.25, "CD_RATE": 3.77, "CPI": 5.1, "LEADING_IDX": 99.2,
               "GOVT_3Y": 3.14, "CORP_AA": 4.25, "CORP_BBB": 8.18, "EXCHANGE_RATE": 1292, "EXPORT": 683585, "IMPORT": 731370, "M2": 3561.0, "IPI": 186.5, "CSI": 95.0},
        2023: {"GDP_GROWTH": 1.4, "UNEMPLOYMENT": 2.7, "BASE_RATE": 3.50, "CD_RATE": 3.75, "CPI": 3.6, "LEADING_IDX": 98.8,
               "GOVT_3Y": 3.55, "CORP_AA": 4.40, "CORP_BBB": 8.40, "EXCHANGE_RATE": 1305, "EXPORT": 632744, "IMPORT": 642756, "M2": 3680.0, "IPI": 183.0, "CSI": 96.5},
        2024: {"GDP_GROWTH": 2.2, "UNEMPLOYMENT": 2.8, "BASE_RATE": 3.00, "CD_RATE": 3.30, "CPI": 2.3, "LEADING_IDX": 99.5,
               "GOVT_3Y": 3.20, "CORP_AA": 3.90, "CORP_BBB": 7.50, "EXCHANGE_RATE": 1350, "EXPORT": 660000, "IMPORT": 650000, "M2": 3800.0, "IPI": 185.0, "CSI": 98.0},
        2025: {"GDP_GROWTH": 1.8, "UNEMPLOYMENT": 3.0, "BASE_RATE": 2.75, "CD_RATE": 3.00, "CPI": 1.8, "LEADING_IDX": 99.8,
               "GOVT_3Y": 2.80, "CORP_AA": 3.50, "CORP_BBB": 6.80, "EXCHANGE_RATE": 1380, "EXPORT": 650000, "IMPORT": 640000, "M2": 3900.0, "IPI": 184.0, "CSI": 99.0},
    }
    # DataFrame(data) puts years in the columns; transpose so years are rows.
    df = pd.DataFrame(data).T
    df.index.name = "YEAR"
    return df
def _finite_pct_change(series: pd.Series) -> pd.Series:
    """Period-over-period percentage change with +/-inf mapped to NaN.

    ``fill_method=None`` keeps missing periods missing instead of
    forward-filling them into a fabricated 0 % change. A zero in the previous
    period produces +/-inf, which ``dropna`` would not remove downstream.
    """
    pct = series.pct_change(fill_method=None) * 100
    return pct.replace([np.inf, -np.inf], np.nan)


def apply_transforms(df: pd.DataFrame) -> pd.DataFrame:
    """Expand every column of ``df`` into up to six transformed variants.

    For a column ``X`` (sorted by index first) the result contains:

    * ``X``       - the level itself
    * ``X_DIFF``  - first difference
    * ``X_PCT``   - percentage change (undefined values are NaN, never inf)
    * ``X_LOG``   - natural log, only when every observed value is positive
    * ``X_LOGR``  - log return (difference of logs), same condition
    * ``X_LAG1``  - the level lagged by one period

    Parameters
    ----------
    df : pd.DataFrame
        Variables in columns, periods in the index.

    Returns
    -------
    pd.DataFrame
        All variants side by side, aligned on the index.
    """
    transformed = {}

    for col in df.columns:
        series = df[col].sort_index()

        transformed[f"{col}"] = series
        transformed[f"{col}_DIFF"] = series.diff()
        transformed[f"{col}_PCT"] = _finite_pct_change(series)

        # Judge positivity on observed values only: a gap in the data must
        # not disqualify an otherwise strictly positive series.
        observed = series.dropna()
        if not observed.empty and (observed > 0).all():
            log_series = np.log(series)
            transformed[f"{col}_LOG"] = log_series
            transformed[f"{col}_LOGR"] = log_series.diff()

        transformed[f"{col}_LAG1"] = series.shift(1)

    return pd.DataFrame(transformed)


def add_derived_variables(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` extended with spread and balance variables.

    Each group is added only when its source columns are present:

    * ``CREDIT_SPREAD`` = ``CORP_BBB - CORP_AA`` and its first difference
    * ``TERM_SPREAD`` = ``GOVT_3Y - BASE_RATE`` and its first difference
    * ``TRADE_BALANCE`` = ``EXPORT - IMPORT`` and its percentage change
    * ``REAL_RATE`` = ``BASE_RATE - CPI``

    The input frame is not modified.
    """
    derived = df.copy()
    columns = set(df.columns)

    if {"CORP_BBB", "CORP_AA"} <= columns:
        derived["CREDIT_SPREAD"] = df["CORP_BBB"] - df["CORP_AA"]
        derived["CREDIT_SPREAD_DIFF"] = derived["CREDIT_SPREAD"].diff()

    if {"GOVT_3Y", "BASE_RATE"} <= columns:
        derived["TERM_SPREAD"] = df["GOVT_3Y"] - df["BASE_RATE"]
        derived["TERM_SPREAD_DIFF"] = derived["TERM_SPREAD"].diff()

    if {"EXPORT", "IMPORT"} <= columns:
        derived["TRADE_BALANCE"] = df["EXPORT"] - df["IMPORT"]
        # The balance can be exactly zero, so guard against +/-inf.
        derived["TRADE_BAL_PCT"] = _finite_pct_change(derived["TRADE_BALANCE"])

    if {"BASE_RATE", "CPI"} <= columns:
        derived["REAL_RATE"] = df["BASE_RATE"] - df["CPI"]

    return derived
def correlate_with_zt(
    zt_series: pd.Series,
    macro_expanded: pd.DataFrame,
    min_obs: int = 10,
) -> pd.DataFrame:
    """Correlate every macro variable with Zt over their common periods.

    Only periods where both Zt and the variable are finite are used. A
    variable is skipped when fewer than ``min_obs`` such periods remain or
    when either side is constant on them (correlation is undefined there).

    Parameters
    ----------
    zt_series : pd.Series
        Zt indexed by period.
    macro_expanded : pd.DataFrame
        Candidate variables in columns, indexed by period.
    min_obs : int, default 10
        Minimum number of usable observations per variable.

    Returns
    -------
    pd.DataFrame
        Columns ``variable``, ``pearson_r``, ``pearson_p``, ``spearman_rho``,
        ``spearman_p``, ``abs_r`` and ``n_obs``, sorted by ``abs_r``
        descending. When no variable qualifies the frame is empty but still
        carries these columns.
    """
    columns = ["variable", "pearson_r", "pearson_p",
               "spearman_rho", "spearman_p", "abs_r", "n_obs"]

    common = sorted(set(zt_series.index) & set(macro_expanded.index))
    zt = pd.to_numeric(zt_series.loc[common], errors="coerce").to_numpy(dtype=float)
    zt_finite = np.isfinite(zt)
    required = max(min_obs, 2)  # a correlation needs at least two points

    records = []
    for col in macro_expanded.columns:
        values = pd.to_numeric(
            macro_expanded.loc[common, col], errors="coerce"
        ).to_numpy(dtype=float)
        valid = zt_finite & np.isfinite(values)
        n_obs = int(valid.sum())
        if n_obs < required:
            continue

        x_ok, zt_ok = values[valid], zt[valid]
        if np.ptp(x_ok) == 0 or np.ptp(zt_ok) == 0:
            continue

        r, p = stats.pearsonr(zt_ok, x_ok)
        rho, rho_p = stats.spearmanr(zt_ok, x_ok)
        records.append({
            "variable": col,
            "pearson_r": r,
            "pearson_p": p,
            "spearman_rho": rho,
            "spearman_p": rho_p,
            "abs_r": abs(r),
            "n_obs": n_obs,
        })

    if not records:
        # Sorting an empty, column-less frame by "abs_r" would raise KeyError.
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(records, columns=columns).sort_values("abs_r", ascending=False)
def best_3var_search(
    zt_series: pd.Series,
    macro_expanded: pd.DataFrame,
    top_n_candidates: int = 20,
    corr_df: Optional[pd.DataFrame] = None,
    max_pvalue: Optional[float] = None,
) -> Tuple[Optional[List[str]], dict]:
    """Search the top candidates for the best three-regressor OLS model of Zt.

    Every combination of three candidates is fitted as

        Zt = b0 + b1*X1 + b2*X2 + b3*X3

    on standardized regressors, and the combination with the highest
    adjusted R-squared wins. A combination is skipped when any pair of its
    variables has |r| > 0.85 (multicollinearity), or when fewer than 15
    complete observations remain.

    Parameters
    ----------
    zt_series : pd.Series
        Zt indexed by period.
    macro_expanded : pd.DataFrame
        Candidate regressors in columns, indexed by period.
    top_n_candidates : int, default 20
        How many candidates to take from ``corr_df`` (or, without it, from
        the leading columns of ``macro_expanded``).
    corr_df : pd.DataFrame, optional
        Ranking with a ``variable`` column, best candidate first.
    max_pvalue : float, optional
        When given, a combination is discarded unless every slope p-value is
        below this threshold. ``None`` (the default) applies no
        significance filter.

    Returns
    -------
    (best_combo, info)
        ``best_combo`` is the winning list of three variable names, or
        ``None`` when no model could be fitted. ``info`` holds
        ``best_model`` (the fitted statsmodels result or ``None``),
        ``top_10`` (the ten best result dicts by adjusted R-squared) and
        ``total_tested`` (the number of retained fits).
    """
    import math

    # Non-finite values must never reach OLS: treat +/-inf like NaN.
    zt_clean = pd.to_numeric(zt_series, errors="coerce")
    zt_clean = zt_clean.replace([np.inf, -np.inf], np.nan).dropna()
    common = sorted(set(zt_clean.index) & set(macro_expanded.index))
    zt = zt_clean.loc[common]
    data = macro_expanded.loc[common].replace([np.inf, -np.inf], np.nan)

    # Take the top N candidates.
    if corr_df is not None and "variable" in corr_df.columns:
        ranked = corr_df.head(top_n_candidates)["variable"].tolist()
    else:
        ranked = list(macro_expanded.columns)[:top_n_candidates]

    # Keep candidates that exist, have enough data and actually vary.
    valid_vars = [
        v for v in dict.fromkeys(ranked)
        if v in data.columns
        and data[v].notna().sum() >= 15
        and data[v].std() > 1e-10
    ]

    print(f"\n Searching best 3-variable combination from {len(valid_vars)} candidates...")
    print(f" Total combinations: {math.comb(len(valid_vars), 3)}")

    # Pairwise |r| computed once; pairs sharing fewer than 6 observations
    # come out as NaN, which never exceeds the threshold.
    abs_corr = data[valid_vars].corr(min_periods=6).abs()

    best_adj_r2 = -np.inf
    best_combo = None
    best_result = None
    all_results = []

    for combo in itertools.combinations(valid_vars, 3):
        combo_list = list(combo)

        # Multicollinearity check.
        if any(abs_corr.at[a, b] > 0.85
               for a, b in itertools.combinations(combo_list, 2)):
            continue

        X_df = data[combo_list].dropna()
        if len(X_df) < 15:
            continue

        y = zt.loc[X_df.index].to_numpy(dtype=float)
        X = X_df.to_numpy(dtype=float)

        # Standardize; a column that is constant on this subset is left
        # unscaled instead of being divided by ~0.
        X_std = X.std(axis=0)
        X_std[X_std < 1e-10] = 1
        X_norm = (X - X.mean(axis=0)) / X_std

        # has_constant="add": always prepend the intercept, so pvalues[1:]
        # are the three slopes even if a regressor happens to be constant.
        X_const = sm.add_constant(X_norm, has_constant="add")
        try:
            model = sm.OLS(y, X_const).fit()
        except (ValueError, np.linalg.LinAlgError):
            continue

        adj_r2 = model.rsquared_adj
        if not np.isfinite(adj_r2):
            continue

        pvalues = np.asarray(model.pvalues)[1:]
        if max_pvalue is not None and not np.all(pvalues < max_pvalue):
            continue

        all_results.append({
            "vars": combo_list,
            "r2": model.rsquared,
            "adj_r2": adj_r2,
            "aic": model.aic,
            "pvalues": pvalues.tolist(),
        })

        if adj_r2 > best_adj_r2:
            best_adj_r2 = adj_r2
            best_combo = combo_list
            best_result = model

    all_results.sort(key=lambda res: res["adj_r2"], reverse=True)

    return best_combo, {
        "best_model": best_result,
        "top_10": all_results[:10],
        "total_tested": len(all_results),
    }
def main():
    """Run the macro-variable exploration against the Zt series.

    Steps: load Zt, collect the macro variables (ECOS API with
    ``--fetch-ecos``, built-in fallback otherwise), build derived and
    transformed variables, rank them by correlation with Zt, search the best
    three-variable regression, print the results and write the correlation
    table to ``results/macro_correlation.csv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--fetch-ecos", action="store_true", help="ECOS API 실시간 수집")
    args = parser.parse_args()

    print("=" * 70)
    print(" 거시경제변수 포괄 탐색 — Zt 회귀 최적화")
    print(" 목표: R² ≥ 0.7, 최대 3변수")
    print("=" * 70)

    # 1. Load the Zt series.
    print("\n[1] Zt 시계열 로딩...")
    # Project packages live one level above this file's directory.
    sys.path.insert(0, str(BASE_DIR))
    from data.transition_matrices import load_transition_matrices, compute_ttc_matrix
    from models.credit_cycle import estimate_zt_series

    tm = load_transition_matrices("real")
    ttc = compute_ttc_matrix(tm)
    zt_dict = estimate_zt_series(tm, ttc, rho=0.20)
    zt_series = pd.Series(zt_dict, name="Zt")
    zt_series.index.name = "YEAR"
    print(f" Zt: {len(zt_series)} obs ({zt_series.index.min()}~{zt_series.index.max()})")
    print(f" Mean={zt_series.mean():.4f}, Std={zt_series.std():.4f}")

    # 2. Collect the macro variables.
    print("\n[2] 거시변수 수집...")
    if args.fetch_ecos:
        import yaml
        # Explicit UTF-8: the locale default (CP949 on Korean Windows) cannot
        # be relied on to decode the config file.
        with open(BASE_DIR / "config.yaml", encoding="utf-8") as f:
            config = yaml.safe_load(f)
        api_key = config["ecos"]["api_key"]
        raw_df = fetch_all_ecos(api_key)
        # Columns the API did not deliver are taken from the fallback table.
        fb = load_fallback_extended()
        for col in fb.columns:
            if col not in raw_df.columns:
                raw_df[col] = fb[col]
    else:
        raw_df = load_fallback_extended()

    print(f" 원본 변수: {len(raw_df.columns)}개")
    print(f" 기간: {raw_df.index.min()}~{raw_df.index.max()}")

    # 3. Derived variables, then the per-variable transforms.
    print("\n[3] 파생변수 생성...")
    derived = add_derived_variables(raw_df)
    expanded = apply_transforms(derived)

    # Drop columns with fewer than 15 non-missing values.
    expanded = expanded.dropna(axis=1, thresh=15)
    print(f" 확장 변수: {len(expanded.columns)}개")

    # 4. Correlation analysis.
    print("\n[4] Zt 상관분석...")
    corr_df = correlate_with_zt(zt_series, expanded)
    if corr_df.empty:
        # Nothing to rank or regress on; stop before the search and the
        # report try to read an empty table.
        print(" 상관분석 가능한 변수가 없습니다.")
        return

    print(f"\n === Top 30 변수 (|Pearson r| 기준) ===")
    print(f" {'Variable':<30} {'r':>8} {'p':>8} {'rho':>8} {'n':>4}")
    print(f" {'-'*30} {'-'*8} {'-'*8} {'-'*8} {'-'*4}")
    for _, row in corr_df.head(30).iterrows():
        sig = "***" if row["pearson_p"] < 0.01 else ("**" if row["pearson_p"] < 0.05 else ("*" if row["pearson_p"] < 0.1 else ""))
        print(f" {row['variable']:<30} {row['pearson_r']:>7.4f}{sig:<1} {row['pearson_p']:>7.4f} {row['spearman_rho']:>7.4f} {row['n_obs']:>4}")

    # 5. Best three-variable combination.
    print("\n[5] 최적 3변수 조합 탐색...")
    best_vars, search_results = best_3var_search(
        zt_series, expanded, top_n_candidates=25, corr_df=corr_df
    )

    print(f"\n === Top 10 3변수 조합 (adj R² 기준) ===")
    for i, res in enumerate(search_results["top_10"]):
        vars_str = " + ".join([v[:20] for v in res["vars"]])
        print(f" {i+1:2d}. R²={res['r2']:.4f} adj.R²={res['adj_r2']:.4f} AIC={res['aic']:.1f} | {vars_str}")

    # 6. Details of the winning model, when one was found.
    if best_vars and search_results["best_model"]:
        model = search_results["best_model"]
        print(f"\n === 최적 모형 ===")
        print(f" 변수: {best_vars}")
        print(f" R² = {model.rsquared:.4f}")
        print(f" Adj. R² = {model.rsquared_adj:.4f}")
        print(f" AIC = {model.aic:.2f}")
        print(f" F-stat = {model.fvalue:.4f} (p={model.f_pvalue:.4f})")
        print(f"\n{model.summary()}")

        target_met = "YES" if model.rsquared >= 0.7 else "NO"
        print(f"\n R² ≥ 0.7 달성: {target_met} (R²={model.rsquared:.4f})")

    # Persist the correlation table.
    output_dir = BASE_DIR / "results"
    output_dir.mkdir(exist_ok=True)
    corr_df.to_csv(output_dir / "macro_correlation.csv", index=False)
    print(f"\n 상관분석 결과 저장: {output_dir / 'macro_correlation.csv'}")

    print(f"\n 총 탐색: {search_results['total_tested']} 조합")
    print(f" 완료!")
100 cpi_growth = cpi_growth.loc[start_year:end_year] macro_vars["CPI_GROWTH"] = cpi_growth time.sleep(0.5) + # ------------------------------------------------------- + # 5b) 국고채 3년 금리 (%) + # 통계표: 721Y001 / 항목: 5020000 + # ------------------------------------------------------- + logger.info("국고채 3년 금리 조회 중...") + df_govt = api.fetch_stat("721Y001", "A", str(start_year - 1), end, "5020000") + if not df_govt.empty: + govt_series = df_govt.set_index("TIME")["DATA_VALUE"].astype(float) + govt_series.index = govt_series.index.astype(int) + macro_vars["GOVT_3Y"] = govt_series + time.sleep(0.5) + + # ------------------------------------------------------- + # 5c) 회사채 AA- 금리 (%) + # 통계표: 721Y001 / 항목: 7010000 + # ------------------------------------------------------- + logger.info("회사채 AA 금리 조회 중...") + df_corp_aa = api.fetch_stat("721Y001", "A", str(start_year - 1), end, "7010000") + if not df_corp_aa.empty: + corp_aa = df_corp_aa.set_index("TIME")["DATA_VALUE"].astype(float) + corp_aa.index = corp_aa.index.astype(int) + macro_vars["CORP_AA"] = corp_aa + time.sleep(0.5) + + # ------------------------------------------------------- + # 5d) 회사채 BBB- 금리 (%) + # 통계표: 721Y001 / 항목: 7030000 + # ------------------------------------------------------- + logger.info("회사채 BBB 금리 조회 중...") + df_corp_bbb = api.fetch_stat("721Y001", "A", str(start_year - 1), end, "7030000") + if not df_corp_bbb.empty: + corp_bbb = df_corp_bbb.set_index("TIME")["DATA_VALUE"].astype(float) + corp_bbb.index = corp_bbb.index.astype(int) + macro_vars["CORP_BBB"] = corp_bbb + time.sleep(0.5) + # ------------------------------------------------------- # 6) 경기선행종합지수 # 통계표: 901Y067 / 항목: I16A (선행종합지수) @@ -247,32 +280,32 @@ def _fallback_macro_data(start_year: int = 2000, end_year: int = 2025) -> pd.Dat 출처: 한국은행 경제통계시스템 (실제 공표 수치 기반) """ data = { - 2000: {"GDP_GROWTH": 8.9, "UNEMPLOYMENT": 4.4, "BASE_RATE": 5.25, "CD_RATE": 7.09, "CPI_GROWTH": 2.3, "LEADING_INDEX": 101.2}, - 2001: {"GDP_GROWTH": 4.5, 
"UNEMPLOYMENT": 4.0, "BASE_RATE": 4.00, "CD_RATE": 5.34, "CPI_GROWTH": 4.1, "LEADING_INDEX": 99.5}, - 2002: {"GDP_GROWTH": 7.4, "UNEMPLOYMENT": 3.3, "BASE_RATE": 4.25, "CD_RATE": 4.99, "CPI_GROWTH": 2.8, "LEADING_INDEX": 102.3}, - 2003: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.6, "BASE_RATE": 3.75, "CD_RATE": 4.24, "CPI_GROWTH": 3.5, "LEADING_INDEX": 98.8}, - 2004: {"GDP_GROWTH": 4.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 3.25, "CD_RATE": 3.77, "CPI_GROWTH": 3.6, "LEADING_INDEX": 100.5}, - 2005: {"GDP_GROWTH": 3.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 3.75, "CD_RATE": 3.81, "CPI_GROWTH": 2.8, "LEADING_INDEX": 101.8}, - 2006: {"GDP_GROWTH": 5.2, "UNEMPLOYMENT": 3.5, "BASE_RATE": 4.50, "CD_RATE": 4.72, "CPI_GROWTH": 2.2, "LEADING_INDEX": 102.5}, - 2007: {"GDP_GROWTH": 5.5, "UNEMPLOYMENT": 3.2, "BASE_RATE": 5.00, "CD_RATE": 5.36, "CPI_GROWTH": 2.5, "LEADING_INDEX": 103.1}, - 2008: {"GDP_GROWTH": 2.8, "UNEMPLOYMENT": 3.2, "BASE_RATE": 3.00, "CD_RATE": 5.70, "CPI_GROWTH": 4.7, "LEADING_INDEX": 96.5}, - 2009: {"GDP_GROWTH": 0.8, "UNEMPLOYMENT": 3.6, "BASE_RATE": 2.00, "CD_RATE": 2.63, "CPI_GROWTH": 2.8, "LEADING_INDEX": 98.2}, - 2010: {"GDP_GROWTH": 6.8, "UNEMPLOYMENT": 3.7, "BASE_RATE": 2.50, "CD_RATE": 2.80, "CPI_GROWTH": 2.9, "LEADING_INDEX": 103.0}, - 2011: {"GDP_GROWTH": 3.7, "UNEMPLOYMENT": 3.4, "BASE_RATE": 3.25, "CD_RATE": 3.55, "CPI_GROWTH": 4.0, "LEADING_INDEX": 101.2}, - 2012: {"GDP_GROWTH": 2.4, "UNEMPLOYMENT": 3.2, "BASE_RATE": 2.75, "CD_RATE": 3.13, "CPI_GROWTH": 2.2, "LEADING_INDEX": 100.3}, - 2013: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.1, "BASE_RATE": 2.50, "CD_RATE": 2.72, "CPI_GROWTH": 1.3, "LEADING_INDEX": 100.8}, - 2014: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.5, "BASE_RATE": 2.00, "CD_RATE": 2.36, "CPI_GROWTH": 1.3, "LEADING_INDEX": 101.0}, - 2015: {"GDP_GROWTH": 2.8, "UNEMPLOYMENT": 3.6, "BASE_RATE": 1.50, "CD_RATE": 1.72, "CPI_GROWTH": 0.7, "LEADING_INDEX": 100.5}, - 2016: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.25, "CD_RATE": 1.48, 
"CPI_GROWTH": 1.0, "LEADING_INDEX": 99.8}, - 2017: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.50, "CD_RATE": 1.52, "CPI_GROWTH": 1.9, "LEADING_INDEX": 101.5}, - 2018: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.8, "BASE_RATE": 1.75, "CD_RATE": 1.85, "CPI_GROWTH": 1.5, "LEADING_INDEX": 100.8}, - 2019: {"GDP_GROWTH": 2.2, "UNEMPLOYMENT": 3.8, "BASE_RATE": 1.25, "CD_RATE": 1.63, "CPI_GROWTH": 0.4, "LEADING_INDEX": 99.3}, - 2020: {"GDP_GROWTH": -0.7, "UNEMPLOYMENT": 4.0, "BASE_RATE": 0.50, "CD_RATE": 0.76, "CPI_GROWTH": 0.5, "LEADING_INDEX": 97.0}, - 2021: {"GDP_GROWTH": 4.3, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.00, "CD_RATE": 1.09, "CPI_GROWTH": 2.5, "LEADING_INDEX": 102.8}, - 2022: {"GDP_GROWTH": 2.6, "UNEMPLOYMENT": 2.9, "BASE_RATE": 3.25, "CD_RATE": 3.77, "CPI_GROWTH": 5.1, "LEADING_INDEX": 99.2}, - 2023: {"GDP_GROWTH": 1.4, "UNEMPLOYMENT": 2.7, "BASE_RATE": 3.50, "CD_RATE": 3.75, "CPI_GROWTH": 3.6, "LEADING_INDEX": 98.8}, - 2024: {"GDP_GROWTH": 2.2, "UNEMPLOYMENT": 2.8, "BASE_RATE": 3.00, "CD_RATE": 3.30, "CPI_GROWTH": 2.3, "LEADING_INDEX": 99.5}, - 2025: {"GDP_GROWTH": 1.8, "UNEMPLOYMENT": 3.0, "BASE_RATE": 2.75, "CD_RATE": 3.00, "CPI_GROWTH": 1.8, "LEADING_INDEX": 99.8}, + 2000: {"GDP_GROWTH": 8.9, "UNEMPLOYMENT": 4.4, "BASE_RATE": 5.25, "CD_RATE": 7.09, "CPI_GROWTH": 2.3, "LEADING_INDEX": 101.2, "GOVT_3Y": 8.35, "CORP_AA": 9.35, "CORP_BBB": 11.90}, + 2001: {"GDP_GROWTH": 4.5, "UNEMPLOYMENT": 4.0, "BASE_RATE": 4.00, "CD_RATE": 5.34, "CPI_GROWTH": 4.1, "LEADING_INDEX": 99.5, "GOVT_3Y": 6.70, "CORP_AA": 8.12, "CORP_BBB": 11.27}, + 2002: {"GDP_GROWTH": 7.4, "UNEMPLOYMENT": 3.3, "BASE_RATE": 4.25, "CD_RATE": 4.99, "CPI_GROWTH": 2.8, "LEADING_INDEX": 102.3, "GOVT_3Y": 6.06, "CORP_AA": 7.02, "CORP_BBB": 9.75}, + 2003: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.6, "BASE_RATE": 3.75, "CD_RATE": 4.24, "CPI_GROWTH": 3.5, "LEADING_INDEX": 98.8, "GOVT_3Y": 4.93, "CORP_AA": 5.70, "CORP_BBB": 8.97}, + 2004: {"GDP_GROWTH": 4.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 3.25, 
"CD_RATE": 3.77, "CPI_GROWTH": 3.6, "LEADING_INDEX": 100.5, "GOVT_3Y": 4.11, "CORP_AA": 4.72, "CORP_BBB": 7.53}, + 2005: {"GDP_GROWTH": 3.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 3.75, "CD_RATE": 3.81, "CPI_GROWTH": 2.8, "LEADING_INDEX": 101.8, "GOVT_3Y": 4.27, "CORP_AA": 4.68, "CORP_BBB": 6.51}, + 2006: {"GDP_GROWTH": 5.2, "UNEMPLOYMENT": 3.5, "BASE_RATE": 4.50, "CD_RATE": 4.72, "CPI_GROWTH": 2.2, "LEADING_INDEX": 102.5, "GOVT_3Y": 4.83, "CORP_AA": 5.25, "CORP_BBB": 7.08}, + 2007: {"GDP_GROWTH": 5.5, "UNEMPLOYMENT": 3.2, "BASE_RATE": 5.00, "CD_RATE": 5.36, "CPI_GROWTH": 2.5, "LEADING_INDEX": 103.1, "GOVT_3Y": 5.23, "CORP_AA": 5.70, "CORP_BBB": 7.44}, + 2008: {"GDP_GROWTH": 2.8, "UNEMPLOYMENT": 3.2, "BASE_RATE": 3.00, "CD_RATE": 5.70, "CPI_GROWTH": 4.7, "LEADING_INDEX": 96.5, "GOVT_3Y": 5.27, "CORP_AA": 7.02, "CORP_BBB": 10.73}, + 2009: {"GDP_GROWTH": 0.8, "UNEMPLOYMENT": 3.6, "BASE_RATE": 2.00, "CD_RATE": 2.63, "CPI_GROWTH": 2.8, "LEADING_INDEX": 98.2, "GOVT_3Y": 4.04, "CORP_AA": 5.80, "CORP_BBB": 9.24}, + 2010: {"GDP_GROWTH": 6.8, "UNEMPLOYMENT": 3.7, "BASE_RATE": 2.50, "CD_RATE": 2.80, "CPI_GROWTH": 2.9, "LEADING_INDEX": 103.0, "GOVT_3Y": 3.72, "CORP_AA": 4.66, "CORP_BBB": 7.98}, + 2011: {"GDP_GROWTH": 3.7, "UNEMPLOYMENT": 3.4, "BASE_RATE": 3.25, "CD_RATE": 3.55, "CPI_GROWTH": 4.0, "LEADING_INDEX": 101.2, "GOVT_3Y": 3.62, "CORP_AA": 4.41, "CORP_BBB": 7.75}, + 2012: {"GDP_GROWTH": 2.4, "UNEMPLOYMENT": 3.2, "BASE_RATE": 2.75, "CD_RATE": 3.13, "CPI_GROWTH": 2.2, "LEADING_INDEX": 100.3, "GOVT_3Y": 3.13, "CORP_AA": 3.76, "CORP_BBB": 6.56}, + 2013: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.1, "BASE_RATE": 2.50, "CD_RATE": 2.72, "CPI_GROWTH": 1.3, "LEADING_INDEX": 100.8, "GOVT_3Y": 2.79, "CORP_AA": 3.19, "CORP_BBB": 5.87}, + 2014: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.5, "BASE_RATE": 2.00, "CD_RATE": 2.36, "CPI_GROWTH": 1.3, "LEADING_INDEX": 101.0, "GOVT_3Y": 2.56, "CORP_AA": 2.99, "CORP_BBB": 5.22}, + 2015: {"GDP_GROWTH": 2.8, "UNEMPLOYMENT": 3.6, "BASE_RATE": 1.50, 
"CD_RATE": 1.72, "CPI_GROWTH": 0.7, "LEADING_INDEX": 100.5, "GOVT_3Y": 1.80, "CORP_AA": 2.18, "CORP_BBB": 4.61}, + 2016: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.25, "CD_RATE": 1.48, "CPI_GROWTH": 1.0, "LEADING_INDEX": 99.8, "GOVT_3Y": 1.44, "CORP_AA": 1.88, "CORP_BBB": 4.60}, + 2017: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.50, "CD_RATE": 1.52, "CPI_GROWTH": 1.9, "LEADING_INDEX": 101.5, "GOVT_3Y": 1.80, "CORP_AA": 2.28, "CORP_BBB": 4.83}, + 2018: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.8, "BASE_RATE": 1.75, "CD_RATE": 1.85, "CPI_GROWTH": 1.5, "LEADING_INDEX": 100.8, "GOVT_3Y": 2.10, "CORP_AA": 2.67, "CORP_BBB": 5.41}, + 2019: {"GDP_GROWTH": 2.2, "UNEMPLOYMENT": 3.8, "BASE_RATE": 1.25, "CD_RATE": 1.63, "CPI_GROWTH": 0.4, "LEADING_INDEX": 99.3, "GOVT_3Y": 1.50, "CORP_AA": 1.93, "CORP_BBB": 4.52}, + 2020: {"GDP_GROWTH": -0.7, "UNEMPLOYMENT": 4.0, "BASE_RATE": 0.50, "CD_RATE": 0.76, "CPI_GROWTH": 0.5, "LEADING_INDEX": 97.0, "GOVT_3Y": 0.98, "CORP_AA": 2.03, "CORP_BBB": 5.25}, + 2021: {"GDP_GROWTH": 4.3, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.00, "CD_RATE": 1.09, "CPI_GROWTH": 2.5, "LEADING_INDEX": 102.8, "GOVT_3Y": 1.43, "CORP_AA": 2.26, "CORP_BBB": 5.64}, + 2022: {"GDP_GROWTH": 2.6, "UNEMPLOYMENT": 2.9, "BASE_RATE": 3.25, "CD_RATE": 3.77, "CPI_GROWTH": 5.1, "LEADING_INDEX": 99.2, "GOVT_3Y": 3.14, "CORP_AA": 4.25, "CORP_BBB": 8.18}, + 2023: {"GDP_GROWTH": 1.4, "UNEMPLOYMENT": 2.7, "BASE_RATE": 3.50, "CD_RATE": 3.75, "CPI_GROWTH": 3.6, "LEADING_INDEX": 98.8, "GOVT_3Y": 3.55, "CORP_AA": 4.40, "CORP_BBB": 8.40}, + 2024: {"GDP_GROWTH": 2.2, "UNEMPLOYMENT": 2.8, "BASE_RATE": 3.00, "CD_RATE": 3.30, "CPI_GROWTH": 2.3, "LEADING_INDEX": 99.5, "GOVT_3Y": 3.20, "CORP_AA": 3.90, "CORP_BBB": 7.50}, + 2025: {"GDP_GROWTH": 1.8, "UNEMPLOYMENT": 3.0, "BASE_RATE": 2.75, "CD_RATE": 3.00, "CPI_GROWTH": 1.8, "LEADING_INDEX": 99.8, "GOVT_3Y": 2.80, "CORP_AA": 3.50, "CORP_BBB": 6.80}, } df = pd.DataFrame(data).T @@ -280,6 +313,47 @@ def 
def compute_derived_features(macro_df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute the derived regressors used in the Zt regression.

    Features (the source comment reports R² = 0.73 for this trio):
        1. CORP_AA_LOGR: log return of the AA corporate yield,
           ln(AA_t / AA_{t-1})
        2. TERM_SPREAD_LAG1: GOVT_3Y - BASE_RATE, lagged one period
        3. CREDIT_SPREAD_LAG1: CORP_BBB - CORP_AA, lagged one period

    "Previous period" means the previous row after sorting by index, so the
    index is expected to have no gaps.

    Parameters
    ----------
    macro_df : pd.DataFrame with at least:
        CORP_AA, CORP_BBB, GOVT_3Y, BASE_RATE columns

    Returns
    -------
    pd.DataFrame with columns: CORP_AA_LOGR, TERM_SPREAD_LAG1,
        CREDIT_SPREAD_LAG1. Rows where any feature is undefined are dropped
        (always the first row, plus rows around a non-positive CORP_AA).
        When a required column is missing, a warning is logged and an empty
        frame carrying ``macro_df``'s index is returned.
    """
    # Imported locally so the function does not depend on a module-level
    # numpy import being present.
    import numpy as np

    required = ["CORP_AA", "CORP_BBB", "GOVT_3Y", "BASE_RATE"]
    missing = [c for c in required if c not in macro_df.columns]
    if missing:
        logger.warning(f"파생변수 계산에 필요한 열이 없습니다: {missing}")
        return pd.DataFrame(index=macro_df.index)

    df = macro_df.sort_index()
    features = pd.DataFrame(index=df.index)

    # 1. Log return of the AA yield. The log is undefined for yields <= 0;
    #    mask them to NaN so they are dropped below instead of leaking
    #    -inf/+inf (which dropna() keeps) into the regression inputs.
    corp_aa = df["CORP_AA"]
    features["CORP_AA_LOGR"] = np.log(corp_aa.where(corp_aa > 0)).diff()

    # 2. Term spread, lagged one period.
    term_spread = df["GOVT_3Y"] - df["BASE_RATE"]
    features["TERM_SPREAD_LAG1"] = term_spread.shift(1)

    # 3. Credit spread, lagged one period.
    credit_spread = df["CORP_BBB"] - df["CORP_AA"]
    features["CREDIT_SPREAD_LAG1"] = credit_spread.shift(1)

    return features.dropna()
Belkin & Suchower Zt 추정 # ================================================================ @@ -152,7 +158,14 @@ def main(): print(" [3/7] 거시연계 회귀모형 (Zt ~ 거시변수)") print("=" * 70) - macro_model = build_macro_zt_model(zt_dict, macro_data, method="stepwise_aic") + # 파생변수가 있으면 원본 + 파생 결합 + if not derived_features.empty: + model_input = pd.concat([macro_data, derived_features], axis=1) + model_input = model_input.loc[:, ~model_input.columns.duplicated()] + else: + model_input = macro_data + + macro_model = build_macro_zt_model(zt_dict, model_input, method="stepwise_aic") print(f"\n 선택된 변수: {macro_model.selected_vars}") print(macro_model.summary()) @@ -177,6 +190,21 @@ def main(): macro_data, base_year=2025, forecast_years=5 ) + # 시나리오에 파생변수 추가 (history + forecast로 lag/diff 계산) + if not derived_features.empty: + for sname, sdf in macro_scenarios.items(): + # history + forecast 결합하여 파생변수 계산 + combined = pd.concat([macro_data, sdf]) + combined = combined[~combined.index.duplicated(keep='last')] + combined = combined.sort_index() + feat = compute_derived_features(combined) + # forecast 연도만 추출 후 시나리오에 결합 + forecast_years = sdf.index + feat_forecast = feat.loc[feat.index.intersection(forecast_years)] + if not feat_forecast.empty: + macro_scenarios[sname] = pd.concat([sdf, feat_forecast], axis=1) + macro_scenarios[sname] = macro_scenarios[sname].loc[:, ~macro_scenarios[sname].columns.duplicated()] + # Zt 경로 생성 z_paths = scenario_engine.generate_z_paths( zt_dict, macro_model, macro_scenarios, base_year=2025