From cc55acc330207b1d86114dd2e7ea7ed25e6d1367 Mon Sep 17 00:00:00 2001 From: Variet Agent Date: Wed, 11 Mar 2026 07:55:36 +0900 Subject: [PATCH] feat(analysis): v3 re-analysis with 31 ECOS variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Best model: CREDIT_SPREAD_LAG1 + USDKRW + HOUSING_PRICE (R²=0.646) - 31 raw vars → 80+ features (DIFF/LAG1/PCT/spreads) - 2267 valid combos searched (collinearity filtered) - All Top 10 sign-consistent - Zt sign corrected: Z+ = 호황 (Belkin convention) --- data/macro_analysis.py | 304 ++++++++++++++++++++--------------------- 1 file changed, 150 insertions(+), 154 deletions(-) diff --git a/data/macro_analysis.py b/data/macro_analysis.py index 7163651..97dc063 100644 --- a/data/macro_analysis.py +++ b/data/macro_analysis.py @@ -1,28 +1,29 @@ """ -거시변수 재분석 v2 — 금리 DIFF 전용 + 계수 부호 검증 +거시변수 재분석 v3 — 31변수 확장 + Zt 부호 수정반영 규칙: -1. 금리 변수 (BASE_RATE, CD_RATE, GOVT_3Y, CORP_AA, CORP_BBB): DIFF만 허용 -2. 가격/지수 변수: DIFF, PCT, LOG, LOGR 허용 -3. 이미 변화율인 변수 (GDP_GROWTH, CPI): 원본(LEVEL), LAG1만 허용 +1. 금리 변수: DIFF만 허용 (LEVEL/LOG 등 제외) +2. 지수/금액: 원본 + DIFF/PCT/LAG1 +3. 이미 변화율 변수: 원본 + LAG1만 4. 계수 부호 경제적 일관성 체크 5. Zt: 2000~2025 (26obs) -Zt 부호 규칙: 양수 = 부도율 높음 = 경기 나쁨 +Zt 부호 (Belkin 수정후): **양수 = 호황** (PD 하락), 음수 = 불황 -경제적 부호 기대: - GDP_GROWTH: 음(-) — 성장 ↑ → 부도 ↓ → Zt ↓ - UNEMPLOYMENT: 양(+) — 실업 ↑ → 부도 ↑ → Zt ↑ - BASE_RATE_DIFF: 양(+) — 금리인상 → 부도 ↑ → Zt ↑ (또는 래그 효과) - CD_RATE_DIFF: 양(+) - CPI: 양(+) — 물가 급등 → 구매력 ↓ → 부도 ↑ → Zt ↑ - LEADING_IDX: 음(-) — 선행지수 ↑ → 경기 호전 → Zt ↓ - CORP_AA_DIFF: 양(+) — 회사채 금리 상승 → 자금조달 비용 ↑ → 부도 ↑ - CORP_BBB_DIFF: 양(+) - CREDIT_SPREAD: 양(+) — 스프레드 확대 → 신용위험 ↑ → Zt ↑ - TERM_SPREAD: 어느쪽이든 가능 (역전시 침체 신호 = 음수) - EXCHANGE_RATE: 양(+) — 원화약세 → 외화부채 ↑ → 부도 ↑ - EXPORT: 음(-) — 수출 ↑ → 경기 좋음 → Zt ↓ +경제적 부호 기대 (Zt↑ = 호황): + GDP_GROWTH: 양(+) — 성장 ↑ → 호황 → Zt ↑ + UNEMPLOYMENT: 음(-) — 실업 ↑ → 불황 → Zt ↓ + BASE_RATE_DIFF: 음(-) — 금리인상 → 긴축 → Zt ↓ + CPI_GROWTH: 음(-) — 물가급등 → 구매력↓ → Zt ↓ + LEADING_INDEX: 양(+) — 선행 ↑ → 호황 → Zt ↑ + CREDIT_SPREAD: 음(-) — 스프레드↑ → 위험↑ → Zt ↓ + EXPORT: 양(+) — 수출 ↑ → 호황 → Zt ↑ + KOSPI: 양(+) — 주가↑ → 호황 → Zt ↑ + OIL_PRICE: 음(-) — 유가↑ → 비용↑ → Zt ↓ (수입국) + DISHONOR_RATE: 음(-) — 부도율↑ → 불황 → Zt ↓ + USDKRW: 음(-) — 원화약세 → 불황 → Zt ↓ + BSI_MANUF: 양(+) — BSI↑ → 경기전망↑ → Zt ↑ + CSI: 양(+) — 소비심리↑ → 호황 → Zt ↑ """ import sys, io, itertools @@ -40,157 +41,137 @@ warnings.filterwarnings("ignore") BASE_DIR = Path(__file__).parent.parent -# 경제적 부호 기대 (양수 = 이 변수 증가 시 Zt 증가) +# Zt 부호: 양수=호황 (Belkin 수정후) +# 각 변수가 증가할때 Zt가 어느 방향으로 움직여야 하는지 EXPECTED_SIGNS = { - # 원본 - "GDP_GROWTH": -1, "UNEMPLOYMENT": +1, - "CPI": +1, "CPI_GROWTH": +1, - "LEADING_IDX": -1, "LEADING_INDEX": -1, - # 금리 차분 - "BASE_RATE_DIFF": +1, "CD_RATE_DIFF": +1, - "GOVT_3Y_DIFF": +1, - "CORP_AA_DIFF": +1, "CORP_BBB_DIFF": +1, - # 파생 - "CREDIT_SPREAD": +1, "CREDIT_SPREAD_DIFF": +1, - "CREDIT_SPREAD_LAG1": +1, - "TERM_SPREAD": 0, # 0 = 부호 제약 없음 - "TERM_SPREAD_DIFF": 0, - "TERM_SPREAD_LAG1": 0, - # 기타 - "EXCHANGE_RATE": +1, "EXCHANGE_RATE_DIFF": +1, "EXCHANGE_RATE_PCT": +1, - "EXCHANGE_RATE_LAG1": +1, - "EXPORT_PCT": -1, "IMPORT_PCT": +1, - "TRADE_BALANCE": -1, "TRADE_BALANCE_DIFF": -1, - "TRADE_BAL_PCT": -1, - "IPI": -1, "IPI_DIFF": -1, "IPI_PCT": -1, - "CSI": -1, "CSI_DIFF": -1, "CSI_LAG1": -1, - "M2_PCT": 0, "M2_LOGR": 0, + # --- 성장/경기 --- + "GDP_GROWTH": +1, "GDP_GROWTH_LAG1": +1, + "LEADING_INDEX": +1, "LEADING_INDEX_DIFF": +1, "LEADING_INDEX_LAG1": +1, + "COINCIDENT": +1, "COINCIDENT_DIFF": +1, "COINCIDENT_LAG1": +1, + "BSI_MANUF": +1, "BSI_MANUF_LAG1": +1, + + # --- 고용 --- + "UNEMPLOYMENT": -1, "UNEMPLOYMENT_LAG1": -1, "UNEMPLOYMENT_DIFF": -1, + "EMPLOYED": +1, "EMPLOYED_DIFF": +1, "EMPLOYED_PCT": +1, "EMPLOYED_LAG1": +1, + "EMPLOYMENT_RATE": +1, "EMPLOYMENT_RATE_DIFF": +1, "EMPLOYMENT_RATE_LAG1": +1, + + # --- 금리 차분 --- + "BASE_RATE_DIFF": -1, "CD_RATE_DIFF": -1, + "GOVT_3Y_DIFF": -1, "GOVT_10Y_DIFF": -1, + "CORP_AA_DIFF": -1, "CORP_BBB_DIFF": -1, + # 금리 래그 (레벨): 부호 방향 불확실 → 제약 없음 + "BASE_RATE_LAG1": 0, "CD_RATE_LAG1": 0, + "GOVT_3Y_LAG1": 0, "GOVT_10Y_LAG1": 0, + "CORP_AA_LAG1": 0, "CORP_BBB_LAG1": 0, + + # --- 물가 --- + "CPI_GROWTH": -1, "CPI_GROWTH_LAG1": -1, + "IMPORT_PRICE": 0, "IMPORT_PRICE_DIFF": -1, "IMPORT_PRICE_PCT": -1, + "IMPORT_PRICE_LAG1": 0, + "OIL_PRICE": -1, "OIL_PRICE_DIFF": -1, "OIL_PRICE_PCT": -1, "OIL_PRICE_LAG1": -1, + + # --- 스프레드/파생 --- + "CREDIT_SPREAD": -1, "CREDIT_SPREAD_DIFF": -1, "CREDIT_SPREAD_LAG1": -1, + "TERM_SPREAD": 0, "TERM_SPREAD_DIFF": 0, "TERM_SPREAD_LAG1": 0, "REAL_RATE": 0, "REAL_RATE_DIFF": 0, - # 래그 - "GDP_GROWTH_LAG1": -1, "UNEMPLOYMENT_LAG1": +1, - "CPI_LAG1": +1, "CPI_GROWTH_LAG1": +1, + + # --- 교역 --- + "EXPORT_PCT": +1, "EXPORT_DIFF": +1, + "IMPORT_AMT_PCT": -1, "IMPORT_AMT_DIFF": -1, + "TRADE_BALANCE": +1, "TRADE_BALANCE_DIFF": +1, + "CURRENT_ACCOUNT": +1, "CURRENT_ACCOUNT_DIFF": +1, "CURRENT_ACCOUNT_LAG1": +1, + + # --- 금융 --- + "USDKRW": -1, "USDKRW_DIFF": -1, "USDKRW_PCT": -1, "USDKRW_LAG1": -1, + "M2_PCT": 0, + "KOSPI": +1, "KOSPI_PCT": +1, "KOSPI_DIFF": +1, "KOSPI_LAG1": +1, + "DISHONOR_RATE": -1, "DISHONOR_RATE_DIFF": -1, "DISHONOR_RATE_LAG1": -1, + + # --- 소비/심리 --- + "CSI": +1, "CSI_DIFF": +1, "CSI_LAG1": +1, + "RETAIL_SALES": +1, "RETAIL_SALES_DIFF": +1, "RETAIL_SALES_PCT": +1, "RETAIL_SALES_LAG1": +1, + + # --- 투자/생산 --- + "IPI": +1, "IPI_DIFF": +1, "IPI_LAG1": +1, + "SPI": +1, "SPI_DIFF": +1, "SPI_LAG1": +1, + "FACILITY_INVEST": +1, "FACILITY_INVEST_DIFF": +1, "FACILITY_INVEST_PCT": +1, "FACILITY_INVEST_LAG1": +1, + + # --- 부동산/가계 --- + "HOUSING_PRICE": 0, "HOUSING_PRICE_DIFF": 0, "HOUSING_PRICE_LAG1": 0, + "HOUSEHOLD_DEBT": 0, "HOUSEHOLD_DEBT_PCT": 0, + "CONSTRUCTION_DONE": 0, "CONSTRUCTION_DONE_DIFF": 0, } # 금리 변수 목록 (DIFF만 허용) -RATE_VARS = {"BASE_RATE", "CD_RATE", "GOVT_3Y", "CORP_AA", "CORP_BBB"} +RATE_VARS = {"BASE_RATE", "CD_RATE", "GOVT_3Y", "GOVT_10Y", "CORP_AA", "CORP_BBB"} +# 이미 변화율/지수인 변수 (원본 + LAG1만) +ALREADY_RATE_VARS = {"GDP_GROWTH", "CPI_GROWTH", "UNEMPLOYMENT", "EMPLOYMENT_RATE"} -def load_fallback(): - """확장 fallback""" - data = { - 2000: {"GDP_GROWTH": 8.9, "UNEMPLOYMENT": 4.4, "BASE_RATE": 5.25, "CD_RATE": 7.09, "CPI": 2.3, "LEADING_IDX": 101.2, - "GOVT_3Y": 8.35, "CORP_AA": 9.35, "CORP_BBB": 11.90, "EXCHANGE_RATE": 1131, "EXPORT": 172268, "IMPORT": 160481, "M2": 651.8, "IPI": 102.5, "CSI": 101.0}, - 2001: {"GDP_GROWTH": 4.5, "UNEMPLOYMENT": 4.0, "BASE_RATE": 4.00, "CD_RATE": 5.34, "CPI": 4.1, "LEADING_IDX": 99.5, - "GOVT_3Y": 6.70, "CORP_AA": 8.12, "CORP_BBB": 11.27, "EXCHANGE_RATE": 1291, "EXPORT": 150439, "IMPORT": 141098, "M2": 736.5, "IPI": 99.5, "CSI": 96.5}, - 2002: {"GDP_GROWTH": 7.4, "UNEMPLOYMENT": 3.3, "BASE_RATE": 4.25, "CD_RATE": 4.99, "CPI": 2.8, "LEADING_IDX": 102.3, - "GOVT_3Y": 6.06, "CORP_AA": 7.02, "CORP_BBB": 9.75, "EXCHANGE_RATE": 1251, "EXPORT": 162471, "IMPORT": 152126, "M2": 816.3, "IPI": 108.5, "CSI": 105.0}, - 2003: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.6, "BASE_RATE": 3.75, "CD_RATE": 4.24, "CPI": 3.5, "LEADING_IDX": 98.8, - "GOVT_3Y": 4.93, "CORP_AA": 5.70, "CORP_BBB": 8.97, "EXCHANGE_RATE": 1192, "EXPORT": 193817, "IMPORT": 178827, "M2": 879.2, "IPI": 109.8, "CSI": 96.0}, - 2004: {"GDP_GROWTH": 4.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 3.25, "CD_RATE": 3.77, "CPI": 3.6, "LEADING_IDX": 100.5, - "GOVT_3Y": 4.11, "CORP_AA": 4.72, "CORP_BBB": 7.53, "EXCHANGE_RATE": 1145, "EXPORT": 253845, "IMPORT": 224463, "M2": 935.3, "IPI": 119.2, "CSI": 97.0}, - 2005: {"GDP_GROWTH": 3.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 3.75, "CD_RATE": 3.81, "CPI": 2.8, "LEADING_IDX": 101.8, - "GOVT_3Y": 4.27, "CORP_AA": 4.68, "CORP_BBB": 6.51, "EXCHANGE_RATE": 1024, "EXPORT": 284419, "IMPORT": 261238, "M2": 1002.7, "IPI": 126.0, "CSI": 100.5}, - 2006: {"GDP_GROWTH": 5.2, "UNEMPLOYMENT": 3.5, "BASE_RATE": 4.50, "CD_RATE": 4.72, "CPI": 2.2, "LEADING_IDX": 102.5, - "GOVT_3Y": 4.83, "CORP_AA": 5.25, "CORP_BBB": 7.08, "EXCHANGE_RATE": 955, "EXPORT": 325465, "IMPORT": 309383, "M2": 1089.9, "IPI": 136.0, "CSI": 106.0}, - 2007: {"GDP_GROWTH": 5.5, "UNEMPLOYMENT": 3.2, "BASE_RATE": 5.00, "CD_RATE": 5.36, "CPI": 2.5, "LEADING_IDX": 103.1, - "GOVT_3Y": 5.23, "CORP_AA": 5.70, "CORP_BBB": 7.44, "EXCHANGE_RATE": 929, "EXPORT": 371489, "IMPORT": 356846, "M2": 1181.6, "IPI": 144.5, "CSI": 108.5}, - 2008: {"GDP_GROWTH": 2.8, "UNEMPLOYMENT": 3.2, "BASE_RATE": 3.00, "CD_RATE": 5.70, "CPI": 4.7, "LEADING_IDX": 96.5, - "GOVT_3Y": 5.27, "CORP_AA": 7.02, "CORP_BBB": 10.73, "EXCHANGE_RATE": 1103, "EXPORT": 422007, "IMPORT": 435275, "M2": 1263.2, "IPI": 148.2, "CSI": 86.0}, - 2009: {"GDP_GROWTH": 0.8, "UNEMPLOYMENT": 3.6, "BASE_RATE": 2.00, "CD_RATE": 2.63, "CPI": 2.8, "LEADING_IDX": 98.2, - "GOVT_3Y": 4.04, "CORP_AA": 5.80, "CORP_BBB": 9.24, "EXCHANGE_RATE": 1276, "EXPORT": 363534, "IMPORT": 323085, "M2": 1404.4, "IPI": 140.0, "CSI": 85.0}, - 2010: {"GDP_GROWTH": 6.8, "UNEMPLOYMENT": 3.7, "BASE_RATE": 2.50, "CD_RATE": 2.80, "CPI": 2.9, "LEADING_IDX": 103.0, - "GOVT_3Y": 3.72, "CORP_AA": 4.66, "CORP_BBB": 7.98, "EXCHANGE_RATE": 1156, "EXPORT": 466384, "IMPORT": 425212, "M2": 1504.3, "IPI": 161.5, "CSI": 107.0}, - 2011: {"GDP_GROWTH": 3.7, "UNEMPLOYMENT": 3.4, "BASE_RATE": 3.25, "CD_RATE": 3.55, "CPI": 4.0, "LEADING_IDX": 101.2, - "GOVT_3Y": 3.62, "CORP_AA": 4.41, "CORP_BBB": 7.75, "EXCHANGE_RATE": 1108, "EXPORT": 555214, "IMPORT": 524413, "M2": 1586.5, "IPI": 168.0, "CSI": 100.0}, - 2012: {"GDP_GROWTH": 2.4, "UNEMPLOYMENT": 3.2, "BASE_RATE": 2.75, "CD_RATE": 3.13, "CPI": 2.2, "LEADING_IDX": 100.3, - "GOVT_3Y": 3.13, "CORP_AA": 3.76, "CORP_BBB": 6.56, "EXCHANGE_RATE": 1127, "EXPORT": 547870, "IMPORT": 519584, "M2": 1673.5, "IPI": 168.2, "CSI": 100.5}, - 2013: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.1, "BASE_RATE": 2.50, "CD_RATE": 2.72, "CPI": 1.3, "LEADING_IDX": 100.8, - "GOVT_3Y": 2.79, "CORP_AA": 3.19, "CORP_BBB": 5.87, "EXCHANGE_RATE": 1095, "EXPORT": 559632, "IMPORT": 515586, "M2": 1756.2, "IPI": 168.8, "CSI": 103.0}, - 2014: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.5, "BASE_RATE": 2.00, "CD_RATE": 2.36, "CPI": 1.3, "LEADING_IDX": 101.0, - "GOVT_3Y": 2.56, "CORP_AA": 2.99, "CORP_BBB": 5.22, "EXCHANGE_RATE": 1053, "EXPORT": 572665, "IMPORT": 525515, "M2": 1871.0, "IPI": 168.5, "CSI": 104.0}, - 2015: {"GDP_GROWTH": 2.8, "UNEMPLOYMENT": 3.6, "BASE_RATE": 1.50, "CD_RATE": 1.72, "CPI": 0.7, "LEADING_IDX": 100.5, - "GOVT_3Y": 1.80, "CORP_AA": 2.18, "CORP_BBB": 4.61, "EXCHANGE_RATE": 1131, "EXPORT": 526757, "IMPORT": 436499, "M2": 2010.0, "IPI": 168.0, "CSI": 103.5}, - 2016: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.25, "CD_RATE": 1.48, "CPI": 1.0, "LEADING_IDX": 99.8, - "GOVT_3Y": 1.44, "CORP_AA": 1.88, "CORP_BBB": 4.60, "EXCHANGE_RATE": 1161, "EXPORT": 495426, "IMPORT": 406193, "M2": 2151.1, "IPI": 168.5, "CSI": 100.0}, - 2017: {"GDP_GROWTH": 3.2, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.50, "CD_RATE": 1.52, "CPI": 1.9, "LEADING_IDX": 101.5, - "GOVT_3Y": 1.80, "CORP_AA": 2.28, "CORP_BBB": 4.83, "EXCHANGE_RATE": 1131, "EXPORT": 573694, "IMPORT": 478478, "M2": 2347.2, "IPI": 174.2, "CSI": 105.0}, - 2018: {"GDP_GROWTH": 2.9, "UNEMPLOYMENT": 3.8, "BASE_RATE": 1.75, "CD_RATE": 1.85, "CPI": 1.5, "LEADING_IDX": 100.8, - "GOVT_3Y": 2.10, "CORP_AA": 2.67, "CORP_BBB": 5.41, "EXCHANGE_RATE": 1100, "EXPORT": 604860, "IMPORT": 535202, "M2": 2508.9, "IPI": 178.0, "CSI": 102.0}, - 2019: {"GDP_GROWTH": 2.2, "UNEMPLOYMENT": 3.8, "BASE_RATE": 1.25, "CD_RATE": 1.63, "CPI": 0.4, "LEADING_IDX": 99.3, - "GOVT_3Y": 1.50, "CORP_AA": 1.93, "CORP_BBB": 4.52, "EXCHANGE_RATE": 1166, "EXPORT": 542233, "IMPORT": 503343, "M2": 2694.0, "IPI": 175.5, "CSI": 97.0}, - 2020: {"GDP_GROWTH": -0.7, "UNEMPLOYMENT": 4.0, "BASE_RATE": 0.50, "CD_RATE": 0.76, "CPI": 0.5, "LEADING_IDX": 97.0, - "GOVT_3Y": 0.98, "CORP_AA": 2.03, "CORP_BBB": 5.25, "EXCHANGE_RATE": 1180, "EXPORT": 512498, "IMPORT": 467633, "M2": 3070.2, "IPI": 170.0, "CSI": 90.0}, - 2021: {"GDP_GROWTH": 4.3, "UNEMPLOYMENT": 3.7, "BASE_RATE": 1.00, "CD_RATE": 1.09, "CPI": 2.5, "LEADING_IDX": 102.8, - "GOVT_3Y": 1.43, "CORP_AA": 2.26, "CORP_BBB": 5.64, "EXCHANGE_RATE": 1144, "EXPORT": 644400, "IMPORT": 615093, "M2": 3415.8, "IPI": 183.0, "CSI": 106.0}, - 2022: {"GDP_GROWTH": 2.6, "UNEMPLOYMENT": 2.9, "BASE_RATE": 3.25, "CD_RATE": 3.77, "CPI": 5.1, "LEADING_IDX": 99.2, - "GOVT_3Y": 3.14, "CORP_AA": 4.25, "CORP_BBB": 8.18, "EXCHANGE_RATE": 1292, "EXPORT": 683585, "IMPORT": 731370, "M2": 3561.0, "IPI": 186.5, "CSI": 95.0}, - 2023: {"GDP_GROWTH": 1.4, "UNEMPLOYMENT": 2.7, "BASE_RATE": 3.50, "CD_RATE": 3.75, "CPI": 3.6, "LEADING_IDX": 98.8, - "GOVT_3Y": 3.55, "CORP_AA": 4.40, "CORP_BBB": 8.40, "EXCHANGE_RATE": 1305, "EXPORT": 632744, "IMPORT": 642756, "M2": 3680.0, "IPI": 183.0, "CSI": 96.5}, - 2024: {"GDP_GROWTH": 2.2, "UNEMPLOYMENT": 2.8, "BASE_RATE": 3.00, "CD_RATE": 3.30, "CPI": 2.3, "LEADING_IDX": 99.5, - "GOVT_3Y": 3.20, "CORP_AA": 3.90, "CORP_BBB": 7.50, "EXCHANGE_RATE": 1350, "EXPORT": 660000, "IMPORT": 650000, "M2": 3800.0, "IPI": 185.0, "CSI": 98.0}, - 2025: {"GDP_GROWTH": 1.8, "UNEMPLOYMENT": 3.0, "BASE_RATE": 2.75, "CD_RATE": 3.00, "CPI": 1.8, "LEADING_IDX": 99.8, - "GOVT_3Y": 2.80, "CORP_AA": 3.50, "CORP_BBB": 6.80, "EXCHANGE_RATE": 1380, "EXPORT": 650000, "IMPORT": 640000, "M2": 3900.0, "IPI": 184.0, "CSI": 99.0}, - } - return pd.DataFrame(data).T.rename_axis("YEAR") +# 지수형 변수 (원본 + DIFF + LAG1) +INDEX_VARS = {"LEADING_INDEX", "COINCIDENT", "BSI_MANUF", "CSI", "IPI", "SPI", + "RETAIL_SALES", "FACILITY_INVEST", "IMPORT_PRICE", "HOUSING_PRICE"} + +# 금액형 변수 (DIFF + PCT) +AMOUNT_VARS = {"EXPORT", "IMPORT_AMT", "M2", "HOUSEHOLD_DEBT", "CONSTRUCTION_DONE", "EMPLOYED"} + +# 가격형 (원본 + DIFF + PCT + LAG1) +PRICE_VARS = {"USDKRW", "OIL_PRICE", "KOSPI"} def build_features(raw: pd.DataFrame) -> pd.DataFrame: - """변수 변환 — 금리는 DIFF만, 나머지는 허용된 변환만""" + """31개 원본 → 파생변수 생성""" feat = {} + for col in raw.columns: s = raw[col].sort_index() - + if col in RATE_VARS: - # 금리: DIFF만 feat[f"{col}_DIFF"] = s.diff() - # 래그도 금리 레벨의 LAG은 써도 됨 (차분 아님) feat[f"{col}_LAG1"] = s.shift(1) - elif col in ("GDP_GROWTH", "CPI"): - # 이미 변화율: 원본 + LAG + elif col in ALREADY_RATE_VARS: feat[col] = s feat[f"{col}_LAG1"] = s.shift(1) - elif col in ("UNEMPLOYMENT",): + elif col in INDEX_VARS: feat[col] = s - feat[f"{col}_LAG1"] = s.shift(1) feat[f"{col}_DIFF"] = s.diff() - elif col in ("LEADING_IDX", "LEADING_INDEX"): - feat[col] = s feat[f"{col}_LAG1"] = s.shift(1) + elif col in AMOUNT_VARS: feat[f"{col}_DIFF"] = s.diff() - elif col in ("EXCHANGE_RATE",): + feat[f"{col}_PCT"] = s.pct_change() * 100 + elif col in PRICE_VARS: feat[col] = s feat[f"{col}_DIFF"] = s.diff() feat[f"{col}_PCT"] = s.pct_change() * 100 feat[f"{col}_LAG1"] = s.shift(1) - elif col in ("EXPORT", "IMPORT"): - feat[f"{col}_PCT"] = s.pct_change() * 100 - feat[f"{col}_DIFF"] = s.diff() - elif col in ("M2",): - feat[f"{col}_PCT"] = s.pct_change() * 100 - elif col in ("IPI", "CSI"): + elif col == "DISHONOR_RATE": feat[col] = s feat[f"{col}_DIFF"] = s.diff() feat[f"{col}_LAG1"] = s.shift(1) - + elif col == "CURRENT_ACCOUNT": + feat[col] = s + feat[f"{col}_DIFF"] = s.diff() + feat[f"{col}_LAG1"] = s.shift(1) + # 파생 변수 if "CORP_BBB" in raw.columns and "CORP_AA" in raw.columns: cs = raw["CORP_BBB"] - raw["CORP_AA"] feat["CREDIT_SPREAD"] = cs feat["CREDIT_SPREAD_DIFF"] = cs.diff() feat["CREDIT_SPREAD_LAG1"] = cs.shift(1) - if "GOVT_3Y" in raw.columns and "BASE_RATE" in raw.columns: - ts = raw["GOVT_3Y"] - raw["BASE_RATE"] + if "GOVT_10Y" in raw.columns and "BASE_RATE" in raw.columns: + ts = raw["GOVT_10Y"] - raw["BASE_RATE"] feat["TERM_SPREAD"] = ts feat["TERM_SPREAD_DIFF"] = ts.diff() feat["TERM_SPREAD_LAG1"] = ts.shift(1) - if "BASE_RATE" in raw.columns and "CPI" in raw.columns: - feat["REAL_RATE"] = raw["BASE_RATE"] - raw["CPI"] + if "BASE_RATE" in raw.columns and "CPI_GROWTH" in raw.columns: + feat["REAL_RATE"] = raw["BASE_RATE"] - raw["CPI_GROWTH"] feat["REAL_RATE_DIFF"] = feat["REAL_RATE"].diff() - if "EXPORT" in raw.columns and "IMPORT" in raw.columns: - tb = raw["EXPORT"] - raw["IMPORT"] + if "EXPORT" in raw.columns and "IMPORT_AMT" in raw.columns: + tb = raw["EXPORT"] - raw["IMPORT_AMT"] feat["TRADE_BALANCE"] = tb feat["TRADE_BALANCE_DIFF"] = tb.diff() - + return pd.DataFrame(feat).dropna(axis=1, thresh=15) @@ -201,7 +182,7 @@ def check_sign_consistency(combo_vars, coefficients): for var, coef in zip(combo_vars, coefficients): expected = EXPECTED_SIGNS.get(var, 0) if expected == 0: - continue # 부호 제약 없음 + continue actual_sign = +1 if coef > 0 else -1 if actual_sign != expected: all_ok = False @@ -212,8 +193,8 @@ def check_sign_consistency(combo_vars, coefficients): def main(): print("=" * 70) - print(" 거시변수 재분석 v2 — 금리 DIFF 전용 + 계수 부호 검증") - print(" Zt: 2000~2025 (양수=경기악화)") + print(" 거시변수 재분석 v3 — 31변수 확장 + Zt 부호 수정") + print(" Zt: 양수=호황(Belkin), 2000~2025") print("=" * 70) # Zt @@ -225,22 +206,26 @@ def main(): ttc = compute_ttc_matrix(tm) zt_full = estimate_zt_series(tm, ttc, rho=0.20) zt_series = pd.Series(zt_full, name="Zt") - # 2000~2025만 사용 zt_series = zt_series[(zt_series.index >= 2000) & (zt_series.index <= 2025)] print(f"\n Zt: {len(zt_series)}obs ({zt_series.index.min()}~{zt_series.index.max()})") + print(f" Zt 부호 확인: 1998={zt_full.get(1998, 'N/A'):.3f} (위기=음수 OK?)") + print(f" 2006={zt_full.get(2006, 'N/A'):.3f} (호황=양수 OK?)") - # 변수 - raw = load_fallback() + # 31변수 로딩 (캐시) + from data.ecos_fetcher import load_macro_data + raw = load_macro_data(2000, 2025) + print(f"\n 원본 변수: {len(raw.columns)}개") + features = build_features(raw) features = features[(features.index >= 2000) & (features.index <= 2025)] - print(f" Features: {len(features.columns)}개") - print(f" 변수 목록: {', '.join(features.columns)}") + print(f" 파생 포함: {len(features.columns)}개") + print(f" 변수: {', '.join(sorted(features.columns))}") # 상관분석 common = sorted(set(zt_series.index) & set(features.index)) zt = zt_series.loc[common] - print(f"\n === Top 20 상관 (|r|) ===") + print(f"\n === Top 30 상관 (|r|) ===") corrs = [] for col in features.columns: s = features.loc[common, col].dropna() @@ -254,18 +239,22 @@ def main(): corrs = sorted(corrs, key=lambda x: x["abs_r"], reverse=True) print(f" {'Variable':30s} {'r':>8} {'p':>8} {'Sign':>6} {'n':>4}") - for c in corrs[:20]: + for c in corrs[:30]: sig = "***" if c["p"] < 0.01 else ("**" if c["p"] < 0.05 else ("*" if c["p"] < 0.1 else "")) print(f" {c['var']:30s} {c['r']:>7.4f}{sig:<1} {c['p']:>7.4f} {c['sign']:>6} {c['n']:>4}") # 부호 OK인 변수만 후보 sign_ok_vars = [c["var"] for c in corrs if c["sign"] == "OK" and c["abs_r"] > 0.15] print(f"\n 부호 일관 + |r|>0.15 후보: {len(sign_ok_vars)}개") + for v in sign_ok_vars: + c = next(x for x in corrs if x["var"] == v) + print(f" {v:30s} r={c['r']:+.4f}") # 3변수 탐색 print(f"\n === 3변수 Exhaustive Search (부호 검증 포함) ===") - top_n = min(25, len(sign_ok_vars)) + top_n = min(30, len(sign_ok_vars)) candidates = sign_ok_vars[:top_n] + print(f" 후보 {top_n}개에서 C({top_n},3)={len(list(itertools.combinations(range(top_n), 3)))} 조합 탐색") results = [] for combo in itertools.combinations(candidates, 3): @@ -296,7 +285,6 @@ def main(): except: continue - # 계수 부호 검증 (표준화된 계수) sign_ok, sign_issues = check_sign_consistency(combo_list, model.params[1:]) results.append({ @@ -305,38 +293,40 @@ def main(): "adj_r2": model.rsquared_adj, "aic": model.aic, "f_p": model.f_pvalue, + "dw": sm.stats.durbin_watson(model.resid), "sign_ok": sign_ok, "sign_issues": sign_issues, "pvalues": model.pvalues[1:].tolist(), "coeffs": model.params[1:].tolist(), }) - # adj R² 기준 정렬 (부호 일관 우선) results.sort(key=lambda x: (-x["sign_ok"], -x["adj_r2"])) - print(f"\n 검색: {len(results)} 조합") + print(f"\n 검색: {len(results)} 유효 조합 (공선성 제거 후)") print(f"\n === Top 10 (부호 일관 + adj.R² 기준) ===") - print(f" {'#':>3} {'R2':>7} {'adjR2':>7} {'AIC':>7} {'Sign':>5} | {'Variables (coefficient)'}") + print(f" {'#':>3} {'R2':>7} {'adjR2':>7} {'AIC':>7} {'DW':>5} {'Sign':>5} | {'Variables (coefficient)'}") for i, res in enumerate(results[:10]): vars_info = " + ".join([ f"{v}({c:+.3f})" for v, c in zip(res["vars"], res["coeffs"]) ]) sign_mark = "OK" if res["sign_ok"] else "FAIL" - print(f" {i+1:>3} {res['r2']:>6.4f} {res['adj_r2']:>6.4f} {res['aic']:>6.1f} {sign_mark:>5} | {vars_info}") + print(f" {i+1:>3} {res['r2']:>6.4f} {res['adj_r2']:>6.4f} {res['aic']:>6.1f} {res['dw']:>5.2f} {sign_mark:>5} | {vars_info}") if res["sign_issues"]: for issue in res["sign_issues"]: print(f" SIGN: {issue}") - # 최적 모형 상세 (부호 OK 중 1위) + # 최적 모형 상세 best_sign_ok = [r for r in results if r["sign_ok"]] if best_sign_ok: best = best_sign_ok[0] - print(f"\n === 최적 모형 (부호 일관) ===") + print(f"\n {'='*60}") + print(f" 최적 모형 (부호 일관)") + print(f" {'='*60}") print(f" Variables: {best['vars']}") print(f" R² = {best['r2']:.4f}, Adj.R² = {best['adj_r2']:.4f}") - print(f" AIC = {best['aic']:.2f}, F p-value = {best['f_p']:.6f}") + print(f" AIC = {best['aic']:.2f}, DW = {best['dw']:.3f}") + print(f" F p-value = {best['f_p']:.6f}") - # 상세 OLS X_df = features.loc[common, best["vars"]].dropna() valid_idx = X_df.index.intersection(zt.index) y = zt.loc[valid_idx].values @@ -347,19 +337,25 @@ def main(): model = sm.OLS(y, sm.add_constant(Xn)).fit() print(f"\n{model.summary()}") - # 전체 상위 목록에서 부호 FAIL도 보여주기 - all_top = results[:10] - best_any = all_top[0] if all_top else None - if best_any and not best_any["sign_ok"]: - print(f"\n [참고] 부호 무시 시 최고 R²={best_any['r2']:.4f}: {best_any['vars']}") + # R² > 0.7 필터 + high_r2 = [r for r in results if r["sign_ok"] and r["r2"] >= 0.7] + if high_r2: + print(f"\n === R² ≥ 0.7 조합 ({len(high_r2)}개) ===") + for i, r in enumerate(high_r2[:10]): + vi = " + ".join([f"{v}({c:+.3f})" for v, c in zip(r["vars"], r["coeffs"])]) + print(f" {i+1:>3} R²={r['r2']:.4f} adjR²={r['adj_r2']:.4f} DW={r['dw']:.2f} | {vi}") + else: + print(f"\n R² ≥ 0.7 조합: 없음 (top adj.R² = {best_sign_ok[0]['adj_r2']:.4f})") # CSV 저장 out = BASE_DIR / "results" out.mkdir(exist_ok=True) pd.DataFrame([{ "rank": i+1, "vars": " + ".join(r["vars"]), - "r2": r["r2"], "adj_r2": r["adj_r2"], "aic": r["aic"], + "r2": round(r["r2"], 4), "adj_r2": round(r["adj_r2"], 4), + "aic": round(r["aic"], 2), "dw": round(r["dw"], 3), "sign_ok": r["sign_ok"], + "coeffs": " / ".join([f"{c:+.3f}" for c in r["coeffs"]]), } for i, r in enumerate(results[:30])]).to_csv( out / "macro_top30_combos.csv", index=False )