feat(data): implement market-implied PD floor and 7x7 transition matrix parsing #task-290

This commit is contained in:
Variet Agent
2026-03-11 15:53:38 +09:00
parent 0762fcc5d8
commit b8514c1251
235 changed files with 1729 additions and 1102 deletions

View File

@@ -29,7 +29,8 @@ if sys.stdout.encoding != 'utf-8':
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
# Eight rating labels, CCC included, ending with the default state "D".
MODEL_GRADES = ["AAA", "AA", "A", "BBB", "BB", "B", "CCC", "D"]
# Seven rating labels without CCC; used by main() to label the 7x7 matrices it writes to CSV.
MODEL_GRADES_7 = ["AAA", "AA", "A", "BBB", "BB", "B", "D"]
# Same eight labels as MODEL_GRADES.
MODEL_GRADES_8 = ["AAA", "AA", "A", "BBB", "BB", "B", "CCC", "D"]
GRADE_LABELS = ["AAA", "AA", "A", "BBB", "BB"] # the "B and below" bucket is handled separately
# Directory two levels above this file; main() adds it to sys.path to import data.pd_floor.
BASE_DIR = Path(__file__).parent.parent
@@ -460,87 +461,72 @@ def _parse_kr_numbers(s: str) -> Optional[List[float]]:
# ============================================================
# 후처리: 6×8 → 8×8
# 후처리: 6x8 -> 7x7 (WR->D 보정 + CCC 제거)
# ============================================================
# Row index of the raw 6-row table -> grade key used to look up a PD floor.
# Row 5 is the "B and below" bucket and is looked up as "B".
_BROAD_GRADE_MAP_6 = {0: "AAA", 1: "AA", 2: "A", 3: "BBB", 4: "BB", 5: "B"}


def postprocess_matrix(raw_6x8: np.ndarray, pd_floors: Optional[dict] = None) -> np.ndarray:
    """Turn a raw 6x8 table (values in percent) into a 7x7 transition matrix.

    Input rows are AAA..B-and-below; input columns are AAA..B-and-below
    (0-5), D (6) and WR (7). The output rows/columns are AAA..B plus D,
    expressed as probabilities.

    Steps:
        1. PD floor correction: where a row's D value is below its floor,
           move mass from WR to D (at most the deficit, at most what WR holds).
        2. Spread the remaining WR mass proportionally over the other columns.
        3. Copy rows 0-5 / columns 0-6 into the 7x7 result (B-and-below -> B).
        4. Add the D row as an absorbing state.
        5. Convert percent to probability and normalise each non-empty row
           to sum to 1.

    Args:
        raw_6x8: Array-like of shape (6, 8), in percent. It is never modified.
        pd_floors: Optional mapping from grade key ("AAA", "AA", "A", "BBB",
            "BB", "B") to a PD floor given as a decimal fraction. Grades
            missing from the mapping get no floor. ``None`` skips step 1.

    Returns:
        A new float array of shape (7, 7). Rows whose non-WR entries sum to
        zero are left as all zeros.

    Raises:
        ValueError: If the input does not have shape (6, 8).
    """
    # Always work on a float copy: an integer-typed input would otherwise
    # silently truncate the fractional WR -> D transfer below.
    mat = np.array(raw_6x8, dtype=float)
    if mat.shape != (6, 8):
        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
        raise ValueError(f"Expected (6,8), got {mat.shape}")

    COL_D = 6
    COL_WR = 7

    # Step 1: PD floor correction (WR -> D transfer)
    if pd_floors is not None:
        for i, broad in _BROAD_GRADE_MAP_6.items():
            if broad not in pd_floors:
                continue
            floor_pct = pd_floors[broad] * 100  # decimal -> %
            deficit = floor_pct - mat[i, COL_D]
            wr_available = mat[i, COL_WR]
            if deficit > 0 and wr_available > 0:
                transfer = min(deficit, wr_available)
                mat[i, COL_D] += transfer
                mat[i, COL_WR] -= transfer

    # Step 2: Remaining WR -> proportional redistribution over columns 0..6
    for i in range(6):
        wr_remaining = mat[i, COL_WR]
        if wr_remaining > 0:
            non_wr_sum = mat[i, :COL_WR].sum()
            if non_wr_sum > 0:
                mat[i, :COL_WR] *= (non_wr_sum + wr_remaining) / non_wr_sum
            mat[i, COL_WR] = 0.0

    # Step 3: build the 7x7 matrix; the D column keeps index 6, WR is dropped
    mat_7x7 = np.zeros((7, 7))
    mat_7x7[:6, :] = mat[:, :COL_WR]

    # Step 4: D row (absorbing state)
    mat_7x7[6, COL_D] = 100.0

    # Step 5: Convert to probability and normalize; empty rows stay all-zero
    mat_7x7 /= 100.0
    row_sums = mat_7x7.sum(axis=1, keepdims=True)
    np.divide(mat_7x7, row_sums, out=mat_7x7, where=row_sums > 0)

    return mat_7x7
# ============================================================
@@ -549,6 +535,17 @@ def _extrapolate_ccc(mat: np.ndarray) -> np.ndarray:
def main():
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Load PD floors
broad_floors = None
try:
import sys as _sys
_sys.path.insert(0, str(BASE_DIR))
from data.pd_floor import build_complete_pd_floor_table
broad_floors, _, _ = build_complete_pd_floor_table()
print(f" PD floor loaded: {', '.join(f'{g}={v*10000:.1f}bp' for g, v in broad_floors.items())}")
except Exception as e:
print(f" PD floor load failed ({e}), proceeding without floor")
all_matrices = {}
for agency, pdf_path in PDF_FILES.items():
@@ -582,7 +579,7 @@ def main():
processed = {}
for year, raw_mat in sorted(raw.items()):
try:
processed[year] = postprocess_matrix(raw_mat)
processed[year] = postprocess_matrix(raw_mat, pd_floors=broad_floors)
except Exception as e:
print(f" ERROR {year}: {e}")
@@ -590,7 +587,7 @@ def main():
print(f" Processed {len(processed)} matrices")
for year, mat in processed.items():
df = pd.DataFrame(mat, index=MODEL_GRADES, columns=MODEL_GRADES)
df = pd.DataFrame(mat, index=MODEL_GRADES_7, columns=MODEL_GRADES_7)
df.to_csv(OUTPUT_DIR / f"{agency}_{year}.csv", float_format="%.6f")
# 3사 평균
@@ -609,11 +606,11 @@ def main():
for year in common_years:
avg = np.mean([all_matrices[a][year] for a in agency_names], axis=0)
for i in range(8):
for i in range(7):
s = avg[i].sum()
if s > 0:
avg[i] /= s
df = pd.DataFrame(avg, index=MODEL_GRADES, columns=MODEL_GRADES)
df = pd.DataFrame(avg, index=MODEL_GRADES_7, columns=MODEL_GRADES_7)
df.to_csv(OUTPUT_DIR / f"AVG_{year}.csv", float_format="%.6f")
# PD 요약
@@ -632,7 +629,7 @@ def main():
if sample_year not in common_years and not any(sample_year in all_matrices[a] for a in agency_names):
continue
print(f"\n Year {sample_year}:")
for gi, grade in enumerate(MODEL_GRADES[:-1]):
for gi, grade in enumerate(MODEL_GRADES_7[:-1]):
print(f" {grade:>5}:", end='')
for a in agency_names:
if sample_year in all_matrices[a]: