feat(data): implement market-implied PD floor and 7x7 transition matrix parsing #task-290
This commit is contained in:
@@ -29,7 +29,8 @@ if sys.stdout.encoding != 'utf-8':
|
||||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
|
||||
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
|
||||
|
||||
MODEL_GRADES = ["AAA", "AA", "A", "BBB", "BB", "B", "CCC", "D"]
|
||||
MODEL_GRADES_7 = ["AAA", "AA", "A", "BBB", "BB", "B", "D"]
|
||||
MODEL_GRADES_8 = ["AAA", "AA", "A", "BBB", "BB", "B", "CCC", "D"]
|
||||
GRADE_LABELS = ["AAA", "AA", "A", "BBB", "BB"] # B이하 is separate
|
||||
|
||||
BASE_DIR = Path(__file__).parent.parent
|
||||
@@ -460,87 +461,72 @@ def _parse_kr_numbers(s: str) -> Optional[List[float]]:
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 후처리: 6×8 → 8×8
|
||||
# 후처리: 6x8 -> 7x7 (WR->D 보정 + CCC 제거)
|
||||
# ============================================================
|
||||
# Row index of the raw 6-row matrix -> broad rating grade used as the key of
# the PD-floor table.  The last raw row ("B and below") is mapped to "B".
_BROAD_GRADE_MAP_6 = {0: "AAA", 1: "AA", 2: "A", 3: "BBB", 4: "BB", 5: "B"}


def postprocess_matrix(
    raw_6x8: np.ndarray,
    pd_floors: Optional[dict] = None,
) -> np.ndarray:
    """Convert a raw 6x8 transition table (in percent) into a 7x7 matrix.

    The raw rows are the six starting grades AAA, AA, A, BBB, BB and
    "B and below".  The raw columns are those same six grades followed by
    D (default, column 6) and WR (rating withdrawn, column 7).

    Steps:
        1. PD floor: where a row's observed D percentage is below the floor
           for its grade, move mass from WR to D (at most the WR mass that
           is available).
        2. Drop the WR column.  Re-normalising the remaining seven columns
           spreads the leftover WR mass proportionally over them, so no
           separate redistribution pass is needed.
        3. Treat the "B and below" row/column as grade B.
        4. Append D as an absorbing state (D -> D with probability 1).
        5. Convert percent to probability and normalise each row to sum 1.

    Args:
        raw_6x8: Array-like of shape (6, 8) holding percentages.  It is not
            modified; the function works on a float copy.
        pd_floors: Optional mapping from broad grade name ("AAA" ... "B")
            to a PD floor expressed as a decimal fraction (0.01 == 1%).
            Grades that are missing, or whose floor is None, get no floor.

    Returns:
        A (7, 7) float array ordered AAA, AA, A, BBB, BB, B, D.  Rows whose
        non-WR mass is positive sum to 1.  A row with no positive non-WR
        mass cannot be normalised and is returned un-normalised (all zeros
        for an all-zero or WR-only row).

    Raises:
        ValueError: If the input does not have shape (6, 8).
    """
    # Force a float copy: with an integer input, adding a fractional floor
    # transfer would be silently truncated on assignment.
    mat = np.array(raw_6x8, dtype=float)
    if mat.shape != (6, 8):
        # Explicit raise instead of `assert`, which disappears under `-O`.
        raise ValueError(f"Expected (6,8), got {mat.shape}")

    col_d = 6
    col_wr = 7

    # Step 1: PD floor correction (WR -> D transfer).
    if pd_floors is not None:
        for row, broad in _BROAD_GRADE_MAP_6.items():
            floor = pd_floors.get(broad)
            if floor is None:
                continue
            deficit = floor * 100.0 - mat[row, col_d]  # decimal -> percent
            available = mat[row, col_wr]
            if deficit > 0 and available > 0:
                transfer = min(deficit, available)
                mat[row, col_d] += transfer
                mat[row, col_wr] -= transfer

    # Steps 2, 3 and 5: drop WR, go from percent to probability, and
    # normalise.  Dividing by the row sum is the proportional redistribution
    # of the remaining WR mass.  Rows without positive mass are left alone.
    rows = mat[:, :col_wr] / 100.0
    row_sums = rows.sum(axis=1, keepdims=True)
    np.divide(rows, row_sums, out=rows, where=row_sums > 0)

    # Step 4: assemble the 7x7 result with D as an absorbing state.
    mat_7x7 = np.zeros((7, 7))
    mat_7x7[:6, :] = rows
    mat_7x7[6, 6] = 1.0

    return mat_7x7
|
||||
|
||||
|
||||
# ============================================================
|
||||
@@ -549,6 +535,17 @@ def _extrapolate_ccc(mat: np.ndarray) -> np.ndarray:
|
||||
def main():
|
||||
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Load PD floors
|
||||
broad_floors = None
|
||||
try:
|
||||
import sys as _sys
|
||||
_sys.path.insert(0, str(BASE_DIR))
|
||||
from data.pd_floor import build_complete_pd_floor_table
|
||||
broad_floors, _, _ = build_complete_pd_floor_table()
|
||||
print(f" PD floor loaded: {', '.join(f'{g}={v*10000:.1f}bp' for g, v in broad_floors.items())}")
|
||||
except Exception as e:
|
||||
print(f" PD floor load failed ({e}), proceeding without floor")
|
||||
|
||||
all_matrices = {}
|
||||
|
||||
for agency, pdf_path in PDF_FILES.items():
|
||||
@@ -582,7 +579,7 @@ def main():
|
||||
processed = {}
|
||||
for year, raw_mat in sorted(raw.items()):
|
||||
try:
|
||||
processed[year] = postprocess_matrix(raw_mat)
|
||||
processed[year] = postprocess_matrix(raw_mat, pd_floors=broad_floors)
|
||||
except Exception as e:
|
||||
print(f" ERROR {year}: {e}")
|
||||
|
||||
@@ -590,7 +587,7 @@ def main():
|
||||
print(f" Processed {len(processed)} matrices")
|
||||
|
||||
for year, mat in processed.items():
|
||||
df = pd.DataFrame(mat, index=MODEL_GRADES, columns=MODEL_GRADES)
|
||||
df = pd.DataFrame(mat, index=MODEL_GRADES_7, columns=MODEL_GRADES_7)
|
||||
df.to_csv(OUTPUT_DIR / f"{agency}_{year}.csv", float_format="%.6f")
|
||||
|
||||
# 3사 평균
|
||||
@@ -609,11 +606,11 @@ def main():
|
||||
|
||||
for year in common_years:
|
||||
avg = np.mean([all_matrices[a][year] for a in agency_names], axis=0)
|
||||
for i in range(8):
|
||||
for i in range(7):
|
||||
s = avg[i].sum()
|
||||
if s > 0:
|
||||
avg[i] /= s
|
||||
df = pd.DataFrame(avg, index=MODEL_GRADES, columns=MODEL_GRADES)
|
||||
df = pd.DataFrame(avg, index=MODEL_GRADES_7, columns=MODEL_GRADES_7)
|
||||
df.to_csv(OUTPUT_DIR / f"AVG_{year}.csv", float_format="%.6f")
|
||||
|
||||
# PD 요약
|
||||
@@ -632,7 +629,7 @@ def main():
|
||||
if sample_year not in common_years and not any(sample_year in all_matrices[a] for a in agency_names):
|
||||
continue
|
||||
print(f"\n Year {sample_year}:")
|
||||
for gi, grade in enumerate(MODEL_GRADES[:-1]):
|
||||
for gi, grade in enumerate(MODEL_GRADES_7[:-1]):
|
||||
print(f" {grade:>5}:", end='')
|
||||
for a in agency_names:
|
||||
if sample_year in all_matrices[a]:
|
||||
|
||||
Reference in New Issue
Block a user