wip: [01-stabilize] paused at task 1/1 - OCR Hallucination Immune logic via Semantic delta window and fret-isolation
This commit is contained in:
1
.agent/knowledge/awesome_claude/tools/__init__.py
Normal file
1
.agent/knowledge/awesome_claude/tools/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Tooling helpers package."""
|
||||
49
.agent/knowledge/awesome_claude/tools/readme_tree/README.md
Normal file
49
.agent/knowledge/awesome_claude/tools/readme_tree/README.md
Normal file
@@ -0,0 +1,49 @@
|
||||
# README Tree Utility
|
||||
|
||||
This utility keeps the file tree in `docs/README-GENERATION.md` up to date using a
|
||||
YAML configuration file. It updates only the section between `<!-- TREE:START -->`
|
||||
and `<!-- TREE:END -->`.
|
||||
|
||||
## Usage
|
||||
|
||||
Update the tree block:
|
||||
|
||||
```bash
|
||||
python tools/readme_tree/update_readme_tree.py
|
||||
```
|
||||
|
||||
Check for drift (CI-friendly):
|
||||
|
||||
```bash
|
||||
python tools/readme_tree/update_readme_tree.py --check
|
||||
```
|
||||
|
||||
## Config
|
||||
|
||||
The configuration lives in `tools/readme_tree/config.yaml`.
|
||||
|
||||
Key fields:
|
||||
- `doc_path`: Target document with the tree markers.
|
||||
- `marker_start`, `marker_end`: Marker strings for insertion.
|
||||
- `include`: Root-relative paths to include in the tree.
|
||||
- `ignore`: Patterns to ignore.
|
||||
- `prune`: Paths to include but not descend into.
|
||||
- `max_depth`: Maximum depth for traversal.
|
||||
- `entries`: Map of path → comment strings.
|
||||
- `virtual_entries`: Map of path → comment for paths that don't exist on disk.
|
||||
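- `order`: Map of directory → ordered child names. An entry matches a child by exact name; prefix it with `glob:` (e.g. `glob:*.svg`) to match by glob pattern.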
|
||||
- `respect_gitignore`: When true (default), .gitignore patterns are applied.
|
||||
|
||||
### Notes on Patterns
|
||||
|
||||
When `respect_gitignore` is true, the script uses `git check-ignore` to exclude
|
||||
ignored paths. The `ignore` patterns in the config are a simple glob-style filter
|
||||
with support for `!` negation, leading `/` anchors, and trailing `/` directory
|
||||
rules.
|
||||
|
||||
## Make Targets
|
||||
|
||||
```bash
|
||||
make docs-tree # Update the tree block
|
||||
make docs-tree-check # Fail if the tree block is out of date
|
||||
```
|
||||
@@ -0,0 +1 @@
|
||||
"""README tree tooling package."""
|
||||
181
.agent/knowledge/awesome_claude/tools/readme_tree/config.yaml
Normal file
181
.agent/knowledge/awesome_claude/tools/readme_tree/config.yaml
Normal file
@@ -0,0 +1,181 @@
|
||||
root: "awesome-claude-code/"
|
||||
doc_path: "docs/README-GENERATION.md"
|
||||
marker_start: "<!-- TREE:START -->"
|
||||
marker_end: "<!-- TREE:END -->"
|
||||
max_depth: 5
|
||||
respect_gitignore: true
|
||||
|
||||
include:
|
||||
- "THE_RESOURCES_TABLE.csv"
|
||||
- "acc-config.yaml"
|
||||
- "README.md"
|
||||
- "README_ALTERNATIVES/"
|
||||
- "templates/"
|
||||
- "scripts/readme/"
|
||||
- "scripts/ticker/"
|
||||
- "scripts/categories/"
|
||||
- "scripts/resources/"
|
||||
- "assets/"
|
||||
- "data/"
|
||||
- "docs/README-GENERATION.md"
|
||||
|
||||
ignore:
|
||||
- "__init__.py"
|
||||
- "py.typed"
|
||||
|
||||
prune:
|
||||
- "README_ALTERNATIVES/"
|
||||
- "assets/"
|
||||
|
||||
entries:
|
||||
"THE_RESOURCES_TABLE.csv": "Master data file"
|
||||
"acc-config.yaml": "Root style + selector config"
|
||||
"README.md": "Generated root README (root_style)"
|
||||
"README_ALTERNATIVES/": "All generated README variants"
|
||||
"templates/": "README templates and supporting YAML"
|
||||
"templates/categories.yaml": "Category definitions"
|
||||
"templates/announcements.yaml": "Announcements content"
|
||||
"templates/footer.template.md": "Shared footer"
|
||||
"templates/README_EXTRA.template.md": "Extra style template"
|
||||
"templates/README_CLASSIC.template.md": "Classic style template"
|
||||
"templates/README_AWESOME.template.md": "Awesome style template"
|
||||
"templates/resource-overrides.yaml": "Manual resource overrides"
|
||||
"scripts/readme/": "README generation pipeline"
|
||||
"scripts/readme/generators/": "README generator classes by style"
|
||||
"scripts/readme/generate_readme.py": "Generator entrypoint"
|
||||
"scripts/readme/helpers/": "Config/utils/assets helpers for README generation"
|
||||
"scripts/readme/markup/": "Markdown/HTML renderers by style"
|
||||
"scripts/readme/svg_templates/": "SVG renderers used by the generator"
|
||||
"scripts/readme/generators/base.py": "ReadmeGenerator base + shared helpers"
|
||||
"scripts/readme/generators/visual.py": "Extra (visual) README generator"
|
||||
"scripts/readme/generators/minimal.py": "Classic README generator"
|
||||
"scripts/readme/generators/awesome.py": "Awesome list README generator"
|
||||
"scripts/readme/generators/flat.py": "Flat list README generator"
|
||||
"scripts/ticker/": "Repo ticker generation scripts"
|
||||
"scripts/ticker/generate_ticker_svg.py": "Repo ticker SVG generator"
|
||||
"scripts/ticker/fetch_repo_ticker_data.py": "GitHub stats fetcher"
|
||||
"scripts/categories/": "Category management scripts"
|
||||
"scripts/categories/category_utils.py": "Category management"
|
||||
"scripts/categories/add_category.py": "Category addition tool"
|
||||
"scripts/resources/": "Resource maintenance scripts"
|
||||
"scripts/resources/sort_resources.py": "CSV sorting (used by generator)"
|
||||
"scripts/resources/resource_utils.py": "CSV append + PR content helpers"
|
||||
"assets/": "SVG badges, headers, dividers"
|
||||
"data/": "Generated ticker data"
|
||||
"data/repo-ticker.csv": "Current repository stats"
|
||||
"data/repo-ticker-previous.csv": "Previous stats (for deltas)"
|
||||
"docs/README-GENERATION.md": "This file"
|
||||
|
||||
virtual_entries:
|
||||
"README_ALTERNATIVES/README_EXTRA.md": "Generated (Extra style, always)"
|
||||
"README_ALTERNATIVES/README_CLASSIC.md": "Generated (Classic style)"
|
||||
"README_ALTERNATIVES/README_AWESOME.md": "Generated (Awesome list style)"
|
||||
"README_ALTERNATIVES/README_FLAT_*.md": "Generated (44 flat list views)"
|
||||
"assets/badge-*.svg": "Resource badges (auto-generated)"
|
||||
"assets/header_*.svg": "Category headers"
|
||||
"assets/section-divider-*.svg": "Section dividers"
|
||||
"assets/desc-box-*.svg": "Description boxes"
|
||||
"assets/toc-*.svg": "TOC elements"
|
||||
"assets/subheader_*.svg": "Subcategory headers"
|
||||
"assets/badge-sort-*.svg": "Flat list sort badges"
|
||||
"assets/badge-cat-*.svg": "Flat list category badges"
|
||||
"assets/badge-style-*.svg": "Style selector badges"
|
||||
"assets/repo-ticker*.svg": "Animated repo tickers"
|
||||
"assets/entry-separator-*.svg": "Entry separators"
|
||||
|
||||
order:
|
||||
"": [
|
||||
"THE_RESOURCES_TABLE.csv",
|
||||
"acc-config.yaml",
|
||||
"README.md",
|
||||
"README_ALTERNATIVES",
|
||||
"templates",
|
||||
"scripts",
|
||||
"assets",
|
||||
"data",
|
||||
"docs",
|
||||
]
|
||||
"README_ALTERNATIVES": [
|
||||
"README_EXTRA.md",
|
||||
"README_CLASSIC.md",
|
||||
"README_AWESOME.md",
|
||||
"README_FLAT_*.md",
|
||||
]
|
||||
"templates": [
|
||||
"categories.yaml",
|
||||
"announcements.yaml",
|
||||
"README_EXTRA.template.md",
|
||||
"README_CLASSIC.template.md",
|
||||
"README_AWESOME.template.md",
|
||||
"footer.template.md",
|
||||
]
|
||||
"scripts": [
|
||||
"readme",
|
||||
"ticker",
|
||||
"categories",
|
||||
"resources",
|
||||
]
|
||||
"scripts/readme": [
|
||||
"generate_readme.py",
|
||||
"generators",
|
||||
"helpers",
|
||||
"markup",
|
||||
"svg_templates",
|
||||
]
|
||||
"scripts/readme/generators": [
|
||||
"base.py",
|
||||
"visual.py",
|
||||
"minimal.py",
|
||||
"awesome.py",
|
||||
"flat.py",
|
||||
]
|
||||
"scripts/readme/helpers": [
|
||||
"readme_assets.py",
|
||||
"readme_config.py",
|
||||
"readme_utils.py",
|
||||
]
|
||||
"scripts/readme/markup": [
|
||||
"awesome.py",
|
||||
"flat.py",
|
||||
"minimal.py",
|
||||
"shared.py",
|
||||
"visual.py",
|
||||
]
|
||||
"scripts/readme/svg_templates": [
|
||||
"badges.py",
|
||||
"dividers.py",
|
||||
"headers.py",
|
||||
"toc.py",
|
||||
]
|
||||
"scripts/ticker": [
|
||||
"generate_ticker_svg.py",
|
||||
"fetch_repo_ticker_data.py",
|
||||
]
|
||||
"scripts/categories": [
|
||||
"category_utils.py",
|
||||
"add_category.py",
|
||||
]
|
||||
"scripts/resources": [
|
||||
"sort_resources.py",
|
||||
"resource_utils.py",
|
||||
]
|
||||
"assets": [
|
||||
"badge-*.svg",
|
||||
"header_*.svg",
|
||||
"section-divider-*.svg",
|
||||
"desc-box-*.svg",
|
||||
"toc-*.svg",
|
||||
"subheader_*.svg",
|
||||
"badge-sort-*.svg",
|
||||
"badge-cat-*.svg",
|
||||
"badge-style-*.svg",
|
||||
"repo-ticker*.svg",
|
||||
"entry-separator-*.svg",
|
||||
]
|
||||
"data": [
|
||||
"repo-ticker.csv",
|
||||
"repo-ticker-previous.csv",
|
||||
]
|
||||
"docs": [
|
||||
"README-GENERATION.md",
|
||||
]
|
||||
@@ -0,0 +1,585 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Update the README generation tree block from a YAML spec."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import difflib
|
||||
import fnmatch
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
@dataclass
class Node:
    """One entry (file or directory) in the tree that gets rendered."""

    # Final path component of the entry.
    name: str
    # True when the entry is a directory.
    is_dir: bool
    # Direct children keyed by their name; every instance gets its own dict.
    children: dict[str, Node] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class IgnoreRule:
    """Immutable, structured form of one ignore pattern from the config."""

    # Pattern text with the "!" / leading "/" / trailing "/" markers removed.
    pattern: str
    # True when the raw pattern started with "!".
    negated: bool
    # True when the raw pattern ended with "/".
    dir_only: bool
    # True when the raw pattern started with "/".
    anchored: bool
|
||||
|
||||
|
||||
@dataclass
class GitIgnoreChecker:
    """Check paths against gitignore using `git check-ignore`.

    The checker disables itself (``enabled = False``) when git is missing,
    when ``repo_root`` is not inside a work tree, or when `git check-ignore`
    exits with an unexpected status. A disabled checker reports every path as
    not ignored.
    """

    # Directory passed to `git -C` for every git invocation.
    repo_root: Path
    # When False, is_ignored() short-circuits to False.
    enabled: bool = True
    # Canonical repo-relative path -> ignored verdict.
    _cache: dict[str, bool] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Disable checking when git is unavailable."""
        if not self._git_available():
            self.enabled = False

    def _git_available(self) -> bool:
        """Return True if git is available and repo_root is a git work tree."""
        try:
            result = subprocess.run(
                [
                    "git",
                    "-C",
                    str(self.repo_root),
                    "rev-parse",
                    "--is-inside-work-tree",
                ],
                check=False,
                capture_output=True,
                text=True,
            )
        except FileNotFoundError:
            # The git executable is not installed / not on PATH.
            return False
        return result.returncode == 0

    @staticmethod
    def _canon_rel(rel_path: str) -> str:
        """Canonicalize a repo-relative POSIX path for comparing with git output.

        Backslashes become "/", surrounding whitespace is stripped, one leading
        "./" and one trailing "/" are removed.
        """
        p = rel_path.replace("\\", "/").strip()
        if p.startswith("./"):
            p = p[2:]
        # git check-ignore may echo trailing slashes for dir queries; normalize them away
        if p.endswith("/"):
            p = p[:-1]
        return p

    def _check(self, paths: list[str]) -> set[str]:
        """Return the subset of paths ignored by git, using a single subprocess.

        Args:
            paths: Repo-relative paths; they are canonicalized before querying.

        Returns:
            Canonicalized paths that git reported as ignored. Empty when there
            is nothing to query or when the git call fails (in which case the
            checker is also disabled).
        """
        # Canonicalize, drop entries that collapse to "", and de-duplicate while
        # keeping order. An empty path must never reach git: it makes
        # `check-ignore` fail, which would needlessly disable the checker.
        canon_queries = list(
            dict.fromkeys(
                canon for canon in (self._canon_rel(p) for p in paths if p) if canon
            )
        )
        if not canon_queries:
            return set()

        # NUL-separated input/output (-z) keeps unusual file names unambiguous.
        payload = "\0".join(canon_queries) + "\0"
        try:
            result = subprocess.run(
                ["git", "-C", str(self.repo_root), "check-ignore", "-z", "--stdin"],
                input=payload.encode("utf-8"),
                check=False,
                capture_output=True,
            )
        except FileNotFoundError:
            # git is not available after all; behave like _git_available() failing.
            self.enabled = False
            return set()

        if result.returncode not in (0, 1):
            # Something unexpected (e.g. not a repo, weird git error). Disable to be safe.
            self.enabled = False
            return set()

        output = result.stdout.decode("utf-8", errors="replace")
        return {self._canon_rel(entry) for entry in output.split("\0") if entry}

    def is_ignored(self, rel_path: str, is_dir: bool) -> bool:
        """Return True if a path is ignored by gitignore rules.

        Args:
            rel_path: Repo-relative path to test.
            is_dir: Whether the path is a directory. Kept for interface
                compatibility; queries are canonicalized without a trailing
                slash, so the directory and file forms are the same query.

        Returns:
            The (cached) verdict, or False when the checker is disabled.
        """
        if not self.enabled:
            return False

        canon = self._canon_rel(rel_path)
        cached = self._cache.get(canon)
        if cached is not None:
            return cached

        match = canon in self._check([canon])
        # Cache under the canonical key so "dir" and "dir/" share one entry.
        self._cache[canon] = match
        return match
|
||||
|
||||
|
||||
def find_repo_root(start: Path) -> Path:
    """Locate the repo root.

    Prefer git to identify the VCS root; fall back to walking upward for pyproject.toml.

    Args:
        start: Path inside the repo. May be a directory or a file.

    Returns:
        The repo root path.

    Raises:
        RuntimeError: If git reports no root and no ancestor contains
            pyproject.toml.
    """
    p = start.resolve()

    # `git -C` requires a directory. Callers may hand in a file (for example
    # the config file path), so ask git from the containing directory instead.
    git_cwd = p if p.is_dir() else p.parent
    try:
        result = subprocess.run(
            ["git", "-C", str(git_cwd), "rev-parse", "--show-toplevel"],
            check=False,
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        # git is not installed; use the pyproject.toml fallback below.
        result = None

    if result is not None and result.returncode == 0:
        git_root = result.stdout.strip()
        if git_root:
            return Path(git_root)

    # Fallback: walk upward until pyproject.toml exists.
    for candidate in (p, *p.parents):
        if (candidate / "pyproject.toml").exists():
            return candidate
    raise RuntimeError("Repo root not found (no git root and no pyproject.toml)")
|
||||
|
||||
|
||||
def normalize_key(path: str | Path | None) -> str:
    """Turn a path-like config key into a slash-separated, slash-trimmed string.

    ``None``, ``""``, ``"."`` and ``"./"`` all map to the empty string (the
    repo root). Otherwise backslashes become "/" and leading/trailing "/" are
    removed.
    """
    text = "" if path is None else str(path).strip()
    if text in ("", ".", "./"):
        return ""
    return text.replace("\\", "/").strip("/")
|
||||
|
||||
|
||||
def load_config(config_path: Path) -> dict:
    """Read the tree-generation YAML config and return it as a mapping.

    Raises:
        RuntimeError: If the top-level YAML value is not a mapping.
    """
    raw_text = config_path.read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw_text)
    if isinstance(parsed, dict):
        return parsed
    raise RuntimeError("Invalid config format")
|
||||
|
||||
|
||||
def parse_ignore_rule(pattern: str | Path | None) -> IgnoreRule | None:
    """Convert one raw ignore pattern into an IgnoreRule.

    Markers are peeled off in a fixed order: a leading "!" (negation), then a
    leading "/" (anchor), then a trailing "/" (directory-only).

    Returns:
        The parsed rule, or None for ``None``, blank lines, "#" comments, and
        patterns that are empty once the markers are removed.
    """
    if pattern is None:
        return None

    text = str(pattern).strip()
    if text == "" or text[0] == "#":
        return None

    is_negated = text[0] == "!"
    if is_negated:
        text = text[1:]

    # Slicing (rather than indexing) keeps these safe when text is now empty.
    is_anchored = text[:1] == "/"
    if is_anchored:
        text = text[1:]

    is_dir_only = text[-1:] == "/"
    if is_dir_only:
        text = text[:-1]

    text = text.replace("\\", "/").strip()
    if text == "":
        return None

    return IgnoreRule(
        pattern=text,
        negated=is_negated,
        dir_only=is_dir_only,
        anchored=is_anchored,
    )
|
||||
|
||||
|
||||
def parse_ignore_rules(patterns: list[str | Path]) -> list[IgnoreRule]:
    """Parse every pattern, keeping input order and dropping unparsable ones."""
    candidates = (parse_ignore_rule(raw) for raw in patterns)
    return [rule for rule in candidates if rule]
|
||||
|
||||
|
||||
def matches_ignore_rule(rule: IgnoreRule, rel_path: str, is_dir: bool) -> bool:
    """Tell whether ``rel_path`` is matched by ``rule``.

    Directory-only rules compare path segments literally (no globbing) and
    never match non-directories. All other rules use ``fnmatch``: anchored
    rules against the whole path, unanchored rules against the whole path or
    any "*/"-prefixed form, and slash-free patterns additionally against the
    basename.
    """
    pat = rule.pattern

    if rule.dir_only:
        if not is_dir:
            return False
        # The directory itself, or something below it from the top level.
        if rel_path == pat or rel_path.startswith(f"{pat}/"):
            return True
        if rule.anchored:
            return False
        # Unanchored: the directory name may occur at any depth.
        return rel_path.endswith(f"/{pat}") or f"/{pat}/" in f"/{rel_path}/"

    if rule.anchored:
        return fnmatch.fnmatch(rel_path, pat)

    # Slash-free patterns get a basename match first.
    if "/" not in pat and fnmatch.fnmatch(Path(rel_path).name, pat):
        return True

    return fnmatch.fnmatch(rel_path, pat) or fnmatch.fnmatch(rel_path, f"*/{pat}")
|
||||
|
||||
|
||||
def is_ignored(rel_path: str, is_dir: bool, rules: list[IgnoreRule]) -> bool:
    """Apply ordered ignore rules to a path; the last matching rule wins.

    A matching plain rule ignores the path, a matching negated rule
    un-ignores it. With no matching rule the path is not ignored.
    """
    # Scanning from the end lets us stop at the first hit, which is by
    # definition the last matching rule in config order.
    for rule in reversed(rules):
        if matches_ignore_rule(rule, rel_path, is_dir):
            return not rule.negated
    return False
|
||||
|
||||
|
||||
def is_pruned(rel_path: str, patterns: list[str]) -> bool:
    """Report whether ``rel_path`` is a prune root or lies beneath one.

    Blank patterns are skipped; the rest are normalized to forward slashes
    with leading/trailing "/" removed before the comparison.
    """
    non_blank = (stripped for stripped in (raw.strip() for raw in patterns) if stripped)
    prune_roots = (entry.replace("\\", "/").strip("/") for entry in non_blank)
    return any(
        rel_path == prune_root or rel_path.startswith(f"{prune_root}/")
        for prune_root in prune_roots
    )
|
||||
|
||||
|
||||
def add_path(root: Node, parts: list[str], is_dir: bool) -> Node:
    """Insert a path below ``root`` and return the node for its last segment.

    Missing intermediate nodes are created as directories. Empty segments are
    skipped. The node finally reached always has ``is_dir`` overwritten with
    the given value (this is ``root`` itself when no segment is non-empty).
    """
    last_index = len(parts) - 1
    cursor = root
    for index, segment in enumerate(parts):
        if segment:
            child = cursor.children.get(segment)
            if child is None:
                # Only the final segment takes the caller's flag; everything
                # above it has to be a directory.
                child = Node(
                    name=segment,
                    is_dir=True if index != last_index else is_dir,
                )
                cursor.children[segment] = child
            cursor = child
    cursor.is_dir = is_dir
    return cursor
|
||||
|
||||
|
||||
def walk_include(
    root: Node,
    repo_root: Path,
    include_path: Path,
    max_depth: int,
    ignore: list[IgnoreRule],
    gitignore: GitIgnoreChecker | None,
    prune: list[str],
    *,
    base_rel: str,
    base_depth: int,
) -> None:
    """Recursively add ``include_path`` and its contents to the tree.

    Paths rejected by the gitignore checker or by the config ignore rules are
    skipped. Pruned directories are added themselves but not descended into.
    The depth limit is measured from the include root (``base_rel``), not
    from the repo root; a ``max_depth`` of 0 disables the limit.
    """

    def excluded(rel_path: str, path_is_dir: bool) -> bool:
        # gitignore is consulted first; the config rules only when it passes.
        if gitignore and gitignore.is_ignored(rel_path, path_is_dir):
            return True
        return is_ignored(rel_path, path_is_dir, ignore)

    rel = include_path.relative_to(repo_root).as_posix()
    if excluded(rel, include_path.is_dir()):
        return

    segments = rel.split("/")
    if include_path.is_file():
        add_path(root, segments, is_dir=False)
        return

    add_path(root, segments, is_dir=True)
    if is_pruned(rel, prune):
        return

    # Case-insensitive name order keeps traversal deterministic.
    for child in sorted(include_path.iterdir(), key=lambda entry: entry.name.lower()):
        child_rel = child.relative_to(repo_root).as_posix()
        child_is_dir = child.is_dir()
        if excluded(child_rel, child_is_dir):
            continue

        # Depth relative to include root
        if base_rel:
            below_include = child_rel[len(base_rel) :].lstrip("/")
        else:
            below_include = child_rel
        depth = base_depth
        if below_include:
            depth += len(below_include.split("/"))
        if max_depth and depth > max_depth:
            continue

        if not child_is_dir:
            add_path(root, child_rel.split("/"), is_dir=False)
            continue

        walk_include(
            root,
            repo_root,
            child,
            max_depth,
            ignore,
            gitignore,
            prune,
            base_rel=base_rel,
            base_depth=base_depth,
        )
|
||||
|
||||
|
||||
def _virtual_entry_map(virtual_entries: object) -> dict[str, str]:
    """Coerce the ``virtual_entries`` config value into a path -> comment dict.

    Accepts a mapping, a list of ``{"path": ..., "comment": ...}`` mappings,
    or ``None`` (a key present in the YAML but left empty).

    Raises:
        RuntimeError: For any other type, or for list items that are not mappings.
    """
    if virtual_entries is None:
        return {}
    if isinstance(virtual_entries, dict):
        return {str(k): ("" if v is None else str(v)) for k, v in virtual_entries.items()}
    if isinstance(virtual_entries, list):
        mapping: dict[str, str] = {}
        for item in virtual_entries:
            if not isinstance(item, dict):
                raise RuntimeError(
                    "virtual_entries list items must be mappings with 'path' and optional 'comment'"
                )
            entry_path = item.get("path")
            comment = item.get("comment", "")
            if entry_path:
                mapping[str(entry_path)] = str(comment) if comment is not None else ""
        return mapping
    raise RuntimeError("virtual_entries must be a mapping or a list")


def build_tree(config: dict, repo_root: Path) -> Node:
    """Build a tree based on config includes and virtual entries.

    Args:
        config: Parsed YAML config. Reads ``root``, ``include``, ``ignore``,
            ``prune``, ``max_depth``, ``respect_gitignore`` and
            ``virtual_entries``. Keys that are present but empty (YAML null)
            are treated like missing keys.
        repo_root: Directory that include paths are resolved against.

    Returns:
        The root node of the assembled tree.

    Raises:
        RuntimeError: If an included path does not exist, or if
            ``virtual_entries`` has an unsupported shape.
    """
    root_label = config.get("root", repo_root.name)
    root = Node(name=str(root_label), is_dir=True)

    # `or` guards: a bare "include:" / "ignore:" / ... line parses to None.
    include = [normalize_key(p) for p in (config.get("include") or [])]
    ignore_patterns = config.get("ignore") or []
    prune = [normalize_key(p) for p in (config.get("prune") or [])]
    max_depth = int(config.get("max_depth") or 0)
    respect_gitignore = bool(config.get("respect_gitignore", True))

    ignore_rules = parse_ignore_rules(ignore_patterns)
    git_checker = GitIgnoreChecker(repo_root) if respect_gitignore else None

    for item in include:
        if not item:
            continue
        path = repo_root / item
        if not path.exists():
            raise RuntimeError(f"Included path does not exist: {item}")

        base_rel = path.relative_to(repo_root).as_posix()
        walk_include(
            root,
            repo_root,
            path,
            max_depth,
            ignore_rules,
            git_checker,
            prune,
            base_rel=base_rel,
            base_depth=0,  # include root itself counts as depth 0
        )

    # Virtual entries are added by name only; they need not exist on disk.
    for path_str in _virtual_entry_map(config.get("virtual_entries", {})):
        if not path_str:
            continue
        # A trailing "/" on the raw key marks a virtual directory.
        is_dir = path_str.endswith("/")
        norm = normalize_key(path_str)
        if norm:
            add_path(root, norm.split("/"), is_dir=is_dir)

    return root
|
||||
|
||||
|
||||
def sort_children(node: Node, path: str, order_map: dict[str, list[str]]) -> list[Node]:
    """Return ``node``'s children in display order.

    Children matched by the order list configured for ``path`` come first, in
    list order. An entry matches by exact name, or by glob when it carries a
    ``glob:`` prefix. Everything else follows: directories before files, each
    group sorted case-insensitively by name.
    """
    configured = order_map.get(path, [])

    def hits(entry: str, name: str) -> bool:
        if entry.startswith("glob:"):
            return fnmatch.fnmatch(name, entry[5:])
        return name == entry

    def rank(child: Node) -> tuple[int, int, str]:
        folded = child.name.lower()
        # First matching entry decides the position among ordered children.
        position = next(
            (pos for pos, entry in enumerate(configured) if hits(entry, child.name)),
            None,
        )
        if position is None:
            # dirs first, then files
            return (1, 0 if child.is_dir else 1, folded)
        return (0, position, folded)

    ordered = list(node.children.values())
    ordered.sort(key=rank)
    return ordered
|
||||
|
||||
|
||||
def render_tree(root: Node, comments: dict[str, str], order_map: dict[str, list[str]]) -> list[str]:
    """Render the tree into a list of display lines.

    Args:
        root: Root node; its name becomes the first line.
        comments: Map of slash-separated path (relative to the root) to the
            comment appended after that entry.
        order_map: Per-directory ordering passed through to ``sort_children``.

    Returns:
        One string per line, starting with the root label.
    """
    # The configured root label may already end in "/"; strip it so the header
    # never renders with a doubled slash.
    lines: list[str] = [f"{root.name.rstrip('/')}/"]

    def render_node(node: Node, prefix: str, path: str, is_last: bool) -> None:
        connector = "└── " if is_last else "├── "
        name = f"{node.name}/" if node.is_dir else node.name
        comment = comments.get(path, "")
        line = f"{prefix}{connector}{name}"
        if comment:
            line += f" # {comment}"
        lines.append(line)

        if not node.children:
            return

        # Continuation prefixes are four columns wide, matching the connectors
        # above, so nested levels line up under their parent entry.
        child_prefix = prefix + ("    " if is_last else "│   ")
        children = sort_children(node, path, order_map)
        last_index = len(children) - 1
        for idx, child in enumerate(children):
            child_path = f"{path}/{child.name}" if path else child.name
            render_node(child, child_prefix, child_path, idx == last_index)

    top_level = sort_children(root, "", order_map)
    last_top = len(top_level) - 1
    for idx, child in enumerate(top_level):
        render_node(child, "", child.name, idx == last_top)

    return lines
|
||||
|
||||
|
||||
def update_document(
    doc_path: Path,
    marker_start: str,
    marker_end: str,
    block: str,
    check: bool,
    *,
    debug: bool = False,
    debug_context: dict[str, str] | None = None,
) -> None:
    """Replace the text between two markers in a document, or verify it is current.

    Args:
        doc_path: Document to read (and, unless ``check``, rewrite).
        marker_start: Marker after which the block is inserted.
        marker_end: Marker before which the block ends; searched for after
            ``marker_start``.
        block: Text placed between the markers, surrounded by newlines.
        check: When True the file is never written; a mismatch raises instead.
        debug: In check mode, print the debug context and a unified diff to
            stderr before raising.
        debug_context: Optional key/value details printed ahead of the diff.

    Raises:
        RuntimeError: If either marker is missing, or if ``check`` is True and
            the document differs from the expected content.
    """
    content = doc_path.read_text(encoding="utf-8")

    start = content.find(marker_start)
    if start == -1:
        raise RuntimeError("Tree start marker not found in document")
    block_start = start + len(marker_start)

    end = content.find(marker_end, block_start)
    if end == -1:
        raise RuntimeError("Tree end marker not found in document")

    updated = content[:block_start] + "\n" + block + "\n" + content[end:]

    if not check:
        doc_path.write_text(updated, encoding="utf-8")
        return

    if updated == content:
        return

    if debug:
        if debug_context:
            print("README TREE CHECK DEBUG CONTEXT:", file=sys.stderr)
            for k, v in debug_context.items():
                print(f"- {k}: {v}", file=sys.stderr)

        diff = difflib.unified_diff(
            content.splitlines(keepends=True),
            updated.splitlines(keepends=True),
            fromfile=str(doc_path),
            tofile=str(doc_path) + " (expected)",
        )
        # Every diff line already ends in a newline (keepends=True and the
        # default lineterm), so write them as-is; joining with "\n" would
        # double-space the output.
        sys.stderr.writelines(diff)

    raise RuntimeError("Tree block is out of date")
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point for updating the README tree block.

    Parses ``--config``, ``--check`` and ``--debug``, builds the tree from the
    config, renders it as a fenced block and hands it to ``update_document``.

    Returns:
        0 on success, 1 when the config file or the target document is missing.

    Raises:
        RuntimeError: Propagated from the helpers, e.g. when ``--check`` finds
            the tree block out of date.
    """
    parser = argparse.ArgumentParser(description="Update README tree block.")
    parser.add_argument(
        "--config",
        default="tools/readme_tree/config.yaml",
        help="Path to the tree config file.",
    )
    parser.add_argument("--check", action="store_true", help="Fail if updates are needed.")
    parser.add_argument("--debug", action="store_true", help="Print debug info on mismatch.")

    args = parser.parse_args()

    config_path = Path(args.config)
    if not config_path.exists():
        print(f"Config not found: {config_path}", file=sys.stderr)
        return 1

    repo_root = find_repo_root(config_path)
    config = load_config(config_path)

    doc_path = repo_root / config.get("doc_path", "docs/README-GENERATION.md")
    if not doc_path.exists():
        print(f"Doc not found: {doc_path}", file=sys.stderr)
        return 1

    tree = build_tree(config, repo_root)

    # `or {}`: a key that is present but empty in the YAML parses to None.
    comments = {normalize_key(k): v for k, v in (config.get("entries") or {}).items()}

    # Virtual entries may be a mapping or a list of {"path", "comment"} items
    # (build_tree accepts both); collect comments from either form. Explicit
    # `entries` comments take precedence via setdefault.
    virtual_entries = config.get("virtual_entries") or {}
    if isinstance(virtual_entries, dict):
        virtual_pairs = list(virtual_entries.items())
    elif isinstance(virtual_entries, list):
        virtual_pairs = [
            (item["path"], item.get("comment"))
            for item in virtual_entries
            if isinstance(item, dict) and item.get("path")
        ]
    else:
        virtual_pairs = []
    for key, value in virtual_pairs:
        if value is None:
            continue
        comments.setdefault(normalize_key(key), str(value))

    order_map = {normalize_key(k): v for k, v in (config.get("order") or {}).items()}

    lines = render_tree(tree, comments, order_map)
    block = "```" + "\n" + "\n".join(lines) + "\n```"

    debug_context = {
        "python": sys.version.replace("\n", " "),
        "platform": platform.platform(),
        "cwd": str(Path.cwd()),
        "doc_path": str(doc_path),
        "config_path": str(config_path),
        "LANG": os.environ.get("LANG", ""),
        "LC_ALL": os.environ.get("LC_ALL", ""),
    }

    if args.debug:
        # The git details are only ever printed in debug mode, so skip the
        # subprocess calls otherwise. Each probe is best-effort and
        # independent: one failing (git missing, config key unset) must not
        # drop the others.
        git_probes = {
            "git_version": ["git", "--version"],
            "git_toplevel": ["git", "-C", str(repo_root), "rev-parse", "--show-toplevel"],
            "git_core_ignorecase": [
                "git",
                "-C",
                str(repo_root),
                "config",
                "--get",
                "core.ignorecase",
            ],
        }
        for label, command in git_probes.items():
            try:
                debug_context[label] = subprocess.check_output(
                    command, text=True, stderr=subprocess.DEVNULL
                ).strip()
            except (OSError, subprocess.CalledProcessError):
                continue

    update_document(
        doc_path,
        config.get("marker_start", "<!-- TREE:START -->"),
        config.get("marker_end", "<!-- TREE:END -->"),
        block,
        args.check,
        debug=args.debug,
        debug_context=debug_context,
    )

    return 0
|
||||
|
||||
|
||||
# Script entry: run the CLI and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user