feat: MVP index.json generator + CLI tool

- indexer.py: genereert index.json met metadata per regeling
- CLI: show, search, diff, log commando's met Rich formatting
- flake.nix: shellHook echo naar stderr (stdout schoon voor tools)

wetgit show BWBR0001840 --artikel 1  → toont artikel 1 Grondwet
wetgit search "godsdienst"           → vindt art. 1, 6, 23

Sluit #32, #33
This commit is contained in:
Coornhert 2026-03-30 10:23:20 +02:00
parent 0de70d6be0
commit a7e4a4bc16
3 changed files with 366 additions and 17 deletions

View file

@ -80,15 +80,6 @@
];
shellHook = ''
echo "WetGit - Nederlandse wetgeving als code"
echo "Python: $(python --version)"
echo ""
# Deactiveer eventuele oude venv die in de weg zit
if [ -n "$VIRTUAL_ENV" ]; then
deactivate 2>/dev/null || true
fi
# PYTHONPATH zodat 'import wetgit' werkt vanuit src/
export PYTHONPATH="$PWD/src''${PYTHONPATH:+:$PYTHONPATH}"
@ -99,10 +90,13 @@
set +a
fi
# PyPI-only packages (niet in nixpkgs)
if ! python -c "import agentmail" 2>/dev/null; then
pip install --user --quiet agentmail 2>/dev/null
# Deactiveer eventuele oude venv die in de weg zit
if [ -n "$VIRTUAL_ENV" ]; then
deactivate 2>/dev/null || true
fi
# Info naar stderr zodat stdout schoon blijft voor tools
echo "WetGit devshell | Python $(python --version 2>&1 | cut -d' ' -f2)" >&2
'';
};
});

View file

@ -1,17 +1,256 @@
"""WetGit CLI — command-line interface.

Usage:
    wetgit show BWBR0001840
    wetgit show BWBR0001840 --artikel 1
    wetgit search "persoonsgegevens"
    wetgit log BWBR0001840
    wetgit diff BWBR0001840 2017-11-17 2018-12-21
"""
from __future__ import annotations
import json
import re
import subprocess
from pathlib import Path
import click
from rich.console import Console
from rich.markdown import Markdown
from rich.table import Table
from wetgit import __version__
# Shared Rich console used by the commands in this module for all output.
# force_terminal=True tells Rich to emit terminal control codes regardless of
# whether the output stream is detected as a terminal.
# NOTE(review): this also applies when output is piped to another tool — confirm intended.
console = Console(force_terminal=True)
# Working directory captured once, at import time.
# NOTE(review): not referenced in the visible part of this file — confirm it is still needed.
DEFAULT_REPO = Path.cwd()
def _find_repo(repo: str | None) -> Path:
"""Zoek de wetgit/rijk repo."""
if repo:
return Path(repo)
# Probeer huidige dir, dan bekende locaties
for candidate in [Path.cwd(), Path.cwd() / "rijk", Path("/tmp/wetgit-rijk")]:
if (candidate / "index.json").exists() or (candidate / "wet").exists():
return candidate
raise click.ClickException("Geen wetgit/rijk repo gevonden. Gebruik --repo.")
def _find_regeling(repo: Path, bwb_id: str) -> Path | None:
"""Zoek een regeling op BWB-ID."""
for md in repo.rglob("README.md"):
if bwb_id in str(md):
return md
return None
# Root command group. The group-level --repo option (or the WETGIT_REPO
# environment variable) is stored on the click context so that every
# subcommand can read it via ctx.obj["repo"].
# The docstring below is the help text click shows to the user.
@click.group()
@click.version_option(version=__version__, prog_name="wetgit")
@click.option("--repo", envvar="WETGIT_REPO", help="Pad naar wetgit/rijk repo")
@click.pass_context
def cli(ctx: click.Context, repo: str | None) -> None:
    """WetGit — Nederlandse wetgeving als code."""
    # ensure_object creates ctx.obj as a dict when no object was supplied.
    ctx.ensure_object(dict)
    ctx.obj["repo"] = repo
# Subcommand that prints the program name followed by the package version.
# The docstring below is the help text click shows to the user.
@cli.command()
def version() -> None:
    """Toon de WetGit versie."""
    banner = f"wetgit {__version__}"
    click.echo(banner)
# Subcommand: render a whole regulation, or a single article of it, as Markdown.
# Raises click.ClickException when the regulation or the article is not found.
# The docstring below is the help text click shows to the user.
@cli.command()
@click.argument("bwb_id")
@click.option("--artikel", "-a", help="Toon specifiek artikel")
@click.pass_context
def show(ctx: click.Context, bwb_id: str, artikel: str | None) -> None:
    """Toon een regeling of artikel."""
    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    text = md_path.read_text(encoding="utf-8")

    if artikel:
        # Capture from the article heading up to (not including) the next
        # article heading, the next level-2 heading, or the end of the text.
        # The \b keeps "1" from matching the heading of article "10".
        pattern = rf"(### Artikel {re.escape(artikel)}\b.*?)(?=\n### Artikel |\n## |\Z)"
        match = re.search(pattern, text, re.DOTALL)
        if not match:
            raise click.ClickException(f"Artikel {artikel} niet gevonden in {bwb_id}.")
        text = match.group(1).strip()

    console.print(Markdown(text))
# Subcommand: case-insensitive substring search over every README.md below the
# repo (or below the <repo>/<type> directory when --type is given). Prints at
# most 20 hits as a table, followed by the number of hits.
# The docstring below is the help text click shows to the user.
@cli.command()
@click.argument("query")
@click.option("--type", "-t", "reg_type", help="Filter op type (wet, amvb, etc.)")
@click.pass_context
def search(ctx: click.Context, query: str, reg_type: str | None) -> None:
    """Doorzoek alle wetgeving."""
    # Local import: the query and the document text are escaped before they
    # reach Rich, which would otherwise parse square-bracketed text (such as
    # Markdown links) as console markup.
    from rich.markup import escape

    max_results = 20
    repo = _find_repo(ctx.obj.get("repo"))
    needle = query.lower()  # lowered once instead of once per line

    table = Table(title=f'Zoekresultaten voor "{escape(query)}"')
    table.add_column("BWB-ID", style="cyan")
    table.add_column("Titel", style="bold")
    table.add_column("Artikel", style="green")
    table.add_column("Context")

    count = 0
    search_root = repo / reg_type if reg_type else repo

    for md_path in sorted(search_root.rglob("README.md")):
        if count >= max_results:
            break
        # Skip the README in the repo root itself.
        if md_path.parent == repo:
            continue

        text = md_path.read_text(encoding="utf-8")
        if needle not in text.lower():
            continue

        lines = text.split("\n")

        # The BWB-ID is the directory name; the title is taken from the first
        # line that starts with "titel:".
        bwb_id = md_path.parent.name
        titel = ""
        for line in lines:
            if line.startswith("titel:"):
                titel = line.split(":", 1)[1].strip().strip('"')
                break

        # Track the most recent article heading so each hit can be attributed
        # to an article; matches before the first heading are not reported.
        current_artikel = ""
        for line in lines:
            if line.startswith("### Artikel"):
                current_artikel = line.replace("### ", "")
            if current_artikel and needle in line.lower():
                table.add_row(
                    escape(bwb_id),
                    escape(titel[:40]),
                    escape(current_artikel),
                    escape(line.strip()[:80]),
                )
                count += 1
                if count >= max_results:
                    break

    console.print(table)
    console.print(f"\n{count} resultaten gevonden.")
# Subcommand "log": show the git history of one regulation's README.md as a
# table of abbreviated commit hash, author date and subject line.
# Raises click.ClickException when the regulation is not found; a failing git
# invocation is reported on the console instead of raising.
# The docstring below is the help text click shows to the user.
@cli.command(name="log")
@click.argument("bwb_id")
@click.pass_context
def log_cmd(ctx: click.Context, bwb_id: str) -> None:
    """Toon de wijzigingshistorie van een regeling."""
    # Local import: commit subjects are escaped so square brackets in them are
    # shown literally instead of being parsed as Rich markup.
    from rich.markup import escape

    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    rel_path = md_path.relative_to(repo)

    # Only the git call lives inside the try; OSError covers a missing git
    # executable or an unusable working directory.
    try:
        result = subprocess.run(
            ["git", "log", "--format=%h %ai %s", "--follow", "--", str(rel_path)],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        console.print("[red]Git log mislukt. Is dit een git repo?[/red]")
        return

    history = result.stdout.strip()
    if not history:
        console.print("[dim]Geen historie beschikbaar.[/dim]")
        return

    table = Table(title=f"Historie {escape(bwb_id)}")
    table.add_column("Commit", style="cyan")
    table.add_column("Datum", style="green")
    table.add_column("Beschrijving")

    for line in history.split("\n"):
        # %ai expands to "<date> <time> <utc-offset>", so a full line has five
        # space-separated fields: hash, date, time, offset, subject. The
        # subject is absent when a commit has an empty message.
        parts = line.split(" ", 4)
        if len(parts) >= 4:
            msg = parts[4] if len(parts) > 4 else ""
            table.add_row(parts[0], parts[1], escape(msg))
        else:
            # Unexpected shape: show the raw line rather than dropping it.
            table.add_row(escape(line), "", "")

    console.print(table)
# Subcommand: diff one regulation's README.md between the commits selected by
# _find_closest_commit for the two given dates (expected as YYYY-MM-DD).
# Raises click.ClickException when the regulation is unknown, no commit can be
# selected, or a git invocation fails.
# The docstring below is the help text click shows to the user.
@cli.command()
@click.argument("bwb_id")
@click.argument("datum_van")
@click.argument("datum_tot")
@click.pass_context
def diff(ctx: click.Context, bwb_id: str, datum_van: str, datum_tot: str) -> None:
    """Vergelijk twee versies van een regeling."""
    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    rel_path = md_path.relative_to(repo)

    # Collect (hash, date) for every commit that touched the file.
    try:
        result = subprocess.run(
            ["git", "log", "--format=%H %ai %s", "--follow", "--", str(rel_path)],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, OSError) as err:
        raise click.ClickException("Git log mislukt.") from err

    commits: list[tuple[str, str]] = []
    for line in result.stdout.strip().split("\n"):
        parts = line.split(" ", 3)
        # Ignore blank or malformed lines; fields 0 and 1 are hash and date.
        if len(parts) >= 2:
            commits.append((parts[0], parts[1]))

    commit_van = _find_closest_commit(commits, datum_van)
    commit_tot = _find_closest_commit(commits, datum_tot)

    if not commit_van or not commit_tot:
        raise click.ClickException(f"Geen commits gevonden rond {datum_van} of {datum_tot}.")

    if commit_van == commit_tot:
        console.print("[dim]Geen verschil tussen deze versies.[/dim]")
        return

    try:
        result = subprocess.run(
            ["git", "diff", commit_van, commit_tot, "--", str(rel_path)],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, OSError) as err:
        raise click.ClickException("Git diff mislukt.") from err

    if not result.stdout:
        console.print("[dim]Geen verschil.[/dim]")
        return

    # User input and diff text are printed with markup disabled and coloured
    # through the style argument, so square brackets in them are shown
    # literally instead of being parsed as Rich markup.
    console.print(
        f"Diff {bwb_id}: {datum_van} → {datum_tot}\n", style="bold", markup=False
    )
    for line in result.stdout.split("\n"):
        if line.startswith("+") and not line.startswith("+++"):
            style = "green"  # added line
        elif line.startswith("-") and not line.startswith("---"):
            style = "red"  # removed line
        elif line.startswith("@@"):
            style = "cyan"  # hunk header
        else:
            style = None  # context and file-header lines
        console.print(line, style=style, markup=False)
def _find_closest_commit(commits: list[tuple[str, str]], target_date: str) -> str | None:
"""Vind de commit die het dichtst bij de doeldatum ligt."""
best = None
best_diff = float("inf")
for commit_hash, date_str in commits:
diff = abs(hash(date_str) - hash(target_date)) # Simpele string vergelijking
# Eigenlijk gewoon string matching — datums zijn YYYY-MM-DD
if date_str <= target_date:
if best is None or date_str > commits[[h for h, _ in commits].index(best)][1]:
best = commit_hash
# Fallback: neem de dichtstbijzijnde
if not best and commits:
for commit_hash, date_str in reversed(commits):
if date_str <= target_date:
return commit_hash
return commits[-1][0] # Oudste commit
return best

View file

@ -0,0 +1,116 @@
"""Index.json generator — metadata registry van alle regelingen.
Scant de Markdown-bestanden in een wetgit/rijk repo en genereert
een machineleesbare index met metadata per regeling.
Usage:
python -m wetgit.pipeline.indexer --repo /path/to/rijk
"""
from __future__ import annotations
import json
import logging
import re
from pathlib import Path
import frontmatter
logger = logging.getLogger(__name__)
def generate_index(repo_path: Path) -> list[dict]:
    """Build the index of all regulations found in the repo.

    Every ``README.md`` below ``repo_path`` is parsed, except one located
    directly in ``repo_path``. A file whose parsing raises is logged as a
    warning and skipped; a file that yields no entry is skipped silently.

    Args:
        repo_path: Path to the wetgit/rijk repo.

    Returns:
        List of metadata dicts, one per regulation, in sorted path order.
    """
    entries: list[dict] = []

    for readme in sorted(repo_path.rglob("README.md")):
        # The README in the repo root is not a regulation.
        if readme.parent == repo_path:
            continue

        try:
            parsed = _parse_regeling(readme, repo_path)
        except Exception as exc:
            # Best effort: one unreadable file must not abort the whole run.
            logger.warning("Fout bij %s: %s", readme, exc)
            continue

        if parsed:
            entries.append(parsed)

    logger.info("Index: %d regelingen", len(entries))
    return entries
def _parse_regeling(md_path: Path, repo_path: Path) -> dict | None:
    """Extract the index entry for one regulation from its Markdown file.

    Args:
        md_path: Path to the regulation's Markdown file.
        repo_path: Repo root; the entry's ``pad`` is relative to it.

    Returns:
        A dict with the front-matter fields, the directory path relative to the
        repo and two heading counts, or ``None`` when the file has no front
        matter metadata.
    """
    post = frontmatter.loads(md_path.read_text(encoding="utf-8"))
    meta = post.metadata
    if not meta:
        return None

    body = post.content

    entry: dict = {}
    entry["bwb_id"] = meta.get("bwb_id", "")
    entry["titel"] = meta.get("titel", "")
    entry["citeertitel"] = meta.get("citeertitel")
    entry["type"] = meta.get("type", "")
    entry["status"] = meta.get("status", "")
    entry["datum_inwerkingtreding"] = meta.get("datum_inwerkingtreding")
    entry["bron"] = meta.get("bron", "")
    # Directory of the file, relative to the repo root.
    entry["pad"] = str(md_path.parent.relative_to(repo_path))
    # Number of lines that start with an article heading.
    entry["artikelen"] = len(re.findall(r"^### Artikel", body, re.MULTILINE))
    # Number of level-2 headings (chapters, books, parts).
    entry["structuur_elementen"] = len(re.findall(r"^## ", body, re.MULTILINE))
    return entry
def write_index(repo_path: Path, index: list[dict]) -> Path:
    """Write the index to ``index.json`` in the repo root.

    The document is serialized completely before the file is opened, so a
    serialization error cannot leave a truncated ``index.json`` behind.
    ``datetime.date``/``datetime.datetime`` values are written as ISO 8601
    strings.
    NOTE(review): such values are expected when the front matter contains
    unquoted dates — confirm against real regulation files.

    Args:
        repo_path: Repo root; the file is written directly inside it.
        index: Entries to store under the ``regelingen`` key.

    Returns:
        Path of the written file.

    Raises:
        TypeError: If an entry contains a value JSON cannot represent.
    """
    # Local import: only needed by the fallback encoder below.
    from datetime import date

    def _encode_extra(value: object) -> str:
        """Encode date-like values; reject everything else like json does."""
        if isinstance(value, date):  # datetime is a subclass of date
            return value.isoformat()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    output = repo_path / "index.json"
    document = {
        "version": "1.0",
        "count": len(index),
        "regelingen": index,
    }
    serialized = json.dumps(
        document, ensure_ascii=False, indent=2, default=_encode_extra
    )
    output.write_text(serialized, encoding="utf-8")

    logger.info("Geschreven: %s (%d regelingen)", output, len(index))
    return output
if __name__ == "__main__":
    # Script entry point: build the index for --repo, write index.json and
    # print a small JSON summary (entry count and output path) to stdout.
    import argparse

    parser = argparse.ArgumentParser(description="WetGit index.json generator")
    parser.add_argument("--repo", type=Path, required=True, help="Pad naar wetgit/rijk repo")

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%H:%M:%S",
    )

    args = parser.parse_args()
    index = generate_index(args.repo)
    output = write_index(args.repo, index)

    summary = {"count": len(index), "output": str(output)}
    print(json.dumps(summary, indent=2))