feat: MVP index.json generator + CLI tool

- indexer.py: genereert index.json met metadata per regeling
- CLI: show, search, diff, log commando's met Rich formatting
- flake.nix: shellHook echo naar stderr (stdout schoon voor tools)

wetgit show BWBR0001840 --artikel 1  → toont artikel 1 Grondwet
wetgit search "godsdienst"           → vindt art. 1, 6, 23

Sluit #32, #33
This commit is contained in:
Coornhert 2026-03-30 10:23:20 +02:00
parent 0de70d6be0
commit a7e4a4bc16
3 changed files with 366 additions and 17 deletions

View file

@ -80,15 +80,6 @@
]; ];
shellHook = '' shellHook = ''
echo "WetGit - Nederlandse wetgeving als code"
echo "Python: $(python --version)"
echo ""
# Deactiveer eventuele oude venv die in de weg zit
if [ -n "$VIRTUAL_ENV" ]; then
deactivate 2>/dev/null || true
fi
# PYTHONPATH zodat 'import wetgit' werkt vanuit src/ # PYTHONPATH zodat 'import wetgit' werkt vanuit src/
export PYTHONPATH="$PWD/src''${PYTHONPATH:+:$PYTHONPATH}" export PYTHONPATH="$PWD/src''${PYTHONPATH:+:$PYTHONPATH}"
@ -99,10 +90,13 @@
set +a set +a
fi fi
# PyPI-only packages (niet in nixpkgs) # Deactiveer eventuele oude venv die in de weg zit
if ! python -c "import agentmail" 2>/dev/null; then if [ -n "$VIRTUAL_ENV" ]; then
pip install --user --quiet agentmail 2>/dev/null deactivate 2>/dev/null || true
fi fi
# Info naar stderr zodat stdout schoon blijft voor tools
echo "WetGit devshell | Python $(python --version 2>&1 | cut -d' ' -f2)" >&2
''; '';
}; };
}); });

View file

@ -1,17 +1,256 @@
"""WetGit CLI — command-line interface.""" """WetGit CLI — command-line interface.
Usage:
wetgit show BWBR0001840
wetgit show BWBR0001840 --artikel 1
wetgit search "persoonsgegevens"
wetgit log BWBR0001840
wetgit diff BWBR0001840 2017-11-17 2018-12-21
"""
from __future__ import annotations

import json
import re
import subprocess
from pathlib import Path

import click
from rich.console import Console
from rich.markdown import Markdown
from rich.markup import escape
from rich.table import Table

from wetgit import __version__
# Shared Rich console that every command in this module prints through.
# NOTE(review): force_terminal=True asks Rich to emit terminal control codes
# even when the output is not a TTY (e.g. piped into another tool) — confirm
# that this is intended.
console = Console(force_terminal=True)
# Working directory captured once, at import time.
# NOTE(review): not referenced anywhere in the visible code — confirm it is
# still needed.
DEFAULT_REPO = Path.cwd()
def _find_repo(repo: str | None) -> Path:
"""Zoek de wetgit/rijk repo."""
if repo:
return Path(repo)
# Probeer huidige dir, dan bekende locaties
for candidate in [Path.cwd(), Path.cwd() / "rijk", Path("/tmp/wetgit-rijk")]:
if (candidate / "index.json").exists() or (candidate / "wet").exists():
return candidate
raise click.ClickException("Geen wetgit/rijk repo gevonden. Gebruik --repo.")
def _find_regeling(repo: Path, bwb_id: str) -> Path | None:
"""Zoek een regeling op BWB-ID."""
for md in repo.rglob("README.md"):
if bwb_id in str(md):
return md
return None
# Root command group. The docstring below is rendered by Click as the --help
# text, so it stays in Dutch like the rest of the user-facing output.
# The optional --repo value (also read from the WETGIT_REPO environment
# variable) is stored on the Click context for the subcommands to pick up.
@click.group()
@click.version_option(version=__version__, prog_name="wetgit")
@click.option("--repo", envvar="WETGIT_REPO", help="Pad naar wetgit/rijk repo")
@click.pass_context
def cli(ctx: click.Context, repo: str | None) -> None:
    """WetGit — Nederlandse wetgeving als code."""
    # Make sure ctx.obj is a dict before storing the shared option in it.
    ctx.ensure_object(dict)
    ctx.obj["repo"] = repo
def _extract_artikel(text: str, artikel: str) -> str | None:
    """Return the Markdown section of one article, or None if it is absent.

    The section starts at a line beginning with ``### Artikel <artikel>`` and
    runs up to the next ``### Artikel`` heading, the next ``## `` heading, or
    the end of the text. The lookahead after the number rejects longer
    numbers that merely start with the requested one (``1`` must not match
    ``10``, ``1a``, ``1.2`` or ``1:1``).
    """
    pattern = (
        rf"^(### Artikel {re.escape(artikel)}(?!\w|[.:]\w).*?)"
        r"(?=\n### Artikel |\n## |\Z)"
    )
    match = re.search(pattern, text, re.DOTALL | re.MULTILINE)
    return match.group(1).strip() if match else None


# `wetgit show`: render a whole regulation, or one article of it, as Markdown.
# The docstring is Click's --help text and therefore stays in Dutch.
# Raises click.ClickException when the regulation or the article is missing.
@cli.command()
@click.argument("bwb_id")
@click.option("--artikel", "-a", help="Toon specifiek artikel")
@click.pass_context
def show(ctx: click.Context, bwb_id: str, artikel: str | None) -> None:
    """Toon een regeling of artikel."""
    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    text = md_path.read_text(encoding="utf-8")
    if artikel:
        section = _extract_artikel(text, artikel)
        if section is None:
            raise click.ClickException(f"Artikel {artikel} niet gevonden in {bwb_id}.")
        text = section

    console.print(Markdown(text))
# `wetgit search`: case-insensitive substring search over every README.md in
# the repo (or only below <repo>/<type> when --type is given). Each matching
# line that falls under a "### Artikel" heading becomes one table row; output
# stops after 20 rows. The docstring is Click's --help text (kept in Dutch).
@cli.command()
@click.argument("query")
@click.option("--type", "-t", "reg_type", help="Filter op type (wet, amvb, etc.)")
@click.pass_context
def search(ctx: click.Context, query: str, reg_type: str | None) -> None:
    """Doorzoek alle wetgeving."""
    max_results = 20
    repo = _find_repo(ctx.obj.get("repo"))
    needle = query.lower()  # lowered once instead of once per line

    # Everything that originates from user input or from law text is escaped,
    # otherwise Rich would interpret "[...]" in it as markup.
    table = Table(title=f'Zoekresultaten voor "{escape(query)}"')
    table.add_column("BWB-ID", style="cyan")
    table.add_column("Titel", style="bold")
    table.add_column("Artikel", style="green")
    table.add_column("Context")

    count = 0
    search_dir = repo / reg_type if reg_type else repo
    for md_path in sorted(search_dir.rglob("README.md")):
        if count >= max_results:
            break
        # The README in the repo root is not a regulation.
        if md_path.parent == repo:
            continue

        text = md_path.read_text(encoding="utf-8")
        if needle not in text.lower():
            continue
        lines = text.split("\n")

        # The directory name serves as BWB-ID; the title comes from the first
        # "titel:" line.
        bwb_id = md_path.parent.name
        titel = next(
            (
                line.split(":", 1)[1].strip().strip('"')
                for line in lines
                if line.startswith("titel:")
            ),
            "",
        )

        # Track the most recent article heading so every hit can be
        # attributed to an article; hits before the first heading are skipped.
        current_artikel = ""
        for line in lines:
            if line.startswith("### Artikel"):
                current_artikel = line.replace("### ", "")
            if current_artikel and needle in line.lower():
                table.add_row(
                    escape(bwb_id),
                    escape(titel[:40]),
                    escape(current_artikel),
                    escape(line.strip()[:80]),
                )
                count += 1
                if count >= max_results:
                    break

    console.print(table)
    console.print(f"\n{count} resultaten gevonden.")
# `wetgit log`: show the git history of one regulation's README.md as a table.
# The docstring is Click's --help text (kept in Dutch).
# Raises click.ClickException when the regulation cannot be found; a failing
# git invocation is reported on the console without raising.
@cli.command(name="log")
@click.argument("bwb_id")
@click.pass_context
def log_cmd(ctx: click.Context, bwb_id: str) -> None:
    """Toon de wijzigingshistorie van een regeling."""
    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    rel_path = md_path.relative_to(repo)
    # Tab-separated fields with a short (YYYY-MM-DD) date. The previous
    # "%h %ai %s" format could not be split on spaces reliably: %ai expands to
    # date, time and zone offset, so the offset ended up in the description.
    command = [
        "git", "log", "--date=short", "--format=%h%x09%ad%x09%s",
        "--follow", "--", str(rel_path),
    ]
    try:
        result = subprocess.run(
            command, cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing git executable or working directory.
        console.print("[red]Git log mislukt. Is dit een git repo?[/red]")
        return

    history = result.stdout.strip()
    if not history:
        console.print("[dim]Geen historie beschikbaar.[/dim]")
        return

    table = Table(title=f"Historie {bwb_id}")
    table.add_column("Commit", style="cyan")
    table.add_column("Datum", style="green")
    table.add_column("Beschrijving")

    for line in history.split("\n"):
        parts = line.split("\t", 2)
        if len(parts) == 3:
            commit, date, msg = parts
            # Commit subjects are free text; keep Rich from parsing "[...]".
            table.add_row(commit, date, escape(msg))
        else:
            table.add_row(escape(line), "", "")

    console.print(table)
# `wetgit diff`: compare two historical versions of one regulation.
# For each of the two dates the commit selected by _find_closest_commit is
# used, and the `git diff` between those commits is printed with colours.
# The docstring is Click's --help text (kept in Dutch).
# Raises click.ClickException when the regulation is missing, when git fails,
# or when no commits are available.
@cli.command()
@click.argument("bwb_id")
@click.argument("datum_van")
@click.argument("datum_tot")
@click.pass_context
def diff(ctx: click.Context, bwb_id: str, datum_van: str, datum_tot: str) -> None:
    """Vergelijk twee versies van een regeling."""
    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    rel_path = md_path.relative_to(repo)

    # Collect (hash, YYYY-MM-DD) for every commit touching the file,
    # in git log order.
    try:
        result = subprocess.run(
            ["git", "log", "--format=%H %ai %s", "--follow", "--", str(rel_path)],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, OSError) as err:
        raise click.ClickException("Git log mislukt.") from err

    commits: list[tuple[str, str]] = []
    for line in result.stdout.strip().split("\n"):
        parts = line.split(" ", 3)
        if len(parts) < 2:
            # Blank or malformed line: nothing usable in it.
            continue
        commits.append((parts[0], parts[1]))

    commit_van = _find_closest_commit(commits, datum_van)
    commit_tot = _find_closest_commit(commits, datum_tot)

    if not commit_van or not commit_tot:
        raise click.ClickException(f"Geen commits gevonden rond {datum_van} of {datum_tot}.")

    if commit_van == commit_tot:
        console.print("[dim]Geen verschil tussen deze versies.[/dim]")
        return

    try:
        result = subprocess.run(
            ["git", "diff", commit_van, commit_tot, "--", str(rel_path)],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, OSError) as err:
        raise click.ClickException("Git diff mislukt.") from err

    if not result.stdout:
        console.print("[dim]Geen verschil.[/dim]")
        return

    # markup=False throughout: arguments and diff content are arbitrary text
    # in which "[...]" must be shown literally rather than parsed as Rich
    # markup.
    console.print(
        f"Diff {bwb_id}: {datum_van} → {datum_tot}\n", style="bold", markup=False,
    )
    for line in result.stdout.split("\n"):
        if line.startswith("+") and not line.startswith("+++"):
            style = "green"
        elif line.startswith("-") and not line.startswith("---"):
            style = "red"
        elif line.startswith("@@"):
            style = "cyan"
        else:
            style = None
        console.print(line, style=style, markup=False)
def _find_closest_commit(commits: list[tuple[str, str]], target_date: str) -> str | None:
"""Vind de commit die het dichtst bij de doeldatum ligt."""
best = None
best_diff = float("inf")
for commit_hash, date_str in commits:
diff = abs(hash(date_str) - hash(target_date)) # Simpele string vergelijking
# Eigenlijk gewoon string matching — datums zijn YYYY-MM-DD
if date_str <= target_date:
if best is None or date_str > commits[[h for h, _ in commits].index(best)][1]:
best = commit_hash
# Fallback: neem de dichtstbijzijnde
if not best and commits:
for commit_hash, date_str in reversed(commits):
if date_str <= target_date:
return commit_hash
return commits[-1][0] # Oudste commit
return best

View file

@ -0,0 +1,116 @@
"""Index.json generator — metadata registry van alle regelingen.
Scant de Markdown-bestanden in een wetgit/rijk repo en genereert
een machineleesbare index met metadata per regeling.
Usage:
python -m wetgit.pipeline.indexer --repo /path/to/rijk
"""
from __future__ import annotations
import json
import logging
import re
from pathlib import Path
import frontmatter
logger = logging.getLogger(__name__)
def generate_index(repo_path: Path) -> list[dict]:
    """Build the index of all regulations found in the repo.

    Every ``README.md`` below ``repo_path`` is parsed in sorted order, except
    the one directly in the repo root. Files that yield no entry are skipped;
    files that fail to parse are logged as a warning and skipped as well.

    Args:
        repo_path: Path to the wetgit/rijk repo.

    Returns:
        List of dicts holding the metadata of each regulation.
    """
    entries: list[dict] = []

    readmes = sorted(repo_path.rglob("README.md"))
    for readme in readmes:
        is_root_readme = readme.parent == repo_path
        if is_root_readme:
            continue

        try:
            parsed = _parse_regeling(readme, repo_path)
        except Exception as exc:
            # Best effort: one broken file must not abort the whole run.
            logger.warning("Fout bij %s: %s", readme, exc)
        else:
            if parsed:
                entries.append(parsed)

    logger.info("Index: %d regelingen", len(entries))
    return entries
def _parse_regeling(md_path: Path, repo_path: Path) -> dict | None:
    """Extract the index entry for one regulation from its Markdown file.

    The front matter supplies the descriptive fields. The body is scanned for
    lines starting with ``### Artikel`` (articles) and with ``## ``
    (chapter-level headings), and both are counted. ``pad`` is the directory
    of the file relative to ``repo_path``.

    Returns:
        The entry dict, or ``None`` when the file has no front-matter metadata.
    """
    post = frontmatter.loads(md_path.read_text(encoding="utf-8"))
    meta = post.metadata
    if not meta:
        return None

    body = post.content
    heading_counts = {
        field: sum(1 for _ in re.finditer(pattern, body, re.MULTILINE))
        for field, pattern in (
            ("artikelen", r"^### Artikel"),
            ("structuur_elementen", r"^## "),
        )
    }

    # (key, default) pairs in the order in which they appear in the entry.
    meta_fields = (
        ("bwb_id", ""),
        ("titel", ""),
        ("citeertitel", None),
        ("type", ""),
        ("status", ""),
        ("datum_inwerkingtreding", None),
        ("bron", ""),
    )
    entry: dict = {}
    for key, default in meta_fields:
        entry[key] = meta.get(key, default)
    entry["pad"] = str(md_path.parent.relative_to(repo_path))
    entry.update(heading_counts)
    return entry
def write_index(repo_path: Path, index: list[dict]) -> Path:
    """Write the index to ``index.json`` in the repo root.

    The document contains a format ``version``, the entry ``count`` and the
    entries themselves under ``regelingen``. It is serialized completely
    before the file is touched, so a serialization error cannot leave a
    truncated ``index.json`` behind.

    Args:
        repo_path: Root directory of the repo; the file is created there.
        index: Entries as produced by ``generate_index``.

    Returns:
        Path of the written file.

    Raises:
        TypeError: If an entry holds a value that is neither JSON-native nor
            date-like.
    """

    def _json_default(value: object) -> str:
        # Front-matter values may arrive as date/datetime objects when the
        # YAML scalar was an unquoted date; store those as ISO-8601 strings.
        isoformat = getattr(value, "isoformat", None)
        if callable(isoformat):
            return isoformat()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    output = repo_path / "index.json"
    document = {
        "version": "1.0",
        "count": len(index),
        "regelingen": index,
    }
    serialized = json.dumps(
        document, ensure_ascii=False, indent=2, default=_json_default,
    )
    output.write_text(serialized, encoding="utf-8")

    logger.info("Geschreven: %s (%d regelingen)", output, len(index))
    return output
if __name__ == "__main__":
    # Script entry point: build the index for --repo, write index.json there
    # and print a small JSON summary on stdout.
    import argparse

    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%H:%M:%S",
        level=logging.INFO,
    )

    arg_parser = argparse.ArgumentParser(description="WetGit index.json generator")
    arg_parser.add_argument(
        "--repo", type=Path, required=True, help="Pad naar wetgit/rijk repo",
    )
    repo_dir = arg_parser.parse_args().repo

    regelingen = generate_index(repo_dir)
    index_file = write_index(repo_dir, regelingen)

    summary = {"count": len(regelingen), "output": str(index_file)}
    print(json.dumps(summary, indent=2))