feat: MVP index.json generator + CLI tool
- indexer.py: genereert index.json met metadata per regeling - CLI: show, search, diff, log commando's met Rich formatting - flake.nix: shellHook echo naar stderr (stdout schoon voor tools) wetgit show BWBR0001840 --artikel 1 → toont artikel 1 Grondwet wetgit search "godsdienst" → vindt art. 1, 6, 23 Sluit #32, #33
This commit is contained in:
parent
0de70d6be0
commit
a7e4a4bc16
3 changed files with 366 additions and 17 deletions
18
flake.nix
18
flake.nix
|
|
@ -80,15 +80,6 @@
|
|||
];
|
||||
|
||||
shellHook = ''
|
||||
echo "WetGit - Nederlandse wetgeving als code"
|
||||
echo "Python: $(python --version)"
|
||||
echo ""
|
||||
|
||||
# Deactiveer eventuele oude venv die in de weg zit
|
||||
if [ -n "$VIRTUAL_ENV" ]; then
|
||||
deactivate 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# PYTHONPATH zodat 'import wetgit' werkt vanuit src/
|
||||
export PYTHONPATH="$PWD/src''${PYTHONPATH:+:$PYTHONPATH}"
|
||||
|
||||
|
|
@ -99,10 +90,13 @@
|
|||
set +a
|
||||
fi
|
||||
|
||||
# PyPI-only packages (niet in nixpkgs)
|
||||
if ! python -c "import agentmail" 2>/dev/null; then
|
||||
pip install --user --quiet agentmail 2>/dev/null
|
||||
# Deactiveer eventuele oude venv die in de weg zit
|
||||
if [ -n "$VIRTUAL_ENV" ]; then
|
||||
deactivate 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Info naar stderr zodat stdout schoon blijft voor tools
|
||||
echo "WetGit devshell | Python $(python --version 2>&1 | cut -d' ' -f2)" >&2
|
||||
'';
|
||||
};
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,17 +1,256 @@
|
|||
"""WetGit CLI — command-line interface."""
|
||||
"""WetGit CLI — command-line interface.
|
||||
|
||||
Usage:
|
||||
wetgit show BWBR0001840
|
||||
wetgit show BWBR0001840 --artikel 1
|
||||
wetgit search "persoonsgegevens"
|
||||
wetgit log BWBR0001840
|
||||
wetgit diff BWBR0001840 2017-11-17 2018-12-21
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
from rich.console import Console
|
||||
from rich.markdown import Markdown
|
||||
from rich.table import Table
|
||||
|
||||
from wetgit import __version__
|
||||
|
||||
console = Console(force_terminal=True)
|
||||
|
||||
DEFAULT_REPO = Path.cwd()
|
||||
|
||||
|
||||
def _find_repo(repo: str | None) -> Path:
    """Locate the wetgit/rijk repository to operate on.

    Args:
        repo: Explicit repository path, or ``None``/empty to auto-detect.

    Returns:
        ``Path(repo)`` when a path was supplied (it is not checked for
        existence). Otherwise the first of the current directory, its
        ``rijk`` subdirectory and ``/tmp/wetgit-rijk`` that contains either
        an ``index.json`` or a ``wet`` entry.

    Raises:
        click.ClickException: If no path was given and no candidate matched.
    """
    if repo:
        return Path(repo)

    here = Path.cwd()
    for location in (here, here / "rijk", Path("/tmp/wetgit-rijk")):
        has_index = (location / "index.json").exists()
        if has_index or (location / "wet").exists():
            return location

    raise click.ClickException("Geen wetgit/rijk repo gevonden. Gebruik --repo.")
|
||||
|
||||
|
||||
def _find_regeling(repo: Path, bwb_id: str) -> Path | None:
    """Locate the README.md of a regulation by its BWB-ID.

    Args:
        repo: Root of the wetgit/rijk repository.
        bwb_id: Identifier to look for, e.g. ``BWBR0001840``.

    Returns:
        The first ``README.md`` below ``repo`` that lives in (or under) a
        directory named exactly ``bwb_id``, or ``None`` when there is none.
    """
    for md in repo.rglob("README.md"):
        # Compare whole directory names *below* the repo root. A substring
        # test on the full path would also accept ID prefixes
        # ("BWBR000184" vs "BWBR0001840") and would match every file when
        # the repo's own location happens to contain the ID.
        if bwb_id in md.relative_to(repo).parent.parts:
            return md
    return None
|
||||
|
||||
|
||||
@click.group()
@click.version_option(version=__version__, prog_name="wetgit")
@click.option("--repo", envvar="WETGIT_REPO", help="Pad naar wetgit/rijk repo")
@click.pass_context
def cli(ctx: click.Context, repo: str | None) -> None:
    """WetGit — Nederlandse wetgeving als code."""
    # The docstring above is what click prints for --help, so it is kept as
    # is. The group only stores the --repo / WETGIT_REPO value on the
    # context object, where the sub-commands read it back.
    ctx.ensure_object(dict)
    ctx.obj["repo"] = repo
|
||||
|
||||
|
||||
@cli.command()
def version() -> None:
    """Toon de WetGit versie."""
    # The docstring doubles as the click help text and is left untouched.
    # Prints "wetgit <version>" using the package's __version__.
    click.echo(f"wetgit {__version__}")
|
||||
@cli.command()
@click.argument("bwb_id")
@click.option("--artikel", "-a", help="Toon specifiek artikel")
@click.pass_context
def show(ctx: click.Context, bwb_id: str, artikel: str | None) -> None:
    """Toon een regeling of artikel."""
    # The docstring above is the click help text, hence kept verbatim.
    #
    # Renders the regulation's README.md as Markdown, or only the section of
    # one article when --artikel is given. Raises click.ClickException when
    # the regulation or the requested article cannot be found.
    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    text = md_path.read_text(encoding="utf-8")

    if artikel:
        # Capture from the article heading up to the next article heading,
        # the next "## " heading, or the end of the file.
        # The negative lookahead rejects longer numbers that merely start
        # with the requested one: "1" must not match "10", "1a", "1.2" or
        # "1:1", which a plain \b would let through for the last two.
        pattern = (
            rf"^(### Artikel {re.escape(artikel)}(?![.:]?\w).*?)"
            r"(?=\n### Artikel |\n## |\Z)"
        )
        match = re.search(pattern, text, re.DOTALL | re.MULTILINE)
        if not match:
            raise click.ClickException(f"Artikel {artikel} niet gevonden in {bwb_id}.")
        text = match.group(1).strip()

    console.print(Markdown(text))
|
||||
|
||||
|
||||
@cli.command()
@click.argument("query")
@click.option("--type", "-t", "reg_type", help="Filter op type (wet, amvb, etc.)")
@click.pass_context
def search(ctx: click.Context, query: str, reg_type: str | None) -> None:
    """Doorzoek alle wetgeving."""
    # The docstring above is the click help text, hence kept verbatim.
    #
    # Case-insensitive substring search over every README.md below the repo
    # (or below <repo>/<reg_type> when --type is given). Each matching line
    # that sits under an "### Artikel" heading becomes one table row; output
    # stops after max_results rows.
    from rich.markup import escape

    repo = _find_repo(ctx.obj.get("repo"))
    max_results = 20
    needle = query.lower()

    # Everything shown in the table comes from the user or from file content.
    # It is escaped so Rich does not interpret bracketed text such as
    # "[tekst](url)" as markup (which would drop it or raise MarkupError).
    table = Table(title=f'Zoekresultaten voor "{escape(query)}"')
    table.add_column("BWB-ID", style="cyan")
    table.add_column("Titel", style="bold")
    table.add_column("Artikel", style="green")
    table.add_column("Context")

    count = 0
    search_root = repo / reg_type if reg_type else repo

    for md_path in sorted(search_root.rglob("README.md")):
        if count >= max_results:
            break
        # Skip the repository's own top-level README.
        if md_path.parent == repo:
            continue

        text = md_path.read_text(encoding="utf-8")
        if needle not in text.lower():
            continue

        lines = text.split("\n")

        # The directory name is the BWB-ID; the title is taken from the
        # first line that starts with "titel:".
        bwb_id = md_path.parent.name
        titel = ""
        for line in lines:
            if line.startswith("titel:"):
                titel = line.split(":", 1)[1].strip().strip('"')
                break

        # Track the most recent article heading so each hit can be
        # attributed to it; hits before the first heading are ignored.
        current_artikel = ""
        for line in lines:
            if line.startswith("### Artikel"):
                current_artikel = line.replace("### ", "")
            if current_artikel and needle in line.lower():
                table.add_row(
                    escape(bwb_id),
                    escape(titel[:40]),
                    escape(current_artikel),
                    escape(line.strip()[:80]),
                )
                count += 1
                if count >= max_results:
                    break

    console.print(table)
    console.print(f"\n{count} resultaten gevonden.")
|
||||
|
||||
|
||||
@cli.command(name="log")
@click.argument("bwb_id")
@click.pass_context
def log_cmd(ctx: click.Context, bwb_id: str) -> None:
    """Toon de wijzigingshistorie van een regeling."""
    # The docstring above is the click help text, hence kept verbatim.
    #
    # Runs `git log --follow` on the regulation's README.md and prints one
    # table row (short hash, date, subject) per commit.
    from rich.markup import escape

    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    rel_path = md_path.relative_to(repo)

    try:
        result = subprocess.run(
            ["git", "log", "--format=%h %ai %s", "--follow", "--", str(rel_path)],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError covers a missing git executable; report it the
        # same way instead of showing a traceback.
        console.print("[red]Git log mislukt. Is dit een git repo?[/red]")
        return

    history = result.stdout.strip()
    if not history:
        console.print("[dim]Geen historie beschikbaar.[/dim]")
        return

    table = Table(title=f"Historie {bwb_id}")
    table.add_column("Commit", style="cyan")
    table.add_column("Datum", style="green")
    table.add_column("Beschrijving")

    for line in history.split("\n"):
        # "%h %ai %s" yields "<hash> <date> <time> <tz> <subject>": %ai alone
        # is three space-separated fields, so split off four prefixes to keep
        # the timezone out of the subject.
        parts = line.split(" ", 4)
        if len(parts) >= 4:
            commit, date = parts[0], parts[1]
            msg = parts[4] if len(parts) > 4 else ""
            # Commit subjects are free text; keep Rich from parsing them.
            table.add_row(commit, date, escape(msg))
        else:
            table.add_row(escape(line), "", "")

    console.print(table)
|
||||
|
||||
|
||||
@cli.command()
@click.argument("bwb_id")
@click.argument("datum_van")
@click.argument("datum_tot")
@click.pass_context
def diff(ctx: click.Context, bwb_id: str, datum_van: str, datum_tot: str) -> None:
    """Vergelijk twee versies van een regeling."""
    # The docstring above is the click help text, hence kept verbatim.
    #
    # Picks, for each of the two dates, a commit from the file's history via
    # _find_closest_commit and prints a coloured `git diff` between them.
    # Raises click.ClickException when the regulation is unknown, git fails,
    # or no commit can be selected.
    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    rel_path = md_path.relative_to(repo)

    # Collect (hash, date) for every commit touching the file.
    try:
        result = subprocess.run(
            ["git", "log", "--format=%H %ai %s", "--follow", "--", str(rel_path)],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as err:
        raise click.ClickException("Git log mislukt.") from err

    commits: list[tuple[str, str]] = []
    for line in result.stdout.strip().split("\n"):
        parts = line.split(" ", 3)
        if len(parts) < 2:
            # Blank output or a line without a date field: nothing to use.
            continue
        commits.append((parts[0], parts[1]))  # (hash, date)

    commit_van = _find_closest_commit(commits, datum_van)
    commit_tot = _find_closest_commit(commits, datum_tot)

    if not commit_van or not commit_tot:
        raise click.ClickException(f"Geen commits gevonden rond {datum_van} of {datum_tot}.")

    if commit_van == commit_tot:
        console.print("[dim]Geen verschil tussen deze versies.[/dim]")
        return

    try:
        result = subprocess.run(
            ["git", "diff", commit_van, commit_tot, "--", str(rel_path)],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as err:
        raise click.ClickException("Git diff mislukt.") from err

    if not result.stdout:
        console.print("[dim]Geen verschil.[/dim]")
        return

    console.print(f"[bold]Diff {bwb_id}: {datum_van} → {datum_tot}[/bold]\n")
    for line in result.stdout.split("\n"):
        if line.startswith("+") and not line.startswith("+++"):
            style = "green"
        elif line.startswith("-") and not line.startswith("---"):
            style = "red"
        elif line.startswith("@@"):
            style = "cyan"
        else:
            style = None
        # Diff lines are file content: colour them through `style` and turn
        # markup parsing off, so bracketed text is shown literally instead of
        # being swallowed or raising MarkupError.
        console.print(line, style=style, markup=False)
|
||||
|
||||
|
||||
def _find_closest_commit(commits: list[tuple[str, str]], target_date: str) -> str | None:
    """Pick the commit that represents the file's state on ``target_date``.

    Dates are compared as strings, which is chronological for ``YYYY-MM-DD``.

    Args:
        commits: ``(hash, date)`` pairs in the order the caller obtained them.
        target_date: The date to look up, as ``YYYY-MM-DD``.

    Returns:
        The hash of the commit with the latest date that is not after
        ``target_date`` (the first such entry when several share that date).
        If every commit is later than ``target_date``, the hash of the last
        entry in ``commits``. ``None`` when ``commits`` is empty.
    """
    best_hash: str | None = None
    best_date = ""

    for commit_hash, date_str in commits:
        if date_str > target_date:
            continue
        # Strict ">" keeps the first entry among commits on the same date.
        if best_hash is None or date_str > best_date:
            best_hash, best_date = commit_hash, date_str

    if best_hash is None and commits:
        # Nothing on or before the target date: fall back to the last entry
        # (the oldest commit when the list is ordered newest-first).
        return commits[-1][0]
    return best_hash
|
||||
|
|
|
|||
116
src/wetgit/pipeline/indexer.py
Normal file
116
src/wetgit/pipeline/indexer.py
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
"""Index.json generator — metadata registry van alle regelingen.
|
||||
|
||||
Scant de Markdown-bestanden in een wetgit/rijk repo en genereert
|
||||
een machineleesbare index met metadata per regeling.
|
||||
|
||||
Usage:
|
||||
python -m wetgit.pipeline.indexer --repo /path/to/rijk
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import frontmatter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def generate_index(repo_path: Path) -> list[dict]:
    """Build the index of every regulation found in the repo.

    Walks all ``README.md`` files below ``repo_path`` in sorted order, skips
    the one in the repo root, and collects the metadata entry produced for
    each of the others. A file whose parsing raises is logged as a warning
    and left out; files that yield no entry are left out silently.

    Args:
        repo_path: Path to the wetgit/rijk repo.

    Returns:
        List of metadata dicts, one per regulation.
    """
    entries: list[dict] = []

    for readme in sorted(repo_path.rglob("README.md")):
        is_root_readme = readme.parent == repo_path
        if is_root_readme:
            continue

        try:
            parsed = _parse_regeling(readme, repo_path)
        except Exception as exc:
            # One unreadable file must not abort the whole run.
            logger.warning("Fout bij %s: %s", readme, exc)
            continue

        if parsed:
            entries.append(parsed)

    logger.info("Index: %d regelingen", len(entries))
    return entries
|
||||
|
||||
|
||||
def _parse_regeling(md_path: Path, repo_path: Path) -> dict | None:
    """Extract the index entry for one regulation from its Markdown file.

    Args:
        md_path: The regulation's ``README.md``.
        repo_path: Repository root; ``pad`` in the result is relative to it.

    Returns:
        A dict with the front-matter fields ``bwb_id``, ``titel``,
        ``citeertitel``, ``type``, ``status``, ``datum_inwerkingtreding`` and
        ``bron``, plus ``pad`` (directory relative to the repo, with forward
        slashes), ``artikelen`` (number of ``### Artikel`` headings) and
        ``structuur_elementen`` (number of ``## `` headings). ``None`` when
        the file has no front matter.
    """
    from datetime import date

    text = md_path.read_text(encoding="utf-8")
    post = frontmatter.loads(text)

    if not post.metadata:
        return None

    meta = post.metadata
    content = post.content

    # Count headings in the body only, not in the front matter.
    artikel_count = len(re.findall(r"^### Artikel", content, re.MULTILINE))
    structuur_count = len(re.findall(r"^## ", content, re.MULTILINE))

    # as_posix() keeps the index identical regardless of the platform the
    # generator runs on.
    rel_path = md_path.parent.relative_to(repo_path).as_posix()

    # The YAML loader may hand back date/datetime objects for unquoted dates;
    # those are not JSON-serialisable, so store them as ISO strings. Values
    # that are already strings (or absent) pass through unchanged.
    datum = meta.get("datum_inwerkingtreding")
    if isinstance(datum, date):
        datum = datum.isoformat()

    return {
        "bwb_id": meta.get("bwb_id", ""),
        "titel": meta.get("titel", ""),
        "citeertitel": meta.get("citeertitel"),
        "type": meta.get("type", ""),
        "status": meta.get("status", ""),
        "datum_inwerkingtreding": datum,
        "bron": meta.get("bron", ""),
        "pad": rel_path,
        "artikelen": artikel_count,
        "structuur_elementen": structuur_count,
    }
|
||||
|
||||
|
||||
def write_index(repo_path: Path, index: list[dict]) -> Path:
    """Write the index to ``index.json`` in the repo root.

    The file holds an object with a fixed ``version`` of ``"1.0"``, the
    number of entries as ``count`` and the entries themselves under
    ``regelingen``, written as indented UTF-8 JSON without ASCII escaping.

    Args:
        repo_path: Repository root that receives ``index.json``.
        index: The entries to store.

    Returns:
        Path of the file that was written.
    """
    output = repo_path / "index.json"
    payload = {
        "version": "1.0",
        "count": len(index),
        "regelingen": index,
    }

    with output.open("w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)

    logger.info("Geschreven: %s (%d regelingen)", output, len(index))
    return output
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    # Script entry point: build the index for --repo, write index.json there
    # and print a small JSON summary on stdout.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%H:%M:%S",
    )

    arg_parser = argparse.ArgumentParser(description="WetGit index.json generator")
    arg_parser.add_argument(
        "--repo",
        type=Path,
        required=True,
        help="Pad naar wetgit/rijk repo",
    )
    repo_root = arg_parser.parse_args().repo

    entries = generate_index(repo_root)
    written_to = write_index(repo_root, entries)

    summary = {"count": len(entries), "output": str(written_to)}
    print(json.dumps(summary, indent=2))
|
||||
Loading…
Add table
Reference in a new issue