feat: MVP index.json generator + CLI tool
- indexer.py: genereert index.json met metadata per regeling - CLI: show, search, diff, log commando's met Rich formatting - flake.nix: shellHook echo naar stderr (stdout schoon voor tools) wetgit show BWBR0001840 --artikel 1 → toont artikel 1 Grondwet wetgit search "godsdienst" → vindt art. 1, 6, 23 Sluit #32, #33
This commit is contained in:
parent
0de70d6be0
commit
a7e4a4bc16
3 changed files with 366 additions and 17 deletions
18
flake.nix
18
flake.nix
|
|
@ -80,15 +80,6 @@
|
||||||
];
|
];
|
||||||
|
|
||||||
shellHook = ''
|
shellHook = ''
|
||||||
echo "WetGit - Nederlandse wetgeving als code"
|
|
||||||
echo "Python: $(python --version)"
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
# Deactiveer eventuele oude venv die in de weg zit
|
|
||||||
if [ -n "$VIRTUAL_ENV" ]; then
|
|
||||||
deactivate 2>/dev/null || true
|
|
||||||
fi
|
|
||||||
|
|
||||||
# PYTHONPATH zodat 'import wetgit' werkt vanuit src/
|
# PYTHONPATH zodat 'import wetgit' werkt vanuit src/
|
||||||
export PYTHONPATH="$PWD/src''${PYTHONPATH:+:$PYTHONPATH}"
|
export PYTHONPATH="$PWD/src''${PYTHONPATH:+:$PYTHONPATH}"
|
||||||
|
|
||||||
|
|
@ -99,10 +90,13 @@
|
||||||
set +a
|
set +a
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# PyPI-only packages (niet in nixpkgs)
|
# Deactiveer eventuele oude venv die in de weg zit
|
||||||
if ! python -c "import agentmail" 2>/dev/null; then
|
if [ -n "$VIRTUAL_ENV" ]; then
|
||||||
pip install --user --quiet agentmail 2>/dev/null
|
deactivate 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Info naar stderr zodat stdout schoon blijft voor tools
|
||||||
|
echo "WetGit devshell | Python $(python --version 2>&1 | cut -d' ' -f2)" >&2
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,256 @@
|
||||||
"""WetGit CLI — command-line interface."""
|
"""WetGit CLI — command-line interface.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
wetgit show BWBR0001840
|
||||||
|
wetgit show BWBR0001840 --artikel 1
|
||||||
|
wetgit search "persoonsgegevens"
|
||||||
|
wetgit log BWBR0001840
|
||||||
|
wetgit diff BWBR0001840 2017-11-17 2018-12-21
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.markdown import Markdown
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
from wetgit import __version__
|
from wetgit import __version__
|
||||||
|
|
||||||
|
# Shared Rich console that every command in this module prints through.
# NOTE(review): force_terminal=True presumably keeps ANSI styling even when
# stdout is piped into another tool — confirm that this is intended.
console = Console(force_terminal=True)
|
||||||
|
|
||||||
|
# Working directory captured once, at import time. No code visible in this
# chunk reads this constant; _find_repo() calls Path.cwd() itself.
DEFAULT_REPO = Path.cwd()
|
||||||
|
|
||||||
|
|
||||||
|
def _find_repo(repo: str | None) -> Path:
|
||||||
|
"""Zoek de wetgit/rijk repo."""
|
||||||
|
if repo:
|
||||||
|
return Path(repo)
|
||||||
|
# Probeer huidige dir, dan bekende locaties
|
||||||
|
for candidate in [Path.cwd(), Path.cwd() / "rijk", Path("/tmp/wetgit-rijk")]:
|
||||||
|
if (candidate / "index.json").exists() or (candidate / "wet").exists():
|
||||||
|
return candidate
|
||||||
|
raise click.ClickException("Geen wetgit/rijk repo gevonden. Gebruik --repo.")
|
||||||
|
|
||||||
|
|
||||||
|
def _find_regeling(repo: Path, bwb_id: str) -> Path | None:
|
||||||
|
"""Zoek een regeling op BWB-ID."""
|
||||||
|
for md in repo.rglob("README.md"):
|
||||||
|
if bwb_id in str(md):
|
||||||
|
return md
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
@click.group()
@click.version_option(version=__version__, prog_name="wetgit")
@click.option("--repo", envvar="WETGIT_REPO", help="Pad naar wetgit/rijk repo")
@click.pass_context
def cli(ctx: click.Context, repo: str | None) -> None:
    """WetGit — Nederlandse wetgeving als code."""
    # The docstring above doubles as the CLI help text, so it is kept verbatim.
    # Root command group: stores the optional --repo / WETGIT_REPO value on the
    # click context so that subcommands can read it via ctx.obj["repo"].
    shared = ctx.ensure_object(dict)
    shared["repo"] = repo
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
@click.argument("bwb_id")
@click.option("--artikel", "-a", help="Toon specifiek artikel")
@click.pass_context
def show(ctx: click.Context, bwb_id: str, artikel: str | None) -> None:
    """Toon een regeling of artikel."""
    # The docstring above doubles as the CLI help text, so it is kept verbatim.
    #
    # Renders the regulation's README.md as Markdown on the shared console, or
    # only a single article when --artikel is given.
    # Raises click.ClickException when the regulation or article is not found.
    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    text = md_path.read_text(encoding="utf-8")

    if artikel:
        # Match the heading only at the start of a line (same convention as the
        # "### Artikel" checks elsewhere in this project), and refuse to treat
        # "1" as a prefix of "10", "1a", "1:1", "1.2" or "1-a". A bare trailing
        # separator ("### Artikel 1. Titel") is still accepted.
        heading = rf"^### Artikel {re.escape(artikel)}(?!\w|[.:\-]\w)"
        # The article runs until the next article heading, the next level-2
        # heading, or the end of the document.
        pattern = rf"({heading}.*?)(?=\n### Artikel |\n## |\Z)"
        match = re.search(pattern, text, re.DOTALL | re.MULTILINE)
        if not match:
            raise click.ClickException(f"Artikel {artikel} niet gevonden in {bwb_id}.")
        text = match.group(1).strip()

    console.print(Markdown(text))
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
@click.argument("query")
@click.option("--type", "-t", "reg_type", help="Filter op type (wet, amvb, etc.)")
@click.pass_context
def search(ctx: click.Context, query: str, reg_type: str | None) -> None:
    """Doorzoek alle wetgeving."""
    # The docstring above doubles as the CLI help text, so it is kept verbatim.
    #
    # Case-insensitive substring search over every README.md below the repo
    # (or below <repo>/<reg_type> when --type is given). Each matching line
    # that sits under an "### Artikel" heading becomes one table row; output
    # stops after max_results rows.
    from rich.markup import escape  # local import: keeps this block self-contained

    max_results = 20
    repo = _find_repo(ctx.obj.get("repo"))
    needle = query.lower()

    # Everything that originates from the query or from law text is escaped:
    # Rich would otherwise interpret bracketed text such as "[Vervallen]" as
    # console markup and drop or restyle it.
    table = Table(title=f'Zoekresultaten voor "{escape(query)}"')
    table.add_column("BWB-ID", style="cyan")
    table.add_column("Titel", style="bold")
    table.add_column("Artikel", style="green")
    table.add_column("Context")

    count = 0
    search_root = repo / reg_type if reg_type else repo

    for md_path in sorted(search_root.rglob("README.md")):
        if count >= max_results:
            break
        if md_path.parent == repo:
            # The repository's own README is not a regulation.
            continue

        text = md_path.read_text(encoding="utf-8")
        if needle not in text.lower():
            continue

        lines = text.split("\n")

        # The directory name is used as BWB-ID; the title is taken from the
        # first "titel:" line.
        bwb_id = md_path.parent.name
        titel = next(
            (
                line.split(":", 1)[1].strip().strip('"')
                for line in lines
                if line.startswith("titel:")
            ),
            "",
        )

        # Track the most recent article heading to attribute each hit to it.
        current_artikel = ""
        for line in lines:
            if line.startswith("### Artikel"):
                current_artikel = line.replace("### ", "")
            if current_artikel and needle in line.lower():
                table.add_row(
                    escape(bwb_id),
                    escape(titel[:40]),
                    escape(current_artikel),
                    escape(line.strip()[:80]),
                )
                count += 1
                if count >= max_results:
                    break

    console.print(table)
    console.print(f"\n{count} resultaten gevonden.")
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command(name="log")
@click.argument("bwb_id")
@click.pass_context
def log_cmd(ctx: click.Context, bwb_id: str) -> None:
    """Toon de wijzigingshistorie van een regeling."""
    # The docstring above doubles as the CLI help text, so it is kept verbatim.
    #
    # Runs `git log --follow` on the regulation's README.md inside the repo and
    # prints one table row per commit (short hash, date, subject).
    # Raises click.ClickException when the regulation is not found; a failing
    # git call is reported on the console without raising.
    from rich.markup import escape  # local import: keeps this block self-contained

    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    rel_path = md_path.relative_to(repo)

    try:
        result = subprocess.run(
            ["git", "log", "--format=%h %ai %s", "--follow", "--", str(rel_path)],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError: the git executable (or the repo directory) is missing.
        console.print("[red]Git log mislukt. Is dit een git repo?[/red]")
        return

    history = result.stdout.strip()
    if not history:
        console.print("[dim]Geen historie beschikbaar.[/dim]")
        return

    table = Table(title=f"Historie {escape(bwb_id)}")
    table.add_column("Commit", style="cyan")
    table.add_column("Datum", style="green")
    table.add_column("Beschrijving")

    for line in history.split("\n"):
        # %ai expands to "<date> <time> <tz>", so a full line has five fields:
        # hash, date, time, timezone, subject. Splitting on four separators
        # keeps the timezone out of the subject column.
        parts = line.split(" ", 4)
        if len(parts) >= 4:
            subject = parts[4] if len(parts) > 4 else ""
            # Escape the subject so "[tag]"-style prefixes are not read as markup.
            table.add_row(parts[0], parts[1], escape(subject))
        else:
            table.add_row(escape(line), "", "")

    console.print(table)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
@click.argument("bwb_id")
@click.argument("datum_van")
@click.argument("datum_tot")
@click.pass_context
def diff(ctx: click.Context, bwb_id: str, datum_van: str, datum_tot: str) -> None:
    """Vergelijk twee versies van een regeling."""
    # The docstring above doubles as the CLI help text, so it is kept verbatim.
    #
    # Maps both dates (YYYY-MM-DD) onto commits of the regulation's README.md
    # via _find_closest_commit and prints a coloured `git diff` between them.
    # Raises click.ClickException when the regulation is not found, when git
    # fails, or when no commit can be selected for either date.
    repo = _find_repo(ctx.obj.get("repo"))
    md_path = _find_regeling(repo, bwb_id)
    if not md_path:
        raise click.ClickException(f"Regeling {bwb_id} niet gevonden.")

    rel_path = str(md_path.relative_to(repo))

    # Collect (hash, author date) for every commit that touched the file.
    try:
        history = subprocess.run(
            ["git", "log", "--format=%H %ai %s", "--follow", "--", rel_path],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as exc:
        raise click.ClickException("Git log mislukt.") from exc

    commits = []
    for line in history.stdout.strip().split("\n"):
        parts = line.split(" ", 3)
        if len(parts) < 2:
            # Blank or malformed line: no hash/date pair to record.
            continue
        commits.append((parts[0], parts[1]))  # (hash, date)

    commit_van = _find_closest_commit(commits, datum_van)
    commit_tot = _find_closest_commit(commits, datum_tot)

    if not commit_van or not commit_tot:
        raise click.ClickException(f"Geen commits gevonden rond {datum_van} of {datum_tot}.")

    if commit_van == commit_tot:
        console.print("[dim]Geen verschil tussen deze versies.[/dim]")
        return

    try:
        result = subprocess.run(
            ["git", "diff", commit_van, commit_tot, "--", rel_path],
            cwd=repo, capture_output=True, text=True, check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as exc:
        raise click.ClickException("Git diff mislukt.") from exc

    if not result.stdout:
        console.print("[dim]Geen verschil.[/dim]")
        return

    # Diff content and user input are printed with markup disabled and the
    # colour passed as a style: law text containing "[...]" would otherwise be
    # parsed as Rich markup and be dropped or raise a markup error.
    console.print(
        f"Diff {bwb_id}: {datum_van} → {datum_tot}\n", style="bold", markup=False
    )
    for line in result.stdout.split("\n"):
        if line.startswith("+") and not line.startswith("+++"):
            style = "green"
        elif line.startswith("-") and not line.startswith("---"):
            style = "red"
        elif line.startswith("@@"):
            style = "cyan"
        else:
            style = None
        console.print(line, style=style, markup=False)
|
||||||
|
|
||||||
|
|
||||||
|
def _find_closest_commit(commits: list[tuple[str, str]], target_date: str) -> str | None:
|
||||||
|
"""Vind de commit die het dichtst bij de doeldatum ligt."""
|
||||||
|
best = None
|
||||||
|
best_diff = float("inf")
|
||||||
|
for commit_hash, date_str in commits:
|
||||||
|
diff = abs(hash(date_str) - hash(target_date)) # Simpele string vergelijking
|
||||||
|
# Eigenlijk gewoon string matching — datums zijn YYYY-MM-DD
|
||||||
|
if date_str <= target_date:
|
||||||
|
if best is None or date_str > commits[[h for h, _ in commits].index(best)][1]:
|
||||||
|
best = commit_hash
|
||||||
|
# Fallback: neem de dichtstbijzijnde
|
||||||
|
if not best and commits:
|
||||||
|
for commit_hash, date_str in reversed(commits):
|
||||||
|
if date_str <= target_date:
|
||||||
|
return commit_hash
|
||||||
|
return commits[-1][0] # Oudste commit
|
||||||
|
return best
|
||||||
|
|
|
||||||
116
src/wetgit/pipeline/indexer.py
Normal file
116
src/wetgit/pipeline/indexer.py
Normal file
|
|
@ -0,0 +1,116 @@
|
||||||
|
"""Index.json generator — metadata registry van alle regelingen.
|
||||||
|
|
||||||
|
Scant de Markdown-bestanden in een wetgit/rijk repo en genereert
|
||||||
|
een machineleesbare index met metadata per regeling.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python -m wetgit.pipeline.indexer --repo /path/to/rijk
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import frontmatter
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_index(repo_path: Path) -> list[dict]:
    """Build the metadata index for all regulations below *repo_path*.

    Every ``README.md`` found recursively is parsed, except the one that sits
    directly in *repo_path*. Files that fail to parse are logged as warnings
    and skipped; files for which the parser returns nothing are skipped too.

    Args:
        repo_path: Path to the wetgit/rijk repository.

    Returns:
        One metadata dict per successfully parsed regulation, in sorted path
        order.
    """
    entries: list[dict] = []

    readmes = (
        path
        for path in sorted(repo_path.rglob("README.md"))
        if path.parent != repo_path  # the repository's own README
    )
    for readme in readmes:
        try:
            parsed = _parse_regeling(readme, repo_path)
        except Exception as exc:
            # Best effort: one broken file must not abort the whole index run.
            logger.warning("Fout bij %s: %s", readme, exc)
            continue
        if parsed:
            entries.append(parsed)

    logger.info("Index: %d regelingen", len(entries))
    return entries
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_regeling(md_path: Path, repo_path: Path) -> dict | None:
    """Extract the index entry for one regulation from its Markdown file.

    Args:
        md_path: Path to the regulation's ``README.md``.
        repo_path: Repository root; the entry's ``pad`` is relative to it.

    Returns:
        A dict with the front-matter fields plus the relative directory and
        heading counts, or ``None`` when the file has no front matter.
    """
    post = frontmatter.loads(md_path.read_text(encoding="utf-8"))

    meta = post.metadata
    if not meta:
        return None

    content = post.content

    # Count article headings and level-2 headings (chapters/books/parts).
    artikel_count = len(re.findall(r"^### Artikel", content, re.MULTILINE))
    structuur_count = len(re.findall(r"^## ", content, re.MULTILINE))

    # An unquoted YAML date is presumably loaded as a date/datetime object,
    # which json.dump cannot serialize; store it as an ISO-8601 string instead.
    datum = meta.get("datum_inwerkingtreding")
    if hasattr(datum, "isoformat"):
        datum = datum.isoformat()

    # Forward slashes keep the index identical regardless of the OS it was
    # generated on.
    rel_path = md_path.parent.relative_to(repo_path).as_posix()

    return {
        "bwb_id": meta.get("bwb_id", ""),
        "titel": meta.get("titel", ""),
        "citeertitel": meta.get("citeertitel"),
        "type": meta.get("type", ""),
        "status": meta.get("status", ""),
        "datum_inwerkingtreding": datum,
        "bron": meta.get("bron", ""),
        "pad": rel_path,
        "artikelen": artikel_count,
        "structuur_elementen": structuur_count,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def write_index(repo_path: Path, index: list[dict]) -> Path:
    """Serialize *index* to ``index.json`` in the repository root.

    The file is UTF-8 JSON with two-space indentation and non-ASCII characters
    left unescaped. It wraps the entries in an envelope with a format version
    and the entry count.

    Args:
        repo_path: Repository root in which ``index.json`` is written.
        index: Metadata entries to store under the ``regelingen`` key.

    Returns:
        Path of the written file.
    """
    target = repo_path / "index.json"
    envelope = {
        "version": "1.0",
        "count": len(index),
        "regelingen": index,
    }

    with target.open("w", encoding="utf-8") as handle:
        json.dump(envelope, handle, ensure_ascii=False, indent=2)

    logger.info("Geschreven: %s (%d regelingen)", target, len(index))
    return target
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: build the index for --repo, write index.json there,
    # and print a small JSON summary (count and output path) to stdout.
    import argparse

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%H:%M:%S",
    )

    cli_parser = argparse.ArgumentParser(description="WetGit index.json generator")
    cli_parser.add_argument(
        "--repo",
        type=Path,
        required=True,
        help="Pad naar wetgit/rijk repo",
    )
    repo_dir = cli_parser.parse_args().repo

    regelingen = generate_index(repo_dir)
    written_to = write_index(repo_dir, regelingen)

    summary = {"count": len(regelingen), "output": str(written_to)}
    print(json.dumps(summary, indent=2))
|
||||||
Loading…
Add table
Reference in a new issue