"""Tests voor de BWB XML parser.""" import pytest from wetgit.pipeline.bwb_parser import parse_bwb_xml @pytest.fixture def grondwet_xml(tmp_path): """Download de Grondwet XML voor tests.""" import httpx url = "https://repository.officiele-overheidspublicaties.nl/bwb/BWBR0001840/2023-02-22_0/xml/BWBR0001840_2023-02-22_0.xml" resp = httpx.get(url, timeout=30) resp.raise_for_status() xml_path = tmp_path / "grondwet.xml" xml_path.write_bytes(resp.content) return str(xml_path) @pytest.mark.integration class TestBWBParser: def test_parse_grondwet_metadata(self, grondwet_xml: str) -> None: result = parse_bwb_xml(grondwet_xml) assert result.bwb_id == "BWBR0001840" assert result.titel == "Grondwet" assert result.soort == "wet" def test_parse_grondwet_frontmatter(self, grondwet_xml: str) -> None: result = parse_bwb_xml(grondwet_xml) assert result.frontmatter["bwb_id"] == "BWBR0001840" assert result.frontmatter["type"] == "wet" assert result.frontmatter["status"] == "geldend" assert "wetten.overheid.nl" in str(result.frontmatter["bron"]) def test_parse_grondwet_has_artikel_1(self, grondwet_xml: str) -> None: result = parse_bwb_xml(grondwet_xml) assert "### Artikel 1" in result.markdown assert "gelijke gevallen gelijk behandeld" in result.markdown def test_parse_grondwet_has_hoofdstukken(self, grondwet_xml: str) -> None: result = parse_bwb_xml(grondwet_xml) assert "## Hoofdstuk 1" in result.markdown assert "## Hoofdstuk 2" in result.markdown def test_parse_grondwet_yaml_frontmatter(self, grondwet_xml: str) -> None: result = parse_bwb_xml(grondwet_xml) assert result.markdown.startswith("---\n") assert "\n---\n" in result.markdown def test_parse_grondwet_markdown_structure(self, grondwet_xml: str) -> None: result = parse_bwb_xml(grondwet_xml) # Should start with frontmatter then h1 lines = result.markdown.split("\n") assert lines[0] == "---" # Find the h1 h1_lines = [l for l in lines if l.startswith("# ")] assert len(h1_lines) == 1 assert h1_lines[0] == "# Grondwet"