feature(http): init parser

This commit is contained in:
Tiara Rodney 2025-12-31 14:34:28 +01:00
parent cc4b567181
commit db72017810
No known key found for this signature in database
GPG key ID: 5CD8EC1D46106723
3 changed files with 398 additions and 0 deletions

View file

@ -0,0 +1,102 @@
import pytest
from byteb4rb1e.utils.http.parser import Node, TreeBuilder
@pytest.fixture
def sample_dom():
    """Parse a small HTML snippet and return the first top-level node.

    The markup fed to ``TreeBuilder`` describes this structure::

        <div id="root" class="container">
          <p class="text">Hello</p>
          <span class="text highlight">World</span>
          <div class="box">
            <span id="inner">Inside</span>
          </div>
        </div>

    The returned value is ``root.children[0]`` of the builder after feeding.
    """
    markup = (
        "\n"
        '<div id="root" class="container">\n'
        '<p class="text">Hello</p>\n'
        '<span class="text highlight">World</span>\n'
        '<div class="box">\n'
        '<span id="inner">Inside</span>\n'
        "</div>\n"
        "</div>\n"
    )
    builder = TreeBuilder()
    builder.feed(markup)
    # The tests treat the first child of the builder's root as <div id="root">.
    top_level_nodes = builder.root.children
    return top_level_nodes[0]
class TestGetElementsByTagName:
    """Checks for ``get_elements_by_tag_name`` against the sample tree."""

    def test_find_all_spans(self, sample_dom):
        """Looking up ``span`` yields exactly two nodes, both tagged ``span``."""
        found = sample_dom.get_elements_by_tag_name("span")
        assert len(found) == 2
        first = found[0]
        assert first.tag == "span"
        second = found[1]
        assert second.tag == "span"

    def test_find_no_matches(self, sample_dom):
        """A tag that is absent from the markup gives an empty list."""
        tables = sample_dom.get_elements_by_tag_name("table")
        assert tables == []
class TestGetElementsByClassName:
    """Checks for ``get_elements_by_class_name`` against the sample tree."""

    def test_find_single_class(self, sample_dom):
        """The ``text`` class is carried by two elements in the sample markup."""
        matches = sample_dom.get_elements_by_class_name("text")
        assert len(matches) == 2

    def test_find_multiple_classes(self, sample_dom):
        """An element with several classes is found via one of them."""
        matches = sample_dom.get_elements_by_class_name("highlight")
        assert len(matches) == 1
        only_match = matches[0]
        assert only_match.tag == "span"

    def test_no_such_class(self, sample_dom):
        """An unknown class name gives an empty list."""
        matches = sample_dom.get_elements_by_class_name("missing")
        assert matches == []
class TestGetElementById:
    """Checks for ``get_element_by_id`` against the sample tree."""

    def test_find_existing_id(self, sample_dom):
        """The ``inner`` id resolves to the nested ``<span>`` holding "Inside"."""
        hit = sample_dom.get_element_by_id("inner")
        assert hit is not None
        assert hit.tag == "span"
        assert hit.inner_content == "Inside"

    def test_missing_id(self, sample_dom):
        """An id that is absent from the markup resolves to ``None``."""
        hit = sample_dom.get_element_by_id("nope")
        assert hit is None
class TestQuerySelectorAll:
    """Checks for ``query_selector_all`` with class, id, tag and chained selectors."""

    def test_class_selector(self, sample_dom):
        """``.text`` selects the two elements carrying that class."""
        selected = sample_dom.query_selector_all(".text")
        assert len(selected) == 2

    def test_id_selector(self, sample_dom):
        """``#inner`` selects the single element whose content is "Inside"."""
        selected = sample_dom.query_selector_all("#inner")
        assert len(selected) == 1
        sole = selected[0]
        assert sole.inner_content == "Inside"

    def test_tag_selector(self, sample_dom):
        """``p`` selects the single paragraph whose content is "Hello"."""
        selected = sample_dom.query_selector_all("p")
        assert len(selected) == 1
        sole = selected[0]
        assert sole.inner_content == "Hello"

    def test_chained_selector(self, sample_dom):
        """``div .highlight`` selects the single element whose content is "World"."""
        selected = sample_dom.query_selector_all("div .highlight")
        assert len(selected) == 1
        sole = selected[0]
        assert sole.inner_content == "World"
class TestXPath:
    """Checks for ``xpath`` with tag-only and attribute-predicate expressions."""

    def test_simple_tag(self, sample_dom):
        """``//span`` matches the two ``<span>`` elements of the sample markup."""
        matched = sample_dom.xpath("//span")
        assert len(matched) == 2

    def test_attribute_match(self, sample_dom):
        """An ``@id`` predicate narrows ``//span`` down to the "Inside" node."""
        matched = sample_dom.xpath('//span[@id="inner"]')
        assert len(matched) == 1
        sole = matched[0]
        assert sole.inner_content == "Inside"

    def test_nested(self, sample_dom):
        """An ``@class`` predicate written with single quotes matches one ``<div>``."""
        matched = sample_dom.xpath("//div[@class='box']")
        assert len(matched) == 1