import pytest
from byteb4rb1e.utils.http.parser import Node, TreeBuilder
@pytest.fixture
def sample_dom():
    """Parse a small snippet and hand the tests its first top-level node.

    The snippet is fed to a fresh ``TreeBuilder``; the fixture value is the
    first entry of ``root.children`` on that builder.
    """
    # NOTE(review): as it appears here the literal holds only text lines and
    # no tags, while the tests in this module query elements, classes and
    # ids -- presumably the markup was lost somewhere; confirm against the
    # intended fixture before relying on it.
    markup = """
Hello
World
Inside
Inside Too
"""
    builder = TreeBuilder()
    builder.feed(markup)
    # Every test runs its query against the first child of the parse root.
    return builder.root.children[0]
class TestGetElementsByTagName:
    """Checks for ``get_elements_by_tag_name`` on the ``sample_dom`` fixture."""

    def test_find_all_spans(self, sample_dom):
        """Looking up ``span`` gives three nodes, each tagged ``span``."""
        found = sample_dom.get_elements_by_tag_name("span")
        assert len(found) == 3
        # Exactly three results at this point, so this covers indices 0..2.
        assert all(node.tag == "span" for node in found)

    def test_find_no_matches(self, sample_dom):
        """A tag that is not looked up successfully gives an empty list."""
        found = sample_dom.get_elements_by_tag_name("table")
        assert found == []
class TestGetElementsByClassName:
    """Checks for ``get_elements_by_class_name`` on the ``sample_dom`` fixture."""

    def test_find_single_class(self, sample_dom):
        """The class ``text`` is expected on two nodes."""
        matches = sample_dom.get_elements_by_class_name("text")
        assert len(matches) == 2

    def test_find_multiple_classes(self, sample_dom):
        """The class ``highlight`` is expected on one node, a ``span``."""
        matches = sample_dom.get_elements_by_class_name("highlight")
        assert len(matches) == 1
        (only,) = matches
        assert only.tag == "span"

    def test_no_such_class(self, sample_dom):
        """An unused class name gives an empty list."""
        matches = sample_dom.get_elements_by_class_name("missing")
        assert matches == []
class TestGetElementById:
    """Checks for ``get_element_by_id`` on the ``sample_dom`` fixture."""

    def test_find_existing_id(self, sample_dom):
        """The id ``inner`` resolves to a ``span`` whose content is ``Inside``."""
        hit = sample_dom.get_element_by_id("inner")
        assert hit is not None
        assert hit.tag == "span"
        assert hit.inner_content == "Inside"

    def test_missing_id(self, sample_dom):
        """An unknown id resolves to ``None``."""
        hit = sample_dom.get_element_by_id("nope")
        assert hit is None
class TestQuerySelectorAll:
    """Checks for ``query_selector_all`` on the ``sample_dom`` fixture.

    Covers class, id, tag and attribute selectors, compound selectors, and
    the descendant (space) and direct-child (``>``) combinators.
    """

    def test_class_selector(self, sample_dom):
        """``.text`` selects two nodes."""
        matches = sample_dom.query_selector_all(".text")
        assert len(matches) == 2

    def test_id_selector(self, sample_dom):
        """``#inner`` selects the single node whose content is ``Inside``."""
        matches = sample_dom.query_selector_all("#inner")
        assert len(matches) == 1
        (only,) = matches
        assert only.inner_content == "Inside"

    def test_tag_selector(self, sample_dom):
        """``p`` selects the single node whose content is ``Hello``."""
        matches = sample_dom.query_selector_all("p")
        assert len(matches) == 1
        (only,) = matches
        assert only.inner_content == "Hello"

    def test_chained_selector(self, sample_dom):
        """``.text .highlight`` selects the single ``World`` node."""
        matches = sample_dom.query_selector_all(".text .highlight")
        assert len(matches) == 1
        (only,) = matches
        assert only.inner_content == "World"

    def test_direct_child(self, sample_dom):
        """``.box > #inner`` selects the single ``Inside`` node."""
        matches = sample_dom.query_selector_all(".box > #inner")
        assert len(matches) == 1
        (only,) = matches
        assert only.inner_content == "Inside"

    def test_direct_child_no_match(self, sample_dom):
        """``div > span.highlight`` selects nothing."""
        matches = sample_dom.query_selector_all("div > span.highlight")
        # The highlighted span is not a direct child of the inner div.
        assert len(matches) == 0

    def test_attribute_match(self, sample_dom):
        """``[id="inner"]`` selects the single ``Inside`` node."""
        matches = sample_dom.query_selector_all('[id="inner"]')
        assert len(matches) == 1
        (only,) = matches
        assert only.inner_content == "Inside"

    def test_attribute_no_match(self, sample_dom):
        """An attribute selector with no matching node gives an empty list."""
        matches = sample_dom.query_selector_all('[data-x="nope"]')
        assert matches == []

    def test_tag_class(self, sample_dom):
        """``span.highlight`` selects the single ``World`` node."""
        matches = sample_dom.query_selector_all("span.highlight")
        assert len(matches) == 1
        (only,) = matches
        assert only.inner_content == "World"

    def test_multiple_classes(self, sample_dom):
        """``.text.highlight`` selects the single ``World`` node."""
        matches = sample_dom.query_selector_all(".text.highlight")
        assert len(matches) == 1
        (only,) = matches
        assert only.inner_content == "World"

    def test_tag_id_class(self, sample_dom):
        """``span#inner`` selects the single ``Inside`` node.

        The selector used here combines only a tag and an id.
        """
        matches = sample_dom.query_selector_all("span#inner")
        assert len(matches) == 1
        (only,) = matches
        assert only.inner_content == "Inside"

    def test_descendant(self, sample_dom):
        """``div span`` selects two nodes."""
        matches = sample_dom.query_selector_all("div span")
        assert len(matches) == 2
class TestXPath:
    """Checks for ``xpath`` on the ``sample_dom`` fixture."""

    def test_simple_tag(self, sample_dom):
        """``//span`` yields three nodes."""
        result = sample_dom.xpath("//span")
        assert len(result) == 3

    def test_attribute_match(self, sample_dom):
        """``//span[@id="inner"]`` yields the single ``Inside`` node."""
        result = sample_dom.xpath('//span[@id="inner"]')
        assert len(result) == 1
        (only,) = result
        assert only.inner_content == "Inside"

    def test_nested(self, sample_dom):
        """``//div[@class='box']`` yields exactly one node."""
        result = sample_dom.xpath("//div[@class='box']")
        assert len(result) == 1