File: //home/arjun/projects/env/lib/python3.10/site-packages/cssselect2/tree.py
import functools
from warnings import warn
from webencodings import ascii_lower
from .compiler import compile_selector_list, split_whitespace
# Use the standard-library descriptor when it exists (Python 3.8+) and only
# define the fallback class below when it does not (Python 3.7).
cached_property = getattr(functools, 'cached_property', None)
if cached_property is None:
    class cached_property:
        """Descriptor that computes a value once and stores it on the instance.

        The computed value is written to ``obj.__dict__`` under the wrapped
        function's name and returned from there on later lookups.

        """
        # Borrowed from Werkzeug
        # https://github.com/mitsuhiko/werkzeug/blob/master/werkzeug/utils.py

        def __init__(self, func, name=None, doc=None):
            self.__name__ = name or func.__name__
            self.__module__ = func.__module__
            self.__doc__ = doc or func.__doc__
            self.func = func

        def __get__(self, obj, type=None, __missing=object()):
            # Class-level access returns the descriptor itself.
            if obj is None:
                return self
            key = self.__name__
            # The private sentinel distinguishes "not cached yet" from a
            # cached value of None.
            cached = obj.__dict__.get(key, __missing)
            if cached is not __missing:
                return cached
            computed = self.func(obj)
            obj.__dict__[key] = computed
            return computed
class ElementWrapper:
    """Wrapper of :class:`xml.etree.ElementTree.Element` for Selector matching.

    This class should not be instantiated directly. :meth:`from_xml_root` or
    :meth:`from_html_root` should be used for the root element of a document,
    and other elements should be accessed (and wrappers generated) using
    methods such as :meth:`iter_children` and :meth:`iter_subtree`.

    :class:`ElementWrapper` objects compare equal if their underlying
    :class:`xml.etree.ElementTree.Element` do.

    """

    @classmethod
    def from_xml_root(cls, root, content_language=None):
        """Wrap for selector matching the root of an XML or XHTML document.

        :param root:
            An ElementTree :class:`xml.etree.ElementTree.Element`
            for the root element of a document.
            If the given element is not the root,
            selector matching will behave as if it were.
            In other words, selectors will not be `scoped`_
            to the subtree rooted at that element.
        :param content_language:
            Optional fallback language, consulted by :attr:`lang` for the
            root element when the document itself does not declare one.
        :returns:
            A new :class:`ElementWrapper`

        .. _scoped: https://drafts.csswg.org/selectors-4/#scoping

        """
        return cls._from_root(root, content_language, in_html_document=False)

    @classmethod
    def from_html_root(cls, root, content_language=None):
        """Same as :meth:`from_xml_root` with case-insensitive attribute names.

        Useful for documents parsed with an HTML parser like html5lib, which
        should be the case of documents with the ``text/html`` MIME type.

        """
        return cls._from_root(root, content_language, in_html_document=True)

    @classmethod
    def _from_root(cls, root, content_language, in_html_document=True):
        """Build the root wrapper shared by both public constructors."""
        # Accept tree-like objects exposing ``getroot()`` as well as a bare
        # element.
        if hasattr(root, 'getroot'):
            root = root.getroot()
        return cls(
            root, parent=None, index=0, previous=None,
            in_html_document=in_html_document,
            content_language=content_language)

    def __init__(self, etree_element, parent, index, previous,
                 in_html_document, content_language=None):
        #: The underlying ElementTree :class:`xml.etree.ElementTree.Element`
        self.etree_element = etree_element
        #: The parent :class:`ElementWrapper`,
        #: or :obj:`None` for the root element.
        self.parent = parent
        #: The previous sibling :class:`ElementWrapper`,
        #: or :obj:`None` for the root element or a first child.
        self.previous = previous
        if parent is not None:
            #: The :attr:`parent`’s children
            #: as a list of
            #: ElementTree :class:`xml.etree.ElementTree.Element`\ s.
            #: For the root (which has no parent),
            #: a list containing only the root element itself.
            self.etree_siblings = parent.etree_children
        else:
            self.etree_siblings = [etree_element]
        #: The position within the :attr:`parent`’s children, counting from 0.
        #: ``e.etree_siblings[e.index]`` is always ``e.etree_element``.
        self.index = index
        #: Whether the wrapper was created through :meth:`from_html_root`.
        self.in_html_document = in_html_document
        #: Fallback language passed to the root constructor, if any.
        self.transport_content_language = content_language

        # Lazily-built caches behind the ``ancestors`` and
        # ``previous_siblings`` properties.
        self._ancestors = None
        self._previous_siblings = None

    def __eq__(self, other):
        # Exact type match on purpose: a subclass wrapper never equals a
        # base-class wrapper, even around the same element.
        return (
            type(self) is type(other) and
            self.etree_element == other.etree_element)

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash((type(self), self.etree_element))

    def __iter__(self):
        """Iterate over child wrappers, like :meth:`iter_children`."""
        yield from self.iter_children()

    @property
    def ancestors(self):
        """Tuple of existing ancestors.

        Tuple of existing :class:`ElementWrapper` objects for this element’s
        ancestors, in tree order: the root comes first and :attr:`parent`
        comes last. This is an empty tuple for the root element.

        """
        # NOTE(review): earlier documentation described this as "reversed
        # tree order"; the tuple is actually built root-first, as below.
        if self._ancestors is None:
            self._ancestors = (
                () if self.parent is None else
                self.parent.ancestors + (self.parent,))
        return self._ancestors

    @property
    def previous_siblings(self):
        """Tuple of previous siblings.

        Tuple of existing :class:`ElementWrapper` objects for this element’s
        previous siblings, in tree order: the earliest sibling comes first
        and :attr:`previous` comes last. This is an empty tuple when
        :attr:`previous` is :obj:`None`.

        """
        # NOTE(review): earlier documentation described this as "reversed
        # tree order"; the tuple is actually built earliest-first, as below.
        if self._previous_siblings is None:
            self._previous_siblings = (
                () if self.previous is None else
                self.previous.previous_siblings + (self.previous,))
        return self._previous_siblings

    def iter_ancestors(self):
        """Iterate over ancestors.

        Return an iterator of existing :class:`ElementWrapper` objects for this
        element’s ancestors, in the same order as :attr:`ancestors`.

        The element itself is not included, this is an empty sequence for the
        root element.

        This method is deprecated and will be removed in version 0.7.0. Use
        :attr:`ancestors` instead.

        """
        # stacklevel=2 attributes the warning to the code consuming the
        # iterator rather than to this module.
        warn(
            'This method is deprecated and will be removed in version 0.7.0. '
            'Use the "ancestors" attribute instead.',
            DeprecationWarning, stacklevel=2)
        yield from self.ancestors

    def iter_previous_siblings(self):
        """Iterate over previous siblings.

        Return an iterator of existing :class:`ElementWrapper` objects for this
        element’s previous siblings, in the same order as
        :attr:`previous_siblings`.

        The element itself is not included, this is an empty sequence for a
        first child or the root element.

        This method is deprecated and will be removed in version 0.7.0. Use
        :attr:`previous_siblings` instead.

        """
        # stacklevel=2 attributes the warning to the code consuming the
        # iterator rather than to this module.
        warn(
            'This method is deprecated and will be removed in version 0.7.0. '
            'Use the "previous_siblings" attribute instead.',
            DeprecationWarning, stacklevel=2)
        yield from self.previous_siblings

    def iter_siblings(self):
        """Iterate over siblings.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        this element’s siblings, in tree order. The element itself is part of
        the sequence; for the root, the only item is this very wrapper.

        """
        if self.parent is None:
            yield self
        else:
            yield from self.parent.iter_children()

    def iter_next_siblings(self):
        """Iterate over next siblings.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        this element’s next siblings, in tree order.

        """
        found = False
        for sibling in self.iter_siblings():
            if found:
                yield sibling
            # Wrappers are re-created on each walk, so locate ourselves by
            # equality, not identity.
            if sibling == self:
                found = True

    def iter_children(self):
        """Iterate over children.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        this element’s child elements, in tree order.

        """
        child = None
        for i, etree_child in enumerate(self.etree_children):
            # ``child`` still holds the previous iteration's wrapper here, so
            # it becomes the new wrapper's ``previous`` sibling.
            child = type(self)(
                etree_child, parent=self, index=i, previous=child,
                in_html_document=self.in_html_document)
            yield child

    def iter_subtree(self):
        """Iterate over subtree.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        the entire subtree rooted at this element, in tree order.

        Unlike in other methods, the element itself *is* included.

        This loops over an entire document:

        .. code-block:: python

            for element in ElementWrapper.from_xml_root(root).iter_subtree():
                ...

        """
        # Explicit stack of child iterators: a depth-first walk without
        # recursion.
        stack = [iter([self])]
        while stack:
            element = next(stack[-1], None)
            if element is None:
                stack.pop()
            else:
                yield element
                stack.append(element.iter_children())

    @staticmethod
    def _compile(selectors):
        """Return the test callables of the selectors that can match.

        Items that already expose a ``test`` attribute are used as they are;
        anything else is passed to :func:`compile_selector_list`. Selectors
        targeting a pseudo-element or flagged ``never_matches`` are dropped.

        """
        return [
            compiled_selector.test
            for selector in selectors
            for compiled_selector in (
                [selector] if hasattr(selector, 'test')
                else compile_selector_list(selector))
            if compiled_selector.pseudo_element is None and
            not compiled_selector.never_matches]

    def matches(self, *selectors):
        """Return whether this element matches any of the given selectors.

        :param selectors:
            Each given selector is either a :class:`compiler.CompiledSelector`,
            or an argument to :func:`compile_selector_list`.

        """
        return any(test(self) for test in self._compile(selectors))

    def query_all(self, *selectors):
        """Return elements, in tree order, that match any of given selectors.

        Selectors are `scoped`_ to the subtree rooted at this element.

        .. _scoped: https://drafts.csswg.org/selectors-4/#scoping

        :param selectors:
            Each given selector is either a :class:`compiler.CompiledSelector`,
            or an argument to :func:`compile_selector_list`.
        :returns:
            An iterator of newly-created :class:`ElementWrapper` objects.

        """
        tests = self._compile(selectors)
        if len(tests) == 1:
            return filter(tests[0], self.iter_subtree())
        elif tests:
            return (
                element for element in self.iter_subtree()
                if any(test(element) for test in tests))
        else:
            # Nothing usable was left after compilation: skip the tree walk.
            return iter(())

    def query(self, *selectors):
        """Return first element that matches any of given selectors.

        :param selectors:
            Each given selector is either a :class:`compiler.CompiledSelector`,
            or an argument to :func:`compile_selector_list`.
        :returns:
            A newly-created :class:`ElementWrapper` object,
            or :obj:`None` if there is no match.

        """
        return next(self.query_all(*selectors), None)

    @cached_property
    def etree_children(self):
        """Children as a list of :class:`xml.etree.ElementTree.Element`.

        Other ElementTree nodes such as
        :func:`comments <xml.etree.ElementTree.Comment>` and
        :func:`processing instructions
        <xml.etree.ElementTree.ProcessingInstruction>`
        are not included.

        """
        # Only nodes whose tag is a string are kept.
        return [
            element for element in self.etree_element
            if isinstance(element.tag, str)]

    @cached_property
    def local_name(self):
        """The local name of this element, as a string."""
        namespace_url, local_name = _split_etree_tag(self.etree_element.tag)
        # One split yields both halves: prime the sibling cached property so
        # the tag is not split a second time.
        self.__dict__['namespace_url'] = namespace_url
        return local_name

    @cached_property
    def namespace_url(self):
        """The namespace URL of this element, as a string."""
        namespace_url, local_name = _split_etree_tag(self.etree_element.tag)
        # One split yields both halves: prime the sibling cached property so
        # the tag is not split a second time.
        self.__dict__['local_name'] = local_name
        return namespace_url

    @cached_property
    def id(self):
        """The ID of this element, as a string.

        This is :obj:`None` when the element has no ``id`` attribute.

        """
        return self.etree_element.get('id')

    @cached_property
    def classes(self):
        """The classes of this element, as a :class:`set` of strings."""
        return set(split_whitespace(self.etree_element.get('class', '')))

    @cached_property
    def lang(self):
        """The language of this element, as a string.

        Looked up in order: the ``xml:lang`` attribute; the ``lang``
        attribute (HTML documents or XHTML-namespace elements only); the
        parent's language; for the root of an HTML document, a
        ``<meta http-equiv="content-language">`` element; and finally the
        ``content_language`` given to the root constructor. An empty string
        means the language is unknown.

        """
        # http://whatwg.org/C#language
        xml_lang = self.etree_element.get(
            '{http://www.w3.org/XML/1998/namespace}lang')
        if xml_lang is not None:
            return ascii_lower(xml_lang)
        is_html = (
            self.in_html_document or
            self.namespace_url == 'http://www.w3.org/1999/xhtml')
        if is_html:
            lang = self.etree_element.get('lang')
            if lang is not None:
                return ascii_lower(lang)
        if self.parent is not None:
            return self.parent.lang
        # Root element
        if is_html:
            content_language = None
            iterator = self.etree_element.iter(
                '{http://www.w3.org/1999/xhtml}meta')
            for meta in iterator:
                http_equiv = meta.get('http-equiv', '')
                if ascii_lower(http_equiv) == 'content-language':
                    # The last matching <meta> wins, even if it fails to
                    # parse.
                    content_language = _parse_content_language(
                        meta.get('content'))
            if content_language is not None:
                return ascii_lower(content_language)
        # Empty string means unknown
        return _parse_content_language(self.transport_content_language) or ''

    @cached_property
    def in_disabled_fieldset(self):
        """Whether a disabled XHTML ``fieldset`` ancestor applies here.

        True when the parent is a ``fieldset`` carrying a ``disabled``
        attribute, unless this element is the first ``legend`` child of that
        fieldset; otherwise the parent's own value is inherited. Always
        False for the root element.

        """
        if self.parent is None:
            return False
        fieldset = '{http://www.w3.org/1999/xhtml}fieldset'
        legend = '{http://www.w3.org/1999/xhtml}legend'
        # Read the cached tuple directly: the deprecated
        # iter_previous_siblings() would emit a DeprecationWarning from
        # inside the library.
        disabled_fieldset = (
            self.parent.etree_element.tag == fieldset and
            self.parent.etree_element.get('disabled') is not None and (
                self.etree_element.tag != legend or any(
                    sibling.etree_element.tag == legend
                    for sibling in self.previous_siblings)))
        return disabled_fieldset or self.parent.in_disabled_fieldset
def _split_etree_tag(tag):
position = tag.rfind('}')
if position == -1:
return '', tag
else:
assert tag[0] == '{'
return tag[1:position], tag[position+1:]
def _parse_content_language(value):
if value is not None and ',' not in value:
parts = split_whitespace(value)
if len(parts) == 1:
return parts[0]