HEX
Server: Apache/2.4.52 (Ubuntu)
System: Linux spn-python 5.15.0-89-generic #99-Ubuntu SMP Mon Oct 30 20:42:41 UTC 2023 x86_64
User: arjun (1000)
PHP: 8.1.2-1ubuntu2.20
Disabled: NONE
Upload Files
File: //home/arjun/projects/env/lib/python3.10/site-packages/weasyprint/formatting_structure/build.py
"""Turn an element tree with style into a "before layout" box tree.

This includes creating anonymous boxes and processing whitespace as necessary.

"""

import re
import unicodedata

import tinycss2.color3

from .. import html
from ..css import computed_values, properties, targets
from ..logger import LOGGER
from . import boxes

# Maps values of the ``display`` CSS property to box types.
#
# Keys are tuples of ``display`` keywords. ``make_box()`` looks a box class
# up with the first two items of ``style['display']``, so two-keyword values
# are keyed as ``(outer, inner)`` pairs while table-internal values use a
# one-item tuple. There is no entry for ``('none',)``: callers must filter
# that value out before asking for a box.
BOX_TYPE_FROM_DISPLAY = {
    ('block', 'flow'): boxes.BlockBox,
    ('inline', 'flow'): boxes.InlineBox,

    ('block', 'flow-root'): boxes.BlockBox,
    ('inline', 'flow-root'): boxes.InlineBlockBox,

    ('block', 'table'): boxes.TableBox,
    ('inline', 'table'): boxes.InlineTableBox,

    ('block', 'flex'): boxes.FlexBox,
    ('inline', 'flex'): boxes.InlineFlexBox,

    # Table-internal display types: header, footer and plain row groups all
    # share the same box class.
    ('table-row',): boxes.TableRowBox,
    ('table-row-group',): boxes.TableRowGroupBox,
    ('table-header-group',): boxes.TableRowGroupBox,
    ('table-footer-group',): boxes.TableRowGroupBox,
    ('table-column',): boxes.TableColumnBox,
    ('table-column-group',): boxes.TableColumnGroupBox,
    ('table-cell',): boxes.TableCellBox,
    ('table-caption',): boxes.TableCaptionBox,
}

# Translation table (code point -> replacement string) turning printable
# ASCII characters into their wide counterparts, 0xfee0 code points higher.
# See https://stackoverflow.com/questions/16317534/
ASCII_TO_WIDE = dict(
    (code_point, chr(code_point + 0xfee0))
    for code_point in range(0x21, 0x7f))
# Two characters do not follow the fixed offset: the space becomes U+3000
# and the hyphen-minus becomes U+2212.
ASCII_TO_WIDE[0x20] = '\u3000'
ASCII_TO_WIDE[0x2D] = '\u2212'

# A carriage return, optionally followed by a line feed.
LINE_FEED_RE = re.compile('\r\n?')
# A line feed together with the tabs and spaces surrounding it.
TAB_RE = re.compile('[\t ]*\n[\t ]*')
# A run of one or more tabs and spaces.
SPACE_RE = re.compile('[\t ]+')


def create_anonymous_boxes(box):
    """Create anonymous boxes in box descendants according to layout rules.

    The box goes through ``anonymous_table_boxes``, ``flex_boxes``,
    ``inline_in_block`` and ``block_in_inline``, in that order, each pass
    receiving the box returned by the previous one. The box returned by the
    last pass is returned.

    """
    passes = (
        anonymous_table_boxes, flex_boxes, inline_in_block, block_in_inline)
    for apply_pass in passes:
        box = apply_pass(box)
    return box


def build_formatting_structure(element_tree, style_for, get_image_from_uri,
                               base_url, target_collector, counter_style,
                               footnotes):
    """Build a formatting structure (box tree) from an element tree.

    The tree is converted with ``element_to_box``. When that gives no box
    at all, the conversion is run a second time with a style getter forcing
    ``element_tree`` to be displayed as a block and every other element to
    be hidden, so that there always is exactly one root box.

    Pending targets are then checked on ``target_collector``, the root box
    is flagged with ``is_for_root_element``, and the box is returned after
    anonymous boxes creation and viewport overflow handling.

    """
    def convert(style_getter):
        return element_to_box(
            element_tree, style_getter, get_image_from_uri, base_url,
            target_collector, counter_style, footnotes)

    def root_style_for(element, pseudo_type=None):
        # Only used when there is no root box: show the root element as a
        # block and hide everything else.
        style = style_for(element, pseudo_type)
        if style is None:
            return style
        if element == element_tree:
            style['display'] = ('block', 'flow')
        else:
            style['display'] = ('none',)
        return style

    root_boxes = convert(style_for)
    if not root_boxes:
        # No root element
        root_boxes = convert(root_style_for)
    root_box, = root_boxes

    target_collector.check_pending_targets()

    root_box.is_for_root_element = True
    # If this is changed, maybe update weasy.layout.page.make_margin_boxes()
    return set_viewport_overflow(create_anonymous_boxes(root_box))


def make_box(element_tag, style, content, element):
    """Create the box matching the ``display`` value of ``style``.

    The box class is looked up in ``BOX_TYPE_FROM_DISPLAY`` using the first
    two items of ``style['display']``; a ``KeyError`` is raised when there
    is no class for this value.

    """
    box_type = BOX_TYPE_FROM_DISPLAY[style['display'][:2]]
    return box_type(element_tag, style, element, content)


def element_to_box(element, style_for, get_image_from_uri, base_url,
                   target_collector, counter_style, footnotes, state=None):
    """Convert an element and its children into a box with children.

    Return a list of boxes. Most of the time the list will have one item but
    may have zero or more than one.

    Eg.::

        <p>Some <em>emphasised</em> text.</p>

    gives (not actual syntax)::

        BlockBox[
            TextBox['Some '],
            InlineBox[
                TextBox['emphasised'],
            ],
            TextBox[' text.'],
        ]

    ``TextBox``es are anonymous inline boxes:
    See https://www.w3.org/TR/CSS21/visuren.html#anonymous

    ``state`` is a ``(quote_depth, counter_values, counter_scopes)`` tuple of
    mutable objects. It is created here when ``None`` and handed to the
    recursive calls, so that quotes and counters are shared by the whole
    tree. Boxes of children floated as footnotes are appended to
    ``footnotes`` and replaced in the tree by a footnote call box.

    """
    if not isinstance(element.tag, str):
        # We ignore comments and XML processing instructions.
        return []

    style = style_for(element)

    # TODO: should be the used value. When does the used value for `display`
    # differ from the computed value?
    display = style['display']
    if display == ('none',):
        return []

    # Footnotes get their display value from ``footnote-display``.
    if style['float'] == 'footnote':
        if style['footnote_display'] == 'block':
            style['display'] = ('block', 'flow')
        else:
            # TODO: handle compact footnotes
            style['display'] = ('inline', 'flow')

    box = make_box(element.tag, style, [], element)

    if state is None:
        # use a list to have a shared mutable object
        state = (
            # Shared mutable objects:
            [0],  # quote_depth: single integer
            # TODO: define the footnote counter where it can be updated by page
            {'footnote': [0]},  # counter_values: name -> stacked/scoped values
            [{'footnote'}]  # counter_scopes: element depths -> counter names
        )
    quote_depth, counter_values, counter_scopes = state

    update_counters(state, style)

    children = []

    # If this element’s direct children create new scopes, the counter
    # names will be in this new list
    counter_scopes.append(set())

    box.first_letter_style = style_for(element, 'first-letter')
    box.first_line_style = style_for(element, 'first-line')

    # Children are collected in order: marker, ::before, leading text, child
    # elements with their tail text, ::after.
    marker_boxes = []
    if 'list-item' in style['display']:
        marker_boxes = list(marker_to_box(
            element, state, style, style_for, get_image_from_uri,
            target_collector, counter_style))
        children.extend(marker_boxes)

    children.extend(before_after_to_box(
        element, 'before', state, style_for, get_image_from_uri,
        target_collector, counter_style))

    # collect anchor's counter_values, maybe it's a target.
    # to get the spec-conform counter_values we must do it here,
    # after the ::before is parsed and before the ::after is
    if style['anchor']:
        target_collector.store_target(style['anchor'], counter_values, box)

    text = element.text
    if text:
        children.append(boxes.TextBox.anonymous_from(box, text))

    for child_element in element:
        child_boxes = element_to_box(
            child_element, style_for, get_image_from_uri, base_url,
            target_collector, counter_style, footnotes, state)

        if child_boxes and child_boxes[0].style['float'] == 'footnote':
            # The footnote box itself is moved to ``footnotes``; a footnote
            # call box referencing it takes its place in the tree.
            footnote = child_boxes[0]
            footnote.style['float'] = 'none'
            footnotes.append(footnote)
            call_style = style_for(element, 'footnote-call')
            footnote_call = make_box(
                f'{element.tag}::footnote-call', call_style, [], element)
            footnote_call.children = content_to_boxes(
                call_style, footnote_call, quote_depth, counter_values,
                get_image_from_uri, target_collector, counter_style)
            footnote_call.footnote = footnote
            child_boxes = [footnote_call]

        children.extend(child_boxes)
        text = child_element.tail
        if text:
            text_box = boxes.TextBox.anonymous_from(box, text)
            # Merge the tail into the previous text box when there is one,
            # instead of creating adjacent text boxes.
            if children and isinstance(children[-1], boxes.TextBox):
                children[-1].text += text_box.text
            else:
                children.append(text_box)

    children.extend(before_after_to_box(
        element, 'after', state, style_for, get_image_from_uri,
        target_collector, counter_style))

    # Scopes created by this element’s children stop here.
    for name in counter_scopes.pop():
        counter_values[name].pop()
        if not counter_values[name]:
            counter_values.pop(name)

    # Children are only kept when ``appearance`` is ``none``.
    box.children = children if style['appearance'] == 'none' else []
    process_whitespace(box)
    set_content_lists(
        element, box, style, counter_values, target_collector, counter_style)
    process_text_transform(box)

    if marker_boxes and len(box.children) == 1:
        # See https://www.w3.org/TR/css-lists-3/#list-style-position-outside
        #
        # "The size or contents of the marker box may affect the height of the
        #  principal block box and/or the height of its first line box, and in
        #  some cases may cause the creation of a new line box; this
        #  interaction is also not defined."
        #
        # We decide here to add a zero-width space to have a minimum
        # height. Adding text boxes is not the best idea, but it's not a good
        # moment to add an empty line box, and the specification lets us do
        # almost what we want, so…
        if style['list_style_position'] == 'outside':
            box.children.append(boxes.TextBox.anonymous_from(box, '​'))

    if style['float'] == 'footnote':
        # The footnote counter is incremented before the marker content is
        # built, and the marker is put before all other children.
        counter_values['footnote'][-1] += 1
        marker_style = style_for(element, 'footnote-marker')
        marker = make_box(
            f'{element.tag}::footnote-marker', marker_style, [], element)
        marker.children = content_to_boxes(
            marker_style, box, quote_depth, counter_values, get_image_from_uri,
            target_collector, counter_style)
        box.children.insert(0, marker)

    # Specific handling for the element. (eg. replaced element)
    return html.handle_element(element, box, get_image_from_uri, base_url)


def before_after_to_box(element, pseudo_type, state, style_for,
                        get_image_from_uri, target_collector, counter_style):
    """Return the boxes for ::before or ::after pseudo-element.

    The returned list is empty when the pseudo-element has no style, is not
    displayed or generates no content. Otherwise it holds a single box whose
    children are an optional list marker followed by the boxes generated by
    the ``content`` property. Counters in ``state`` are updated with the
    pseudo-element style, and ``bookmark_label`` is set on the box.

    """
    style = style_for(element, pseudo_type)
    if pseudo_type and style is None:
        # Pseudo-elements with no style at all do not get a style dict.
        # Their initial content property computes to 'none'.
        return []

    # TODO: should be the computed value. When does the used value for
    # `display` differ from the computed value? It's at least wrong for
    # `content` where 'normal' computes as 'inhibit' for pseudo elements.
    display = style['display']
    hidden = display == ('none',)
    if hidden or style['content'] in ('normal', 'inhibit', 'none'):
        return []

    pseudo_box = make_box(
        f'{element.tag}::{pseudo_type}', style, [], element)

    quote_depth, counter_values, _counter_scopes = state
    update_counters(state, style)

    pseudo_children = []
    if 'list-item' in display:
        pseudo_children += marker_to_box(
            element, state, style, style_for, get_image_from_uri,
            target_collector, counter_style)
    pseudo_children += content_to_boxes(
        style, pseudo_box, quote_depth, counter_values, get_image_from_uri,
        target_collector, counter_style)
    pseudo_box.children = pseudo_children

    # calculate the bookmark-label
    label = style['bookmark_label']
    if label == 'none':
        pseudo_box.bookmark_label = ''
    else:
        compute_bookmark_label(
            element, pseudo_box, label, counter_values, target_collector,
            counter_style)
    return [pseudo_box]


def marker_to_box(element, state, parent_style, style_for, get_image_from_uri,
                  target_collector, counter_style):
    """Yield the box for ::marker pseudo-element if there is one.

    Nothing is yielded when the marker is not displayed or when it has no
    content. The marker content comes, by order of priority, from the
    ``content`` property, from the ``list-style-image`` image, then from the
    ``list-item`` counter rendered with ``list-style-type``.

    When ``list-style-position`` of ``parent_style`` is ``outside``, the
    yielded box is an absolutely positioned block box translated by its own
    width on the side given by the parent ``direction``. Otherwise, it is an
    inline box.

    https://drafts.csswg.org/css-lists-3/#marker-pseudo

    """
    style = style_for(element, 'marker')

    # This check has to be done before creating the box: make_box() has no
    # box type for ``display: none`` and raises KeyError for it.
    if style['display'] == ('none',):
        return

    children = []

    # TODO: should be the computed value. When does the used value for
    # `display` differ from the computed value? It's at least wrong for
    # `content` where 'normal' computes as 'inhibit' for pseudo elements.
    quote_depth, counter_values, _counter_scopes = state

    # ``children`` is shared with the box and filled in place below.
    box = make_box(f'{element.tag}::marker', style, children, element)

    image_type, image = style['list_style_image']

    if style['content'] not in ('normal', 'inhibit'):
        children.extend(content_to_boxes(
            style, box, quote_depth, counter_values, get_image_from_uri,
            target_collector, counter_style))

    else:
        if image_type == 'url':
            # image may be None here too, in case the image is not available.
            image = get_image_from_uri(
                url=image, orientation=style['image_orientation'])
            if image is not None:
                box = boxes.InlineReplacedBox.anonymous_from(box, image)
                children.append(box)

        # Fall back to the list-item counter when there is no usable image.
        if not children and style['list_style_type'] != 'none':
            counter_value = counter_values.get('list-item', [0])[-1]
            counter_type = style['list_style_type']
            # TODO: rtl numbered list has the dot on the left
            marker_text = counter_style.render_marker(
                counter_type, counter_value)
            box = boxes.TextBox.anonymous_from(box, marker_text)
            box.style['white_space'] = 'pre-wrap'
            children.append(box)

    if not children:
        return

    if parent_style['list_style_position'] == 'outside':
        marker_box = boxes.BlockBox.anonymous_from(box, children)
        # We can safely edit everything that can't be changed by user style
        # See https://drafts.csswg.org/css-pseudo-4/#marker-pseudo
        marker_box.style['position'] = 'absolute'
        if parent_style['direction'] == 'ltr':
            translate_x = properties.Dimension(-100, '%')
        else:
            translate_x = properties.Dimension(100, '%')
        translate_y = computed_values.ZERO_PIXELS
        marker_box.style['transform'] = (
            ('translate', (translate_x, translate_y)),)
    else:
        marker_box = boxes.InlineBox.anonymous_from(box, children)
    yield marker_box


def compute_content_list(content_list, parent_box, counter_values, css_token,
                         parse_again, target_collector, counter_style,
                         get_image_from_uri=None, quote_depth=None,
                         quote_style=None, context=None, page=None,
                         element=None):
    """Compute and return the boxes corresponding to the ``content_list``.

    ``content_list`` is an iterable of ``(type, value)`` items. Adjacent
    pieces of text are merged into a single text box whose parent is
    ``parent_box``.

    ``parse_again`` is called to compute the ``content_list`` again when
    ``target_collector.lookup_target()`` detected a pending target.

    ``build_formatting_structure`` calls
    ``target_collector.check_pending_targets()`` after the first pass to do
    required reparsing.

    ``string()`` and ``element()`` items are only handled when both
    ``context`` and ``page`` are given; ``url`` items need
    ``get_image_from_uri`` and ``quote`` items need both ``quote_depth`` and
    ``quote_style``.

    Return the list of boxes, that may be empty. Return ``None`` when the
    content list produced neither text (even empty) nor boxes, for example
    when a target is not up-to-date yet.

    """
    # TODO: Some computation done here may be done in computed_values
    # instead. We currently miss at least style_for, counters and quotes
    # context in computer. Some work will still need to be done here though,
    # like box creation for URIs.

    content_boxes = []
    # Set as soon as some text is added, even when this text is empty.
    produced_text = False

    def add_text(text):
        nonlocal produced_text
        produced_text = True
        if text:
            if content_boxes and isinstance(content_boxes[-1], boxes.TextBox):
                content_boxes[-1].text += text
            else:
                content_boxes.append(
                    boxes.TextBox.anonymous_from(parent_box, text))

    # Counters used by counter() and counters() but missing in
    # ``counter_values``.
    missing_counters = []
    # Anchor name -> counters missing in the counter values of this target.
    missing_target_counters = {}
    in_page_context = context is not None and page is not None

    # Collect missing counters during build_formatting_structure.
    # Pointless to collect missing target counters in MarginBoxes.
    need_collect_missing = target_collector.collecting and not in_page_context

    if parent_box.cached_counter_values is None:
        # Store the counter_values in the parent_box to make them accessible
        # in @page context.
        parent_box.cached_counter_values = {
            key: value.copy() for key, value in counter_values.items()}
    for type_, value in content_list:
        if type_ == 'string':
            add_text(value)
        elif type_ == 'url' and get_image_from_uri is not None:
            origin, uri = value
            if origin != 'external':
                # Embedding internal references is impossible
                continue
            image = get_image_from_uri(
                url=uri, orientation=parent_box.style['image_orientation'])
            if image is not None:
                content_boxes.append(
                    boxes.InlineReplacedBox.anonymous_from(parent_box, image))
        elif type_ == 'content()':
            added_text = extract_text(value, parent_box)
            # Simulate the step of white space processing
            # (normally done during the layout)
            add_text(added_text.strip())
        elif type_ == 'string()':
            if not in_page_context:
                # string() is currently only valid in @page context
                # See https://github.com/Kozea/WeasyPrint/issues/723
                LOGGER.warning(
                    '"string(%s)" is only allowed in page margins',
                    ' '.join(value))
                continue
            add_text(context.get_string_set_for(page, *value))
        elif type_ in ('counter()', 'counters()'):
            counter_name, counter_type = value[0], value[-1]
            if counter_type == 'none':
                continue
            if need_collect_missing:
                if (counter_name not in counter_values and
                        counter_name not in missing_counters):
                    missing_counters.append(counter_name)
            if type_ == 'counter()':
                counter_value = counter_values.get(counter_name, [0])[-1]
                text = counter_style.render_value(counter_value, counter_type)
            else:
                separator = value[1]
                text = separator.join(
                    counter_style.render_value(counter_value, counter_type)
                    for counter_value in counter_values.get(counter_name, [0]))
            add_text(text)
        elif type_ in ('target-counter()', 'target-counters()'):
            (anchor_token, counter_name), counter_type = value[:2], value[-1]
            if counter_type == 'none':
                continue
            lookup_target = target_collector.lookup_target(
                anchor_token, parent_box, css_token, parse_again)
            if lookup_target.state != 'up-to-date':
                break
            target_values = lookup_target.target_box.cached_counter_values
            if need_collect_missing and counter_name not in target_values:
                anchor_name = targets.anchor_name_from_token(anchor_token)
                # Use a dedicated name for the per-target list:
                # ``missing_counters`` must keep referencing the counters
                # missing for the parent box itself.
                target_missing = missing_target_counters.setdefault(
                    anchor_name, [])
                if counter_name not in target_missing:
                    target_missing.append(counter_name)
            # Mixin target's cached page counters.
            # cached_page_counter_values are empty during layout.
            local_counters = lookup_target.cached_page_counter_values.copy()
            local_counters.update(target_values)
            if type_ == 'target-counter()':
                counter_value = local_counters.get(counter_name, [0])[-1]
                text = counter_style.render_value(counter_value, counter_type)
            else:
                separator = value[2]
                if separator[0] != 'string':
                    break
                separator_string = separator[1]
                text = separator_string.join(
                    counter_style.render_value(counter_value, counter_type)
                    for counter_value in local_counters.get(counter_name, [0]))
            add_text(text)
        elif type_ == 'target-text()':
            anchor_token, text_style = value
            lookup_target = target_collector.lookup_target(
                anchor_token, parent_box, css_token, parse_again)
            if lookup_target.state == 'up-to-date':
                target_box = lookup_target.target_box
                # TODO: 'before'- and 'after'- content referring missing
                # counters are not properly set.
                text = extract_text(text_style, target_box)
                # Simulate the step of white space processing
                # (normally done during the layout)
                add_text(text.strip())
            else:
                break
        elif type_ == 'quote' and None not in (quote_depth, quote_style):
            is_open = 'open' in value
            insert = not value.startswith('no-')
            # The depth is decreased before a closing quote is chosen, and
            # increased after an opening quote is chosen.
            if not is_open:
                quote_depth[0] = max(0, quote_depth[0] - 1)
            if insert:
                open_quotes, close_quotes = quote_style
                quotes = open_quotes if is_open else close_quotes
                add_text(quotes[min(quote_depth[0], len(quotes) - 1)])
            if is_open:
                quote_depth[0] += 1
        elif type_ == 'element()':
            if not in_page_context:
                LOGGER.warning(
                    '"element(%s)" is only allowed in page margins',
                    ' '.join(value))
                continue
            new_box = context.get_running_element_for(page, *value)
            if new_box is None:
                continue
            # Work on a copy, the running element may be used many times.
            new_box = new_box.deepcopy()
            new_box.style['position'] = 'static'
            if isinstance(new_box, boxes.ParentBox):
                for child in new_box.descendants():
                    if child.style['content'] in ('normal', 'none'):
                        continue
                    child.children = content_to_boxes(
                        child.style, child, quote_depth, counter_values,
                        get_image_from_uri, target_collector, counter_style,
                        context=context, page=page)
            content_boxes.append(new_box)
        elif type_ == 'leader()':
            if not value[1]:
                continue
            text_box = boxes.TextBox.anonymous_from(parent_box, value[1])
            leader_box = boxes.InlineBox.anonymous_from(
                parent_box, (text_box,))
            # Avoid breaks inside the leader box
            leader_box.style['white_space'] = 'pre'
            # Prevent whitespaces from being removed from the text box
            text_box.style['white_space'] = 'pre'
            leader_box.is_leader = True
            content_boxes.append(leader_box)

    if produced_text or content_boxes:
        # Only add CounterLookupItem if the content_list actually produced text
        target_collector.collect_missing_counters(
            parent_box, css_token, parse_again, missing_counters,
            missing_target_counters)
        return content_boxes


def content_to_boxes(style, parent_box, quote_depth, counter_values,
                     get_image_from_uri, target_collector, counter_style,
                     context=None, page=None):
    """Take the value of a ``content`` property and return boxes.

    An empty list is returned when ``content`` is ``inhibit`` or when the
    content list gives no result.

    """
    if style['content'] == 'inhibit':
        return []

    # Copy of the quote depth before the content is computed, used when the
    # content has to be computed again.
    orig_quote_depth = quote_depth[:]

    def parse_again(mixin_pagebased_counters=None):
        """Closure to parse the ``parent_boxes`` children all again."""
        # Neither alters the mixed-in nor the cached counter values, no
        # need to deepcopy here
        merged_counters = (
            {} if mixin_pagebased_counters is None
            else mixin_pagebased_counters.copy())
        merged_counters.update(parent_box.cached_counter_values)

        rebuilt_children = list(content_to_boxes(
            style, parent_box, orig_quote_depth, merged_counters,
            get_image_from_uri, target_collector, counter_style))

        # TODO: do we need to add markers here?
        # TODO: redo the formatting structure of the parent instead of hacking
        # the already formatted structure. Find why inline_in_blocks has
        # sometimes already been called, and sometimes not.
        current_children = parent_box.children
        has_single_line_box = (
            len(current_children) == 1 and
            isinstance(current_children[0], boxes.LineBox))
        receiver = current_children[0] if has_single_line_box else parent_box
        receiver.children = rebuilt_children

    computed_boxes = compute_content_list(
        style['content'], parent_box, counter_values, 'content', parse_again,
        target_collector, counter_style, get_image_from_uri, quote_depth,
        style['quotes'], context, page)
    return computed_boxes or []


def compute_string_set(element, box, string_name, content_list,
                       counter_values, target_collector, counter_style):
    """Parse the content-list value of ``string_name`` for ``string-set``.

    When the content list gives a result, the text of the generated text
    boxes is stored in ``box.string_set`` as a ``(string_name, text)`` tuple
    replacing the previous tuple with the same name, if any.

    """
    def parse_again(mixin_pagebased_counters=None):
        """Closure to parse the string-set string value all again."""
        # Neither alters the mixed-in nor the cached counter values, no
        # need to deepcopy here
        merged_counters = (
            {} if mixin_pagebased_counters is None
            else mixin_pagebased_counters.copy())
        merged_counters.update(box.cached_counter_values)
        compute_string_set(
            element, box, string_name, content_list, merged_counters,
            target_collector, counter_style)

    content_boxes = compute_content_list(
        content_list, box, counter_values, f'string-set::{string_name}',
        parse_again, target_collector, counter_style, element=element)
    if content_boxes is None:
        return

    text_parts = [
        child.text for child in content_boxes
        if isinstance(child, boxes.TextBox)]
    # Avoid duplicates, care for parse_again and missing counters, don't
    # change the pointer: the list is modified in place.
    for index, entry in enumerate(box.string_set):
        if entry[0] == string_name:
            del box.string_set[index]
            break
    box.string_set.append((string_name, ''.join(text_parts)))


def compute_bookmark_label(element, box, content_list, counter_values,
                           target_collector, counter_style):
    """Parses the content-list value for ``bookmark-label``.

    When the content list gives at least one box, ``box.bookmark_label`` is
    set to the concatenated text of these boxes. Otherwise, the attribute is
    left untouched.

    """
    def parse_again(mixin_pagebased_counters=None):
        """Closure to parse the bookmark-label all again."""
        # Neither alters the mixed-in nor the cached counter values, no
        # need to deepcopy here
        if mixin_pagebased_counters is None:
            local_counters = {}
        else:
            local_counters = mixin_pagebased_counters.copy()
        # Cached values of the box take precedence over mixed-in ones.
        local_counters.update(box.cached_counter_values)
        compute_bookmark_label(
            element, box, content_list, local_counters, target_collector,
            counter_style)

    css_token = 'bookmark-label'
    box_list = compute_content_list(
        content_list, box, counter_values, css_token, parse_again,
        target_collector, counter_style, element=element)
    if box_list:
        box.bookmark_label = ''.join(box_text(child) for child in box_list)


def set_content_lists(element, box, style, counter_values, target_collector,
                      counter_style):
    """Set the content-lists values.

    These content-lists are used in GCPM properties like ``string-set`` and
    ``bookmark-label``.

    ``box.string_set`` is always reset to an empty list before the
    ``string-set`` values are computed, and ``box.bookmark_label`` is set to
    an empty string when ``bookmark-label`` is ``none``.

    """
    box.string_set = []
    string_set = style['string_set']
    if string_set != 'none':
        for string_name, string_values in string_set:
            compute_string_set(
                element, box, string_name, string_values, counter_values,
                target_collector, counter_style)

    bookmark_label = style['bookmark_label']
    if bookmark_label != 'none':
        compute_bookmark_label(
            element, box, bookmark_label, counter_values, target_collector,
            counter_style)
    else:
        box.bookmark_label = ''


def update_counters(state, style):
    """Handle the ``counter-*`` properties.

    ``state`` is the ``(quote_depth, counter_values, counter_scopes)`` tuple
    shared by the whole tree. ``counter-reset``, ``counter-set`` and
    ``counter-increment`` are applied in this order, modifying
    ``counter_values`` and the last set of ``counter_scopes`` in place.

    """
    _quote_depth, counter_values, counter_scopes = state
    sibling_scopes = counter_scopes[-1]

    def scoped_values(name):
        """Return the values of ``name``, creating the counter if needed."""
        values = counter_values.setdefault(name, [])
        if not values:
            assert name not in sibling_scopes
            sibling_scopes.add(name)
            values.append(0)
        return values

    for name, value in style['counter_reset']:
        if name not in sibling_scopes:
            sibling_scopes.add(name)
        else:
            # Already reset by a sibling: replace the value of this scope.
            counter_values[name].pop()
        counter_values.setdefault(name, []).append(value)

    for name, value in style['counter_set']:
        scoped_values(name)[-1] = value

    increments = style['counter_increment']
    if increments == 'auto':
        # 'auto' is the initial value but is not valid in stylesheet:
        # there was no counter-increment declaration for this element.
        # (Or the winning value was 'initial'.)
        # https://drafts.csswg.org/css-lists-3/#declaring-a-list-item
        is_list_item = 'list-item' in style['display']
        increments = [('list-item', 1)] if is_list_item else []
    for name, value in increments:
        scoped_values(name)[-1] += value


def is_whitespace(box, _has_non_whitespace=re.compile('\\S').search):
    """Return True if ``box`` is a TextBox with only whitespace.

    ``_has_non_whitespace`` is a compiled regular expression search bound
    once at definition time, it is not meant to be given by callers.

    """
    if not isinstance(box, boxes.TextBox):
        return False
    return not _has_non_whitespace(box.text)


def wrap_improper(box, children, wrapper_type, test=None):
    """Yield ``children``, wrapping the ones that do not pass ``test``.

    Consecutive children that do not pass ``test`` are grouped in an
    anonymous box of type ``wrapper_type`` created from ``box``. Children
    passing ``test`` are yielded unchanged.

    ``test`` defaults to children being of the same type as ``wrapper_type``.

    """
    if test is None:
        def test(child):
            return isinstance(child, wrapper_type)

    def wrap(pending_children):
        wrapper = wrapper_type.anonymous_from(box, children=[])
        # Apply the rules again on the new wrapper
        return table_boxes_children(wrapper, pending_children)

    pending = []
    for child in children:
        if not test(child):
            # Whitespace either fail the test or were removed earlier,
            # so there is no need to take special care with the definition
            # of "consecutive".
            #
            # The display value of a flex item must be "blockified", see
            # https://www.w3.org/TR/css-flexbox-1/#flex-items
            # TODO: These blocks are currently ignored, we should
            # "blockify" them and their children.
            if not isinstance(box, boxes.FlexContainerBox):
                pending.append(child)
            continue
        if pending:
            yield wrap(pending)
            pending = []
        yield child
    if pending:
        yield wrap(pending)


def anonymous_table_boxes(box):
    """Remove and add boxes according to the table model.

    Take and return a ``Box`` object. Boxes that are not parent boxes and
    running boxes are returned unchanged; other boxes have their children
    handled first, recursively.

    See https://www.w3.org/TR/CSS21/tables.html#anonymous-boxes

    """
    if isinstance(box, boxes.ParentBox) and not box.is_running():
        # Do recursion.
        fixed_children = list(map(anonymous_table_boxes, box.children))
        return table_boxes_children(box, fixed_children)
    return box


def table_boxes_children(box, children):
    """Apply the anonymous table box rules on ``box`` with ``children``.

    Internal implementation of anonymous_table_boxes(). ``children`` is the
    list of already-processed children of ``box``.

    Return the table wrapper built by ``wrap_table()`` when ``box`` is a
    table box; otherwise set ``box.children`` and return ``box``.

    Rule numbers refer to
    https://www.w3.org/TR/CSS21/tables.html#anonymous-boxes

    """
    if isinstance(box, boxes.TableColumnBox):
        # Rule 1.1: columns have no children.
        children = []
    elif isinstance(box, boxes.TableColumnGroupBox):
        # Rule 1.2: column groups only keep their table-column children.
        children = list(filter(
            lambda child: isinstance(child, boxes.TableColumnBox),
            children))
        if not children:
            # Rule XXX (not in the spec): column groups have at least
            # one column child.
            span = 1 if box.span is None or box.span < 1 else box.span
            children = [
                boxes.TableColumnBox.anonymous_from(box, [])
                for _ in range(span)]

    # Rule 1.3
    if box.tabular_container and len(children) >= 2:
        # TODO: Maybe only remove text if internal is also
        #       a proper table descendant of box.
        # This is what the spec says, but maybe not what browsers do:
        # https://lists.w3.org/Archives/Public/www-style/2011Oct/0567

        # Trailing whitespace after an internal table box
        before_last, last = children[-2:]
        if before_last.internal_table_or_caption and is_whitespace(last):
            del children[-1]

        # Leading whitespace before an internal table box
        if len(children) >= 2:
            first, second = children[:2]
            if second.internal_table_or_caption and is_whitespace(first):
                del children[0]

        # Children other than first and last that would be removed by
        # rule 1.3 are also removed by rule 1.4 below.

    # Rule 1.4: drop whitespace between two internal table boxes.
    kept = []
    last_position = len(children) - 1
    for position, child in enumerate(children):
        previous = children[position - 1] if position else None
        following = (
            children[position + 1] if position < last_position else None)
        ignored = (
            previous and previous.internal_table_or_caption and
            following and following.internal_table_or_caption and
            is_whitespace(child))
        if not ignored:
            kept.append(child)
    children = kept

    def is_proper_table_child(child):
        return child.proper_table_child

    def is_not_cell(child):
        return not isinstance(child, boxes.TableCellBox)

    # From here on ``children`` is a chain of lazy generators.
    if isinstance(box, boxes.TableBox):
        # Rule 2.1
        children = wrap_improper(
            box, children, boxes.TableRowBox, is_proper_table_child)
    elif isinstance(box, boxes.TableRowGroupBox):
        # Rule 2.2
        children = wrap_improper(box, children, boxes.TableRowBox)

    if isinstance(box, boxes.TableRowBox):
        # Rule 2.3
        children = wrap_improper(box, children, boxes.TableCellBox)
    else:
        # Rule 3.1
        children = wrap_improper(
            box, children, boxes.TableRowBox, is_not_cell)

    # Rule 3.2
    if isinstance(box, boxes.InlineBox):
        table_type = boxes.InlineTableBox

        def is_well_placed(child):
            return not child.proper_table_child
    else:
        table_type = boxes.TableBox
        parent_type = type(box)

        def is_well_placed(child):
            return (
                not child.proper_table_child or
                parent_type in child.proper_parents)
    children = wrap_improper(box, children, table_type, is_well_placed)

    if isinstance(box, boxes.TableBox):
        return wrap_table(box, children)

    box.children = list(children)
    return box


def wrap_table(box, children):
    """Take a table box and return it in its table wrapper box.

    Also re-order children and assign grid positions to each column and cell.

    Because of how colspan/rowspan work, grid_y is implicitly the index of a
    row, but grid_x is an explicit attribute on cells, columns and column
    groups.

    ``children`` is an iterable of the table's children. They are dispatched
    on their exact type (``type(child)``): columns and column groups, rows and
    row groups, and captions. Any other type raises ``KeyError``.

    The returned wrapper is an anonymous ``InlineBlockBox`` when ``box`` is an
    ``InlineTableBox``, an anonymous ``BlockBox`` otherwise. Its children are
    the top captions, a copy of the table whose children are the row groups
    (header first, footer last), then the bottom captions.

    https://www.w3.org/TR/CSS21/tables.html#model
    https://www.w3.org/TR/CSS21/tables.html#table-layout

    """
    # Group table children by type
    columns = []
    rows = []
    all_captions = []
    by_type = {
        boxes.TableColumnBox: columns,
        boxes.TableColumnGroupBox: columns,
        boxes.TableRowBox: rows,
        boxes.TableRowGroupBox: rows,
        boxes.TableCaptionBox: all_captions,
    }
    for child in children:
        by_type[type(child)].append(child)

    # Split top and bottom captions
    captions = {'top': [], 'bottom': []}
    for caption in all_captions:
        captions[caption.style['caption_side']].append(caption)

    # Assign X positions on the grid to column boxes
    # Columns that are not in a group are wrapped in anonymous column groups.
    column_groups = list(wrap_improper(
        box, columns, boxes.TableColumnGroupBox))
    grid_x = 0
    for group in column_groups:
        group.grid_x = grid_x
        if group.children:
            for column in group.children:
                # There's no need to take care of group's span, as "span=x"
                # already generates x TableColumnBox children
                column.grid_x = grid_x
                grid_x += 1
        else:
            grid_x += group.span
    # Width given by the columns only; cells may widen it below.
    grid_width = grid_x

    # Rows that are not in a group are wrapped in anonymous row groups.
    # This is a generator, consumed by the loop below.
    row_groups = wrap_improper(box, rows, boxes.TableRowGroupBox)
    # Extract the optional header and footer groups.
    # Only the first header group and the first footer group are special,
    # the other ones are handled as body row groups.
    body_row_groups = []
    header = None
    footer = None
    for group in row_groups:
        display = group.style['display']
        if display == ('table-header-group',) and header is None:
            group.is_header = True
            header = group
        elif display == ('table-footer-group',) and footer is None:
            group.is_footer = True
            footer = group
        else:
            body_row_groups.append(group)
    # Final order: header, body groups in source order, footer.
    row_groups = (
        ([header] if header is not None else []) +
        body_row_groups +
        ([footer] if footer is not None else []))

    # Assign a (x,y) position in the grid to each cell.
    # rowspan can not extend beyond a row group, so each row group
    # is independent.
    # https://www.w3.org/TR/CSS21/tables.html#table-layout
    # Column 0 is on the left if direction is ltr, right if rtl.
    # This algorithm does not change.
    grid_height = 0
    for group in row_groups:
        # Indexes: row number in the group.
        # Values: set of cells already occupied by row-spanning cells.
        occupied_cells_by_row = [set() for row in group.children]
        for row in group.children:
            occupied_cells_in_this_row = occupied_cells_by_row.pop(0)
            # The list is now about rows after this one.
            grid_x = 0
            for cell in row.children:
                # Make sure that the first grid cell is free.
                while grid_x in occupied_cells_in_this_row:
                    grid_x += 1
                cell.grid_x = grid_x
                new_grid_x = grid_x + cell.colspan
                # https://www.w3.org/TR/html401/struct/tables.html#adef-rowspan
                if cell.rowspan != 1:
                    # Number of rows from this one to the end of the group.
                    max_rowspan = len(occupied_cells_by_row) + 1
                    if cell.rowspan == 0:
                        # All rows until the end of the group
                        spanned_rows = occupied_cells_by_row
                        cell.rowspan = max_rowspan
                    else:
                        # Clamp the rowspan to the rows left in the group.
                        cell.rowspan = min(cell.rowspan, max_rowspan)
                        spanned_rows = occupied_cells_by_row[:cell.rowspan - 1]
                    # Mark the columns of this cell as taken in the rows
                    # that it spans after the current one.
                    spanned_columns = range(grid_x, new_grid_x)
                    for occupied_cells in spanned_rows:
                        occupied_cells.update(spanned_columns)
                grid_x = new_grid_x
                grid_width = max(grid_width, grid_x)
        grid_height += len(group.children)

    # The table copy only has row groups as children; column groups are
    # stored in a separate attribute.
    table = box.copy_with_children(row_groups)
    table.column_groups = tuple(column_groups)
    if table.style['border_collapse'] == 'collapse':
        table.collapsed_border_grid = collapse_table_borders(
            table, grid_width, grid_height)

    if isinstance(box, boxes.InlineTableBox):
        wrapper_type = boxes.InlineBlockBox
    else:
        wrapper_type = boxes.BlockBox

    wrapper = wrapper_type.anonymous_from(
        box, captions['top'] + [table] + captions['bottom'])
    # Work on a copy of the wrapper style before overriding values below.
    wrapper.style = wrapper.style.copy()
    wrapper.is_table_wrapper = True
    # Non-inherited properties of the table element apply to one
    # of the wrapper and the table. The other get the initial value.
    # TODO: put this in a method of the table object
    for name in properties.TABLE_WRAPPER_BOX_PROPERTIES:
        wrapper.style[name] = table.style[name]
        table.style[name] = properties.INITIAL_VALUES[name]

    return wrapper


# Transparent color, parsed once when the module is imported. Used by
# collapse_table_borders() for null borders and for the placeholder borders
# set on boxes in the collapsing border model.
TRANSPARENT = tinycss2.color3.parse_color('transparent')


def collapse_table_borders(table, grid_width, grid_height):
    """Resolve border conflicts for a table in the collapsing border model.

    Take a :class:`TableBox`; set appropriate border widths on the table,
    column group, column, row group, row, and cell boxes; and return
    a data structure for the resolved collapsed border grid.

    ``grid_width`` and ``grid_height`` are the sizes of the table grid, in
    columns and rows.

    Return a ``(vertical_borders, horizontal_borders)`` tuple. Both are lists
    of rows indexed as ``borders[grid_y][grid_x]``, whose items are
    ``(score, (style, width, color))`` tuples. ``vertical_borders`` has
    ``grid_height`` rows of ``grid_width + 1`` items, ``horizontal_borders``
    has ``grid_height + 1`` rows of ``grid_width`` items. Two empty lists are
    returned when ``grid_width`` or ``grid_height`` is 0.

    """
    if not (grid_width and grid_height):
        # Don't bother with empty tables
        return [], []

    # Map each border style to a score, the higher score wins. The list is
    # reversed before being enumerated, so 'none' gets 0 and 'hidden' gets
    # the highest value.
    style_scores = dict((v, i) for i, v in enumerate(reversed([
        'hidden', 'double', 'solid', 'dashed', 'dotted', 'ridge',
        'outset', 'groove', 'inset', 'none'])))
    # 'inset' and 'outset' are stored in the grid as 'ridge' and 'groove'.
    style_map = {'inset': 'ridge', 'outset': 'groove'}
    # Initial value of all grid items: it has the lowest possible score for
    # non-negative widths, so any other border replaces it.
    weak_null_border = (
        (0, 0, style_scores['none']), ('none', 0, TRANSPARENT))
    vertical_borders = [[weak_null_border for x in range(grid_width + 1)]
                        for y in range(grid_height)]
    horizontal_borders = [[weak_null_border for x in range(grid_width)]
                          for y in range(grid_height + 1)]

    def set_one_border(border_grid, box_style, side, grid_x, grid_y):
        """Store the ``side`` border of a style in the grid if it wins."""
        from ..draw import get_color

        style = box_style[f'border_{side}_style']
        width = box_style[f'border_{side}_width']
        color = get_color(box_style, f'border_{side}_color')

        # https://www.w3.org/TR/CSS21/tables.html#border-conflict-resolution
        # Scores are compared as tuples: 'hidden' first, then the width, then
        # the style.
        score = ((1 if style == 'hidden' else 0), width, style_scores[style])

        style = style_map.get(style, style)
        previous_score, _ = border_grid[grid_y][grid_x]
        # Strict < so that the earlier call wins in case of a tie.
        if previous_score < score:
            border_grid[grid_y][grid_x] = (score, (style, width, color))

    def set_borders(box, x, y, w, h):
        """Store the four borders of a box covering the given grid area."""
        style = box.style
        for yy in range(y, y + h):
            set_one_border(vertical_borders, style, 'left', x, yy)
            set_one_border(vertical_borders, style, 'right', x + w, yy)
        for xx in range(x, x + w):
            set_one_border(horizontal_borders, style, 'top', xx, y)
            set_one_border(horizontal_borders, style, 'bottom', xx, y + h)

    # The order is important here:
    # "A style set on a cell wins over one on a row, which wins over a
    #  row group, column, column group and, lastly, table"
    # See https://www.w3.org/TR/CSS21/tables.html#border-conflict-resolution
    # Same first score item as a 'hidden' border, with a zero width.
    strong_null_border = (
        (1, 0, style_scores['hidden']), ('hidden', 0, TRANSPARENT))
    grid_y = 0
    for row_group in table.children:
        for row in row_group.children:
            for cell in row.children:
                # No border inside of a cell with rowspan or colspan
                for xx in range(cell.grid_x + 1, cell.grid_x + cell.colspan):
                    for yy in range(grid_y, grid_y + cell.rowspan):
                        vertical_borders[yy][xx] = strong_null_border
                for xx in range(cell.grid_x, cell.grid_x + cell.colspan):
                    for yy in range(grid_y + 1, grid_y + cell.rowspan):
                        horizontal_borders[yy][xx] = strong_null_border
                # The cell's own borders
                set_borders(cell, x=cell.grid_x, y=grid_y,
                            w=cell.colspan, h=cell.rowspan)
            grid_y += 1

    # Rows: each one covers the whole grid width.
    grid_y = 0
    for row_group in table.children:
        for row in row_group.children:
            set_borders(row, x=0, y=grid_y, w=grid_width, h=1)
            grid_y += 1

    # Row groups: each one covers all of its rows.
    grid_y = 0
    for row_group in table.children:
        rowspan = len(row_group.children)
        set_borders(row_group, x=0, y=grid_y, w=grid_width, h=rowspan)
        grid_y += rowspan

    # Columns: each one covers the whole grid height.
    for column_group in table.column_groups:
        for column in column_group.children:
            set_borders(column, x=column.grid_x, y=0, w=1, h=grid_height)

    # Column groups
    for column_group in table.column_groups:
        set_borders(column_group, x=column_group.grid_x, y=0,
                    w=column_group.span, h=grid_height)

    # Table, last
    set_borders(table, x=0, y=0, w=grid_width, h=grid_height)

    # Now that all conflicts are resolved, set transparent borders of
    # the correct widths on each box. The actual border grid will be
    # painted separately.
    def set_transparent_border(box, side, twice_width):
        """Set a solid transparent border of half ``twice_width``."""
        box.style[f'border_{side}_style'] = 'solid'
        box.style[f'border_{side}_width'] = twice_width / 2
        box.style[f'border_{side}_color'] = TRANSPARENT

    def remove_borders(box):
        """Set zero-width transparent borders on the four sides."""
        set_transparent_border(box, 'top', 0)
        set_transparent_border(box, 'right', 0)
        set_transparent_border(box, 'bottom', 0)
        set_transparent_border(box, 'left', 0)

    def max_vertical_width(x, y, h):
        """Return the largest width in column ``x`` for ``h`` rows from y."""
        return max(
            width for grid_row in vertical_borders[y:y + h]
            for _, (_, width, _) in [grid_row[x]])

    def max_horizontal_width(x, y, w):
        """Return the largest width in row ``y`` for ``w`` columns from x."""
        return max(
            width for _, (_, width, _) in horizontal_borders[y][x:x + w])

    grid_y = 0
    for row_group in table.children:
        remove_borders(row_group)
        for row in row_group.children:
            remove_borders(row)
            for cell in row.children:
                set_transparent_border(cell, 'top', max_horizontal_width(
                    x=cell.grid_x, y=grid_y, w=cell.colspan))
                set_transparent_border(cell, 'bottom', max_horizontal_width(
                    x=cell.grid_x, y=grid_y + cell.rowspan, w=cell.colspan))
                set_transparent_border(cell, 'left', max_vertical_width(
                    x=cell.grid_x, y=grid_y, h=cell.rowspan))
                set_transparent_border(cell, 'right', max_vertical_width(
                    x=cell.grid_x + cell.colspan, y=grid_y, h=cell.rowspan))
            grid_y += 1

    for column_group in table.column_groups:
        remove_borders(column_group)
        for column in column_group.children:
            remove_borders(column)

    set_transparent_border(table, 'top', max_horizontal_width(
        x=0, y=0, w=grid_width))
    set_transparent_border(table, 'bottom', max_horizontal_width(
        x=0, y=grid_height, w=grid_width))
    # "UAs must compute an initial left and right border width for the table
    #  by examining the first and last cells in the first row of the table."
    # https://www.w3.org/TR/CSS21/tables.html#collapsing-borders
    # ... so h=1, not grid_height:
    set_transparent_border(table, 'left', max_vertical_width(
        x=0, y=0, h=1))
    set_transparent_border(table, 'right', max_vertical_width(
        x=grid_width, y=0, h=1))

    return vertical_borders, horizontal_borders


def flex_boxes(box):
    """Remove and add boxes according to the flex model.

    Take a ``Box`` object and return the same object. Boxes that are not
    parent boxes, and running boxes, are left untouched. Otherwise the
    children are processed first, then replaced by the flex children of
    ``box``.

    See https://www.w3.org/TR/css-flexbox-1/#flex-items

    """
    if isinstance(box, boxes.ParentBox) and not box.is_running():
        # Depth-first: children are fixed before their parent.
        fixed_children = [flex_boxes(child) for child in box.children]
        box.children = flex_children(box, fixed_children)
    return box


def flex_children(box, children):
    """Return the children that ``box`` gets according to the flex model.

    ``children`` is returned as is when ``box`` is not a flex container.
    Otherwise a new list is returned where text boxes only made of spaces
    are removed and inline-level boxes are wrapped in anonymous block boxes.
    Children that are not absolutely positioned, and the anonymous blocks,
    are marked as flex items.

    """
    if not isinstance(box, boxes.FlexContainerBox):
        return children

    items = []
    for child in children:
        if not child.is_absolutely_positioned():
            child.is_flex_item = True
        if isinstance(child, boxes.TextBox) and not child.text.strip(' '):
            # TODO: ignore texts only containing "characters that can be
            # affected by the white-space property"
            # https://www.w3.org/TR/css-flexbox-1/#flex-items
            continue
        if isinstance(child, boxes.InlineLevelBox):
            child = boxes.BlockBox.anonymous_from(box, [child])
            child.is_flex_item = True
        items.append(child)
    return items


def process_whitespace(box, following_collapsible_space=False):
    """First part of "The 'white-space' processing model".

    Rewrite the text of text boxes in place according to their
    ``white-space`` value, recursively through text and inline children.

    ``following_collapsible_space`` tells whether the previous text ended
    with a collapsible space. The value for the text following ``box`` is
    returned.

    See https://www.w3.org/TR/CSS21/text.html#white-space-model
    https://drafts.csswg.org/css-text-3/#white-space-rules

    """
    if isinstance(box, boxes.TextBox):
        if not box.text:
            return following_collapsible_space

        # Normalize line feeds
        content = LINE_FEED_RE.sub('\n', box.text)

        white_space = box.style['white_space']
        collapse_new_lines = white_space in ('normal', 'nowrap')
        collapse_spaces = white_space in ('normal', 'nowrap', 'pre-line')

        if collapse_spaces:
            # \r characters were removed/converted earlier
            content = TAB_RE.sub('\n', content)

        if collapse_new_lines:
            # TODO: this should be language-specific
            # Could also replace with a zero width space character (U+200B),
            # or no character
            # CSS3: https://www.w3.org/TR/css-text-3/#overflow-wrap
            content = content.replace('\n', ' ')

        if collapse_spaces:
            content = SPACE_RE.sub(' ', content)
            # Checked before the leading space is possibly removed.
            ends_with_space = content.endswith(' ')
            if following_collapsible_space and content.startswith(' '):
                content = content[1:]
                box.leading_collapsible_space = True
            following_collapsible_space = ends_with_space
        else:
            following_collapsible_space = False

        box.text = content

    elif isinstance(box, boxes.ParentBox):
        for child in box.children:
            if not isinstance(child, (boxes.TextBox, boxes.InlineBox)):
                if child.is_in_normal_flow():
                    following_collapsible_space = False
                continue
            space_after_child = process_whitespace(
                child, following_collapsible_space)
            if box.is_in_normal_flow() and child.is_in_normal_flow():
                following_collapsible_space = space_after_child

    return following_collapsible_space and not box.is_running()


def process_text_transform(box):
    """Apply ``text-transform`` and ``hyphens: none`` on texts in ``box``.

    The text of text boxes is changed in place. Parent boxes that are not
    running are handled recursively through their text and inline children.

    """
    if isinstance(box, boxes.TextBox):
        text_transform = box.style['text_transform']
        if text_transform != 'none':
            if text_transform == 'uppercase':
                box.text = box.text.upper()
            elif text_transform == 'lowercase':
                box.text = box.text.lower()
            elif text_transform == 'capitalize':
                box.text = capitalize(box.text)
            elif text_transform == 'full-width':
                box.text = box.text.translate(ASCII_TO_WIDE)
            else:
                raise KeyError(text_transform)
        if box.style['hyphens'] == 'none':
            # U+00AD is soft hyphen
            box.text = box.text.replace('\u00AD', '')
        return

    if not isinstance(box, boxes.ParentBox) or box.is_running():
        return

    for child in box.children:
        if isinstance(child, (boxes.TextBox, boxes.InlineBox)):
            process_text_transform(child)


def capitalize(text):
    """Capitalize words according to CSS's "text-transform: capitalize".

    Return a copy of ``text`` where the first letter or number of each word
    is uppercased, other characters are left untouched.

    A new word starts after a Unicode separator (category Z*) or after any
    other whitespace character such as a tab or a line feed. These control
    characters are not in a separator category, but they are kept in the text
    when ``white-space`` is ``pre``, ``pre-wrap`` or ``pre-line``, and must
    split words too.

    """
    letter_found = False
    # Collect characters in a list and join them once, instead of building
    # the string with repeated concatenations.
    output = []
    for letter in text:
        category = unicodedata.category(letter)[0]
        if not letter_found and category in ('L', 'N'):
            letter_found = True
            letter = letter.upper()
        elif category == 'Z' or letter.isspace():
            letter_found = False
        output.append(letter)
    return ''.join(output)


def inline_in_block(box):
    """Build the structure of lines inside blocks and return a new box tree.

    Runs of consecutive inline-level boxes in a block container box are
    gathered in a line box, itself wrapped in an anonymous block box when
    the container also has other children.

    This line box will be broken into multiple lines later.

    Empty text boxes are removed, and the collapsible spaces they stood for
    are recorded on the neighbouring boxes.

    This is the first case in
    https://www.w3.org/TR/CSS21/visuren.html#anonymous-block-level

    Eg.::

        BlockBox[
            TextBox['Some '],
            InlineBox[TextBox['text']],
            BlockBox[
                TextBox['More text'],
            ]
        ]

    is turned into::

        BlockBox[
            AnonymousBlockBox[
                LineBox[
                    TextBox['Some '],
                    InlineBox[TextBox['text']],
                ]
            ]
            BlockBox[
                LineBox[
                    TextBox['More text'],
                ]
            ]
        ]

    """
    if not isinstance(box, boxes.ParentBox) or box.is_running():
        return box

    original_children = list(box.children)

    if original_children and box.leading_collapsible_space is False:
        first_child = original_children[0]
        box.leading_collapsible_space = first_child.leading_collapsible_space

    # Keep track of removed collapsing spaces for wrap opportunities, and
    # remove empty text boxes.
    # (They may have been emptied by process_whitespace().)
    kept_children = []
    pending_space = False
    for child in original_children:
        if pending_space:
            child.leading_collapsible_space = True

        if isinstance(child, boxes.TextBox) and not child.text:
            pending_space = child.leading_collapsible_space
            continue

        pending_space = False
        kept_children.append(inline_in_block(child))

    if box.trailing_collapsible_space is False:
        box.trailing_collapsible_space = pending_space

    if not isinstance(box, boxes.BlockContainerBox):
        box.children = kept_children
        return box

    collapsing_values = ('normal', 'nowrap', 'pre-line')
    line_children = []
    block_children = []

    for child in kept_children:
        assert not isinstance(child, boxes.LineBox)
        if line_children and child.is_absolutely_positioned():
            line_children.append(child)
        elif isinstance(child, boxes.InlineLevelBox) or (
                line_children and not child.is_in_normal_flow()):
            # Do not append white space at the start of a line:
            # It would be removed during layout.
            # Sequence of white-space was collapsed to a single
            # space by process_whitespace().
            is_leading_space = (
                not line_children and
                isinstance(child, boxes.TextBox) and
                child.text == ' ' and
                child.style['white_space'] in collapsing_values)
            if not is_leading_space:
                line_children.append(child)
        else:
            if line_children:
                # Inlines are consecutive no more: add this line box
                # and create a new one.
                line = boxes.LineBox.anonymous_from(box, line_children)
                block_children.append(
                    boxes.BlockBox.anonymous_from(box, [line]))
                line_children = []
            block_children.append(child)

    if line_children:
        # There were inlines at the end
        line = boxes.LineBox.anonymous_from(box, line_children)
        if block_children:
            block_children.append(
                boxes.BlockBox.anonymous_from(box, [line]))
        else:
            # Only inline-level children: one line box
            block_children.append(line)

    box.children = block_children
    return box


def block_in_inline(box):
    """Build the structure of blocks inside lines.

    A line box containing block-level boxes, possibly nested in inline
    boxes, is split around each of these blocks. The inline content found
    before, between and after the blocks is wrapped in anonymous block-level
    boxes, and the blocks become siblings of these anonymous boxes.

    ``box`` is returned, with new children when something changed.

    This is the second case in
    https://www.w3.org/TR/CSS21/visuren.html#anonymous-block-level

    Eg. if this is given::

        BlockBox[
            LineBox[
                InlineBox[
                    TextBox['Hello.'],
                ],
                InlineBox[
                    TextBox['Some '],
                    InlineBox[
                        TextBox['text'],
                        BlockBox[LineBox[TextBox['More text']]],
                        BlockBox[LineBox[TextBox['More text again']]],
                    ],
                    BlockBox[LineBox[TextBox['And again.']]],
                ]
            ]
        ]

    this is returned::

        BlockBox[
            AnonymousBlockBox[
                LineBox[
                    InlineBox[
                        TextBox['Hello.'],
                    ],
                    InlineBox[
                        TextBox['Some '],
                        InlineBox[TextBox['text']],
                    ]
                ]
            ],
            BlockBox[LineBox[TextBox['More text']]],
            BlockBox[LineBox[TextBox['More text again']]],
            AnonymousBlockBox[
                LineBox[
                    InlineBox[
                    ]
                ]
            ],
            BlockBox[LineBox[TextBox['And again.']]],
            AnonymousBlockBox[
                LineBox[
                    InlineBox[
                    ]
                ]
            ],
        ]

    """
    if not isinstance(box, boxes.ParentBox) or box.is_running():
        return box

    rebuilt_children = []
    modified = False

    for child in box.children:
        if not isinstance(child, boxes.LineBox):
            # Not in an inline formatting context.
            replacement = block_in_inline(child)
        else:
            assert len(box.children) == 1, (
                'Line boxes should have no '
                'siblings at this stage, got %r.' % box.children)
            line, block, resume_at = _inner_block_in_inline(
                child, skip_stack=None)
            while block is not None:
                # Inline content before the block, then the block itself.
                rebuilt_children.append(
                    boxes.BlockBox.anonymous_from(box, [line]))
                rebuilt_children.append(block_in_inline(block))
                # Search again in the same child, after the block.
                line, block, resume_at = _inner_block_in_inline(
                    child, skip_stack=resume_at)
            if rebuilt_children:
                # Some children were already added, this became a block
                # context.
                replacement = boxes.BlockBox.anonymous_from(box, [line])
            else:
                # Keep the single line box as-is, without anonymous blocks.
                replacement = line

        if replacement is not child:
            modified = True
        rebuilt_children.append(replacement)

    if modified:
        box.children = rebuilt_children
    return box


def _inner_block_in_inline(box, skip_stack=None):
    """Find a block-level box in an inline formatting context.

    Return a ``(new_box, block_level_box, resume_at)`` tuple.

    If a block-level box in normal flow is found, ``new_box`` is a copy of
    ``box`` with the content found before the block-level box, and
    ``resume_at`` can be passed as ``skip_stack`` in a new call to this
    function to resume the search just after the block-level box.

    If no block-level box is found after the position marked by
    ``skip_stack``, ``block_level_box`` and ``resume_at`` are ``None``.
    ``new_box`` is then ``box`` itself, unless a child changed or some
    children were skipped.

    """
    if skip_stack is None:
        skip = 0
    else:
        # Single-item mapping: index in this box, then stack for the child.
        (skip, skip_stack), = skip_stack.items()

    kept_children = []
    modified = False
    found_block = None
    resume_at = None

    for offset, child in enumerate(box.children[skip:]):
        if (isinstance(child, boxes.BlockLevelBox) and
                child.is_in_normal_flow()):
            assert skip_stack is None  # Should not skip here
            found_block = child
            # Resume *after* the block
            next_index = skip + offset + 1
        else:
            if isinstance(child, boxes.InlineBox):
                new_child, found_block, resume_at = _inner_block_in_inline(
                    child, skip_stack)
                # The stack is only used for the first child.
                skip_stack = None
            else:
                assert skip_stack is None  # Should not skip here
                new_child = block_in_inline(child)
                # found_block is still None.
            if new_child is not child:
                modified = True
            kept_children.append(new_child)
            next_index = skip + offset

        if found_block is not None:
            resume_at = {next_index: resume_at}
            return (
                box.copy_with_children(kept_children), found_block,
                resume_at)

    if modified or skip:
        box = box.copy_with_children(kept_children)
    return box, found_block, resume_at


def set_viewport_overflow(root_box):
    """Set a ``viewport_overflow`` attribute on the box for the root element.

    Like backgrounds, ``overflow`` on the root element must be propagated
    to the viewport.

    The value is taken from ``root_box``, or from its first ``body`` child
    when ``root_box`` is an ``html`` element whose ``overflow`` is
    ``visible``. The ``overflow`` of the box the value is taken from is then
    set to ``visible``. ``root_box`` is returned.

    See https://www.w3.org/TR/CSS21/visufx.html#overflow
    """
    source_box = root_box
    is_html = root_box.element_tag.lower() == 'html'
    if is_html and root_box.style['overflow'] == 'visible':
        # Use the first body child if there is one, the root box otherwise.
        bodies = (
            child for child in root_box.children
            if child.element_tag.lower() == 'body')
        source_box = next(bodies, root_box)

    root_box.viewport_overflow = source_box.style['overflow']
    source_box.style['overflow'] = 'visible'
    return root_box


def box_text(box):
    """Return the text included in ``box``.

    For a text box, this is its text. For a parent box, this is the
    concatenation of the texts of its descendant text boxes, except the ones
    whose element tag ends with ``::before``, ``::after`` or ``::marker``.
    An empty string is returned for other boxes.

    """
    if isinstance(box, boxes.TextBox):
        return box.text
    if not isinstance(box, boxes.ParentBox):
        return ''

    pseudo_element_suffixes = ('::before', '::after', '::marker')
    texts = []
    for descendant in box.descendants():
        if descendant.element_tag.endswith(pseudo_element_suffixes):
            continue
        if isinstance(descendant, boxes.TextBox):
            texts.append(descendant.text)
    return ''.join(texts)


def extract_text(text_part, box):
    """Return the part of the text of ``box`` designated by ``text_part``.

    - ``'text'`` and ``'content'`` give the text of the box.
    - ``'before'`` and ``'after'`` give the text of the descendants of a
      parent box that are not parent boxes and whose element tag ends with
      ``::before`` or ``::after``; an empty string for other boxes.
    - ``'first-letter'`` gives the first non-punctuation character of the
      text of the box, with the punctuation found around it.

    ``None`` is returned for any other value of ``text_part``.

    """
    if text_part in ('text', 'content'):
        return box_text(box)

    if text_part in ('before', 'after'):
        if not isinstance(box, boxes.ParentBox):
            return ''
        suffix = f'::{text_part}'
        return ''.join(
            box_text(descendant) for descendant in box.descendants()
            if descendant.element_tag.endswith(suffix) and
            not isinstance(descendant, boxes.ParentBox))

    if text_part == 'first-letter':
        # TODO: use the same code as in inlines.first_letter_to_box
        punctuation_categories = ('Ps', 'Pe', 'Pi', 'Pf', 'Po')
        letters = []
        character_found = False
        for letter in box_text(box):
            if unicodedata.category(letter) not in punctuation_categories:
                if character_found:
                    # Second non-punctuation character: stop before it.
                    break
                character_found = True
            letters.append(letter)
        return ''.join(letters)

    return None