Source code for zyte_parsers.brand

from __future__ import annotations

import itertools
from typing import TYPE_CHECKING

from .api import input_to_element
from .utils import extract_text, iterwalk_limited, take

if TYPE_CHECKING:
    from collections.abc import Iterable

    from lxml.html import HtmlElement

    from . import SelectorOrElement


[docs]def extract_brand_name(node: SelectorOrElement, search_depth: int = 0) -> str | None: """Extract a brand name from a node that contains it. It tries element text and image alt and title attributes. :param node: Node including the brand name. :param search_depth: Max depth for searching images. :return: The brand name or None. """ _BRAND_LENGHT_LIMIT = 50 node = input_to_element(node) extracted = _extract_brand(node, search_depth) short = (b for b in extracted if b and len(b) < _BRAND_LENGHT_LIMIT) results = take(short, 1) return results[0] if results else None
def _extract_brand(node: HtmlElement, search_depth: int = 0) -> Iterable[str | None]: if node.tag == "img": return extract_image_text(node, 0) value = extract_text(node) if value: return [value] return extract_image_text(node, search_depth) def extract_image_text(node: HtmlElement, search_depth: int = 0) -> Iterable[str]: def extract_text_from_image(node: HtmlElement) -> Iterable[str | None]: for attrib in ["alt", "title"]: yield (node.attrib.get(attrib) or "").strip() nodes = iterwalk_limited(node, search_depth) images = filter(lambda n: n.tag == "img", nodes) attribs = map(extract_text_from_image, images) flat_attribs = itertools.chain.from_iterable(attribs) return (a for a in flat_attribs if a)