Source code for imgutils.tagging.blacklist

"""
Overview:
    Detect and drop some blacklisted tags, which are listed `here <https://huggingface.co/datasets/alea31415/tag_filtering/blob/main/blacklist_tags.txt>`_.
"""
from functools import lru_cache
from typing import Union, List, Mapping, Set, Optional, Tuple

from huggingface_hub import hf_hub_download

from .match import _words_to_matcher, _split_to_words


@lru_cache()
def _load_online_blacklist() -> List[str]:
    """
    Load the online blacklist tags from the specified dataset repository.

    :return: List of blacklisted tags.
    :rtype: List[str]
    """
    with open(hf_hub_download(
            'alea31415/tag_filtering',
            'blacklist_tags.txt',
            repo_type='dataset',
    ), 'r') as f:
        return [line.strip() for line in f if line.strip()]


@lru_cache()
def _online_blacklist_set() -> Set[Tuple[str, ...]]:
    """
    Get the online blacklist as a set.

    :return: Set of blacklisted tags.
    :rtype: Set[str]
    """
    set_ = set()
    for tag in _load_online_blacklist():
        set_ = set_ | _words_to_matcher(_split_to_words(tag))
    return set_


def _is_blacklisted(tag: str, blacklist_set: Set[Tuple[str, ...]]) -> bool:
    _tag_matcher = _words_to_matcher(_split_to_words(tag))
    return bool(set(_tag_matcher & blacklist_set))


[docs]def is_blacklisted(tag: str) -> bool: """ Check if any of the given tags are blacklisted. :param tag: Tags to be checked. :type tag: str :return: True if any tag is blacklisted, False otherwise. :rtype: bool Examples:: >>> from imgutils.tagging import is_blacklisted >>> >>> is_blacklisted('cosplay') True >>> is_blacklisted('no_eyewear') True >>> is_blacklisted('no eyewear') # span does not matter True >>> is_blacklisted('red_hair') False """ return _is_blacklisted(tag, _online_blacklist_set())
[docs]def drop_blacklisted_tags(tags: Union[List[str], Mapping[str, float]], use_presets: bool = True, custom_blacklist: Optional[List[str]] = None) \ -> Union[List[str], Mapping[str, float]]: """ Drop blacklisted tags from the given list or mapping of tags. :param tags: List or mapping of tags to be filtered. :type tags: Union[List[str], Mapping[str, float]] :param use_presets: Whether to use the online blacklist presets, defaults to True. :type use_presets: bool, optional :param custom_blacklist: Custom blacklist to be used, defaults to None. :type custom_blacklist: Optional[List[str]], optional :return: Filtered list or mapping of tags without the blacklisted ones. :rtype: Union[List[str], Mapping[str, float]] :raises TypeError: If the input tags are neither a list nor a dictionary. Examples:: >>> from imgutils.tagging import drop_blacklisted_tags >>> >>> drop_blacklisted_tags({ ... 'solo': 1.0, '1girl': 0.95, ... 'cosplay': 0.7, 'no_eyewear': 0.6, ... }) {'solo': 1.0, '1girl': 0.95} >>> drop_blacklisted_tags(['solo', '1girl', 'cosplay', 'no_eyewear']) ['solo', '1girl'] """ blacklist = set() if use_presets: blacklist = blacklist | _online_blacklist_set() for tag in (custom_blacklist or []): blacklist = blacklist | _words_to_matcher(_split_to_words(tag)) if isinstance(tags, dict): return {tag: value for tag, value in tags.items() if not _is_blacklisted(tag, blacklist)} elif isinstance(tags, list): return [tag for tag in tags if not _is_blacklisted(tag, blacklist)] else: raise TypeError(f"Unsupported types of tags, dict or list expected, but {tags!r} found.")