# Source code for imgutils.metrics.dbaesthetic

"""
Overview:
    A tool for assessing the aesthetic quality of anime images using a pre-trained model,
    based on the danbooru dataset and the metadata analysis results of
    `HakuBooru <https://github.com/KohakuBlueleaf/HakuBooru>`_ by
    `KohakuBlueleaf <https://github.com/KohakuBlueleaf>`_.

    .. image:: dbaesthetic_full.plot.py.svg
        :align: center

    This is an overall benchmark of all the operations in aesthetic models:

    .. image:: dbaesthetic_benchmark.plot.py.svg
        :align: center

"""
from typing import Dict, Optional, Tuple

import numpy as np
from huggingface_hub import hf_hub_download

from ..data import ImageTyping
from ..generic import ClassifyModel
from ..utils import vreplace

# Names exported by ``from imgutils.metrics.dbaesthetic import *``.
__all__ = [
    'anime_dbaesthetic',
]

# Model used by ``anime_dbaesthetic`` when the caller does not name one.
_DEFAULT_MODEL_NAME = 'swinv2pv3_v0_448_ls0.2_x'

# Repository handed to the module-level ``AestheticModel`` instance.
_REPO_ID = 'deepghs/anime_aesthetic'

# Quality classes; a label's position in this list is the weight it contributes
# when the per-label confidences are folded into a single score.
_LABELS = [
    "worst",
    "low",
    "normal",
    "good",
    "great",
    "best",
    "masterpiece",
]

# Lowest percentile (inclusive) at which each label is assigned.
_DEFAULT_LABEL_MAPPING = dict(
    masterpiece=0.95,
    best=0.85,
    great=0.75,
    good=0.5,
    normal=0.25,
    low=0.1,
    worst=0.0,
)


class AestheticModel:
    """
    A model for assessing the aesthetic quality of anime images.

    Wraps a :class:`ClassifyModel` bound to one repository and keeps a per-model cache of
    the sample curves used to convert raw scores into percentile ranks.
    """

    def __init__(self, repo_id: str):
        """
        Initializes an AestheticModel instance.

        :param repo_id: The repository ID of the aesthetic assessment model.
        :type repo_id: str
        """
        self.repo_id = repo_id
        self.classifier = ClassifyModel(repo_id)
        # model name -> ((x, x_min, x_max), (y, y_min, y_max)), filled lazily by _get_xy_samples
        self.cached_samples: Dict[str, Tuple] = {}

    def get_aesthetic_score(self, image: ImageTyping, model_name: str) -> Tuple[float, Dict[str, float]]:
        """
        Calculates the aesthetic score and per-label confidence for an anime image.

        The score is the sum, over all labels in ``_LABELS``, of the label's confidence
        multiplied by the label's index in that list.

        :param image: The input anime image.
        :type image: ImageTyping
        :param model_name: The name of the aesthetic assessment model to use.
        :type model_name: str
        :return: A tuple containing the aesthetic score and the confidence mapping
            returned by the classifier.
        :rtype: Tuple[float, Dict[str, float]]
        """
        scores = self.classifier.predict_score(image, model_name)
        return sum(scores[label] * i for i, label in enumerate(_LABELS)), scores

    def _get_xy_samples(self, model_name: str):
        """
        Retrieves (and caches) the score/percentile samples of a model.

        On first use for a given model the file ``{model_name}/samples.npz`` is downloaded
        from the repository; rows 0 and 1 of its ``arr_0`` array are taken as the
        ``x`` and ``y`` samples respectively.

        :param model_name: The name of the aesthetic assessment model.
        :type model_name: str
        :return: ``((x, x_min, x_max), (y, y_min, y_max))`` for the given model.
        :rtype: Tuple[Tuple[np.ndarray, float, float], Tuple[np.ndarray, float, float]]
        """
        if model_name not in self.cached_samples:
            samples_file = hf_hub_download(
                repo_id=self.repo_id,
                repo_type='model',
                filename=f'{model_name}/samples.npz',
            )
            # np.load on an .npz returns an NpzFile that holds the archive open;
            # close it deterministically once the array has been read into memory.
            with np.load(samples_file) as archive:
                stacked = archive['arr_0']
            x, y = stacked[0], stacked[1]
            self.cached_samples[model_name] = ((x, x.min(), x.max()), (y, y.min(), y.max()))
        return self.cached_samples[model_name]

    def score_to_percentile(self, score: float, model_name: str) -> float:
        """
        Converts an aesthetic score to a percentile rank.

        The percentile is obtained by piecewise-linear interpolation over the cached
        ``(x, y)`` samples. Scores below the first sample or above the last one are
        clamped to the first / last ``y`` value.

        :param score: The aesthetic score.
        :type score: float
        :param model_name: The name of the aesthetic assessment model to use.
        :type model_name: str
        :return: The percentile rank corresponding to the given score.
        :rtype: float
        """
        (x, _, _), (y, _, _) = self._get_xy_samples(model_name)
        # np.interp brackets the score with the samples on either side of it and
        # clamps at both ends. NOTE: like np.searchsorted, it assumes ``x`` is
        # sorted in increasing order.
        return float(np.interp(score, x, y))

    @classmethod
    def percentile_to_label(cls, percentile: float, mapping: Optional[Dict[str, float]] = None) -> str:
        """
        Converts a percentile rank to an aesthetic label.

        Labels are tried from the highest threshold downwards (ties ordered by label
        name) and the first one whose threshold does not exceed ``percentile`` is returned.

        :param percentile: The percentile rank.
        :type percentile: float
        :param mapping: A dictionary mapping labels to percentile thresholds.
            ``None`` or an empty mapping selects ``_DEFAULT_LABEL_MAPPING``.
        :type mapping: Optional[Dict[str, float]]
        :return: The aesthetic label corresponding to the given percentile rank.
        :rtype: str
        :raises ValueError: If ``percentile`` is below every threshold in the mapping.
        """
        mapping = mapping or _DEFAULT_LABEL_MAPPING
        for label, threshold in sorted(mapping.items(), key=lambda item: (-item[1], item[0])):
            if percentile >= threshold:
                return label
        raise ValueError(f'No label for unknown percentile {percentile:.3f}.')  # pragma: no cover

    def get_aesthetic(self, image: ImageTyping, model_name: str, fmt=('label', 'percentile')):
        """
        Analyzes the aesthetic quality of an anime image and returns the results in the specified format.

        :param image: The input anime image.
        :type image: ImageTyping
        :param model_name: The name of the aesthetic assessment model to use.
        :type model_name: str
        :param fmt: The format of the output, built from the field names ``'label'``,
            ``'percentile'``, ``'score'`` and ``'confidence'``.
        :type fmt: Tuple[str, ...]
        :return: ``fmt`` with every field name substituted by its value via ``vreplace``;
            with the default ``fmt`` this is a ``(label, percentile)`` tuple.
        """
        score, confidence = self.get_aesthetic_score(image, model_name)
        percentile = self.score_to_percentile(score, model_name)
        label = self.percentile_to_label(percentile)
        return vreplace(
            v=fmt,
            mapping={
                'label': label,
                'percentile': percentile,
                'score': score,
                'confidence': confidence,
            }
        )

    def clear(self):
        """
        Clears the internal state of the AestheticModel instance.

        Calls ``clear()`` on the wrapped classifier and drops all cached samples.
        """
        self.classifier.clear()
        self.cached_samples.clear()


# Module-level instance, created at import time, that anime_dbaesthetic delegates to.
_MODEL = AestheticModel(repo_id=_REPO_ID)


def anime_dbaesthetic(image: ImageTyping, model_name: str = _DEFAULT_MODEL_NAME,
                      fmt=('label', 'percentile')):
    """
    Analyzes the aesthetic quality of an anime image using a pre-trained model.

    :param image: The input anime image.
    :type image: ImageTyping
    :param model_name: The name of the aesthetic assessment model to use. Default is _DEFAULT_MODEL_NAME.
    :type model_name: str
    :param fmt: The format of the output, built from the field names ``'label'``,
        ``'percentile'``, ``'score'`` and ``'confidence'``. Default is ('label', 'percentile').
    :type fmt: Tuple[str, ...]
    :return: The aesthetic assessment results arranged as described by ``fmt``:
        a tuple for a tuple ``fmt``, or a single value when ``fmt`` is a single field name.

    Examples::
        >>> from imgutils.metrics import anime_dbaesthetic
        >>>
        >>> anime_dbaesthetic('masterpiece.jpg')
        ('masterpiece', 0.9831666690063624)
        >>> anime_dbaesthetic('best.jpg')
        ('best', 0.8810615667538594)
        >>> anime_dbaesthetic('great.jpg')
        ('great', 0.8225559148288356)
        >>> anime_dbaesthetic('good.jpg')
        ('good', 0.591020403706702)
        >>> anime_dbaesthetic('normal.jpg')
        ('normal', 0.2888798940585766)
        >>> anime_dbaesthetic('low.jpg')
        ('low', 0.243279223969715)
        >>> anime_dbaesthetic('worst.jpg')
        ('worst', 0.005268185993767627)

        * Custom format

        >>> anime_dbaesthetic('masterpiece.jpg', fmt=('label', 'percentile', 'score'))
        ('masterpiece', 0.9831666690063624, 5.275707557797432)
        >>> anime_dbaesthetic('best.jpg', fmt=('label', 'percentile', 'score'))
        ('best', 0.8810615667538594, 4.7977807857096195)
        >>> anime_dbaesthetic('great.jpg', fmt=('label', 'percentile', 'score'))
        ('great', 0.8225559148288356, 4.56098810210824)
        >>> anime_dbaesthetic('good.jpg', fmt=('label', 'percentile', 'score'))
        ('good', 0.591020403706702, 3.670568235218525)
        >>> anime_dbaesthetic('normal.jpg', fmt=('label', 'percentile', 'score'))
        ('normal', 0.2888798940585766, 2.1677918508648872)
        >>> anime_dbaesthetic('low.jpg', fmt=('label', 'percentile', 'score'))
        ('low', 0.243279223969715, 1.9305131509900093)
        >>> anime_dbaesthetic('worst.jpg', fmt=('label', 'percentile', 'score'))
        ('worst', 0.005268185993767627, 0.6085879728198051)

        * Get confidence

        >>> anime_dbaesthetic('masterpiece.jpg', fmt='confidence')
        {'masterpiece': 0.6834832429885864, 'best': 0.16141420602798462, 'great': 0.05435194447636604, 'good': 0.025083942338824272, 'normal': 0.024000568315386772, 'low': 0.027076328173279762, 'worst': 0.024589713662862778}
        >>> anime_dbaesthetic('best.jpg', fmt='confidence')
        {'masterpiece': 0.3757021427154541, 'best': 0.3451208472251892, 'great': 0.1511985808610916, 'good': 0.04740551486611366, 'normal': 0.02172713913023472, 'low': 0.027498546987771988, 'worst': 0.03134724497795105}
        >>> anime_dbaesthetic('great.jpg', fmt='confidence')
        {'masterpiece': 0.39281174540519714, 'best': 0.22457796335220337, 'great': 0.15563568472862244, 'good': 0.10796019434928894, 'normal': 0.047730278223752975, 'low': 0.0393439345061779, 'worst': 0.031940147280693054}
        >>> anime_dbaesthetic('good.jpg', fmt='confidence')
        {'masterpiece': 0.13832266628742218, 'best': 0.20687267184257507, 'great': 0.2509062886238098, 'good': 0.1644320785999298, 'normal': 0.11332042515277863, 'low': 0.08270663768053055, 'worst': 0.043439216911792755}
        >>> anime_dbaesthetic('normal.jpg', fmt='confidence')
        {'masterpiece': 0.033693961799144745, 'best': 0.03375888615846634, 'great': 0.050045162439346313, 'good': 0.16734018921852112, 'normal': 0.4311050772666931, 'low': 0.23242227733135223, 'worst': 0.05163438618183136}
        >>> anime_dbaesthetic('low.jpg', fmt='confidence')
        {'masterpiece': 0.012833272106945515, 'best': 0.01619996316730976, 'great': 0.03074900433421135, 'good': 0.1396280825138092, 'normal': 0.5038207173347473, 'low': 0.22299200296401978, 'worst': 0.07377689331769943}
        >>> anime_dbaesthetic('worst.jpg', fmt='confidence')
        {'masterpiece': 0.02854202501475811, 'best': 0.026677291840314865, 'great': 0.02838410809636116, 'good': 0.026617199182510376, 'normal': 0.02508518099784851, 'low': 0.06039097160100937, 'worst': 0.8043031692504883}
    """
    return _MODEL.get_aesthetic(image, model_name, fmt)