# Source code for imgutils.metadata.lsb.read

"""
This module provides functionality for extracting hidden metadata from images using
LSB (Least Significant Bit) steganography.

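It includes two main classes:

1. LSBExtractor: Extracts bits and bytes from image data.
2. ImageLsbDataExtractor: Uses LSBExtractor to check the magic header and extract the hidden raw payload from images.

It also provides the functions ``read_lsb_raw_bytes`` (returns the raw payload) and ``read_lsb_metadata``
(decompresses the payload with gzip and decodes it as JSON), both of which report failures as ``LSBReadError``.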

The module is based on the implementation from the NovelAI project (https://github.com/NovelAI/novelai-image-metadata).

Usage:
    >>> from PIL import Image
    >>>
    >>> # Load an image
    >>> image = Image.open('path_to_image.png')
    >>>
    >>> # Create an extractor
    >>> extractor = ImageLsbDataExtractor()
    >>>
    >>> # Extract the raw hidden payload (read_lsb_metadata gunzips and JSON-decodes this payload)
    >>> raw_data = extractor.extract_data(image)
    >>>
    >>> # Process the extracted payload
    >>> print(raw_data)
"""

import gzip
import json
import zlib

import numpy as np
from PIL import Image

from imgutils.data import ImageTyping
from ...data import load_image


class LSBExtractor:
    """
    Sequential reader for data hidden in the least significant bits of image pixels.

    Only the last channel of every pixel is inspected. Pixels are visited column by
    column (the row index advances first, then the column), and the collected bits are
    packed most-significant-bit first into bytes.

    :param data: The image data as a 3-dimensional numpy array.
    :type data: np.ndarray
    """

    def __init__(self, data: np.ndarray):
        """
        Store the pixel array and reset the read cursor.

        :param data: The image data as a 3-dimensional numpy array; its shape is
            unpacked into ``rows``, ``cols`` and ``dim``.
        :type data: np.ndarray
        """
        self.data = data
        self.rows, self.cols, self.dim = data.shape
        # Bit accumulator: how many bits are pending and their packed value.
        self.bits = self.byte = 0
        # Cursor of the next pixel to read.
        self.row = self.col = 0

    def _extract_next_bit(self):
        """
        Pull one more bit from the pixel under the cursor into the accumulator.

        The cursor then moves one row down, wrapping to the top of the next column
        once the bottom row has been consumed.

        :raises IOError: If the cursor is already outside the image.
        """
        if self.row >= self.rows or self.col >= self.cols:
            raise IOError('Cannot read more bits.')

        # The payload lives in the LSB of the last channel.
        lsb = self.data[self.row, self.col, self.dim - 1] & 1
        self.bits += 1
        self.byte = (self.byte << 1) | lsb

        self.row += 1
        if self.row == self.rows:
            self.row, self.col = 0, self.col + 1

    def get_one_byte(self):
        """
        Read bits until eight are accumulated and return them as one byte.

        The accumulator is cleared afterwards so the next call starts a fresh byte.

        :return: A one-element bytearray holding the extracted byte.
        :rtype: bytearray
        :raises IOError: If the image runs out of pixels before 8 bits are gathered.
        """
        for _ in range(8 - self.bits):
            self._extract_next_bit()

        packed = bytearray((self.byte,))
        self.bits = self.byte = 0
        return packed

    def get_next_n_bytes(self, n):
        """
        Read up to ``n`` consecutive bytes.

        Reading stops early only if :meth:`get_one_byte` hands back an empty value.

        :param n: The number of bytes to extract.
        :type n: int
        :return: The extracted bytes.
        :rtype: bytearray
        :raises IOError: Propagated from :meth:`get_one_byte` when the image is exhausted.
        """
        collected = bytearray()
        for _ in range(n):
            chunk = self.get_one_byte()
            if len(chunk) == 0:
                break
            collected += chunk
        return collected

    def read_32bit_integer(self):
        """
        Read four bytes and interpret them as a big-endian integer.

        :return: The decoded integer, or None when fewer than 4 bytes were obtained.
        :rtype: int or None
        :raises IOError: Propagated from :meth:`get_next_n_bytes` when the image is exhausted.
        """
        raw = self.get_next_n_bytes(4)
        if len(raw) != 4:
            return None
        return int.from_bytes(raw, byteorder='big')
class ImageLsbDataExtractor:
    """
    Reader for a payload hidden in an image via LSB steganography.

    The hidden stream is expected to start with a magic marker, followed by a 32-bit
    big-endian length field and then the payload itself. Reading is delegated to
    :class:`LSBExtractor`.

    :param magic: The magic string that must prefix the hidden data.
    :type magic: str
    """

    def __init__(self, magic: str = "stealth_pngcomp"):
        """
        Remember the UTF-8 encoded form of the magic marker.

        :param magic: The magic string that must prefix the hidden data.
        :type magic: str
        """
        self._magic_bytes = magic.encode('utf-8')

    def extract_data(self, image: Image.Image) -> bytes:
        """
        Pull the hidden payload out of ``image``.

        The magic marker is verified first, then the length field is read and the
        payload of that size is returned without any decoding.

        :param image: The image to extract data from; must be in RGBA mode.
        :type image: Image.Image
        :return: The extracted raw data, as produced by ``LSBExtractor.get_next_n_bytes``
            (a bytearray).
        :rtype: bytes
        :raises ValueError: If the image is not in RGBA mode, if the magic number doesn't
            match, or if no length field could be read.
        """
        if image.mode != 'RGBA':
            raise ValueError(f'Image should be in RGBA mode, but {image.mode!r} found.')

        # noinspection PyTypeChecker
        reader = LSBExtractor(np.array(image))

        expected_magic = self._magic_bytes
        found_magic = reader.get_next_n_bytes(len(expected_magic))
        if expected_magic != found_magic:
            raise ValueError(f'Image magic number mismatch, {expected_magic!r} expected but {found_magic!r}.')

        length_field = reader.read_32bit_integer()
        if length_field is None:
            raise ValueError('No next int32 to read.')

        # The length field is divided by 8 to get a byte count, i.e. it is treated as a bit count.
        return reader.get_next_n_bytes(length_field // 8)
class LSBReadError(Exception):
    """
    Raised when hidden LSB data cannot be read or decoded.

    The triggering exception is kept on the ``error`` attribute.

    :param err: The original exception that caused the LSB read error.
    :type err: Exception
    """

    def __init__(self, err: Exception):
        """
        Wrap ``err`` in an LSB read error.

        :param err: The original exception that caused the LSB read error.
        :type err: Exception
        """
        # A single tuple argument (message, original error) is handed to the base class.
        details = (f'LSB Read Error - {err!r}', err)
        super().__init__(details)
        self.error = err
def read_lsb_raw_bytes(image: ImageTyping) -> bytes:
    """
    Read raw bytes of LSB-encoded data from an image.

    The image is loaded with ``load_image`` (no mode conversion, no forced background)
    and handed to a default :class:`ImageLsbDataExtractor`.

    :param image: The image to extract data from. Can be a file path, URL, or Image object.
    :type image: ImageTyping
    :return: The extracted raw data.
    :rtype: bytes
    :raises LSBReadError: If there's an error during the extraction process. The original
        exception is available as ``error`` and as ``__cause__``.
    """
    # Loading happens outside the try block, so load failures are not wrapped here.
    image = load_image(image, mode=None, force_background=None)
    try:
        return ImageLsbDataExtractor().extract_data(image)
    except (ValueError, OSError, EOFError) as err:
        # ValueError: wrong image mode, magic mismatch or missing length field
        #             (UnicodeDecodeError is a ValueError subclass and is covered too)
        # OSError (IOError is an alias of it), EOFError: unable to read more from images
        raise LSBReadError(err) from err
def read_lsb_metadata(image: ImageTyping):
    """
    Read and decode LSB-encoded metadata from an image.

    The raw payload is obtained via :func:`read_lsb_raw_bytes`, decompressed with gzip,
    decoded as UTF-8 text and finally parsed as JSON.

    :param image: The image to extract metadata from. Can be a file path, URL, or Image object.
    :type image: ImageTyping
    :return: The decoded metadata as a Python object.
    :rtype: dict
    :raises LSBReadError: If there's an error during the extraction or decoding process.
        The original exception is available as ``error`` and as ``__cause__``.
    """
    try:
        # An LSBReadError raised by read_lsb_raw_bytes is not listed below and passes through unchanged.
        raw_data = read_lsb_raw_bytes(image)
        return json.loads(gzip.decompress(raw_data).decode("utf-8"))
    except (json.JSONDecodeError, zlib.error, gzip.BadGzipFile, EOFError, UnicodeDecodeError) as err:
        # zlib.error, gzip.BadGzipFile: payload is not valid gzip/deflate data
        # EOFError: compressed stream ended prematurely
        # UnicodeDecodeError: cannot decode as utf-8 text
        # json.JSONDecodeError: decoded text is not valid JSON
        raise LSBReadError(err) from err