Source code for jam.utils.codec.primitives.strings

"""
String codec implementation for JAM protocol.

Implements encoding and decoding of string values according to the JAM specification.
Strings are encoded with a length prefix followed by UTF-8 encoded bytes.

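Format:
    [Length prefix][UTF-8 encoded bytes]

The length prefix holds the number of UTF-8 encoded bytes (not the number of
characters). It is written and read with ``GeneralCodec`` from
``jam.utils.codec.primitives.integers``, and its width is whatever that codec
reports through ``encode_size``.

NOTE(review): this text previously described the prefix as a fixed
little-endian u64; confirm the actual width and byte order against
``GeneralCodec``.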
"""

from typing import Union, Tuple
from jam.utils.codec.primitives.integers import GeneralCodec
from jam.utils.codec.codec import Codec
from jam.utils.codec.errors import EncodeError, DecodeError
from jam.utils.codec.utils import check_buffer_size, ensure_size


class StringCodec(Codec[str]):
    """
    Codec for string values.

    A string is serialized as a length prefix followed by its UTF-8 bytes.
    The prefix carries the UTF-8 byte count and is produced and parsed by
    ``GeneralCodec``.

    NOTE(review): the upper bound on string length is whatever
    ``GeneralCodec`` can represent -- presumably the u64 range; confirm.
    """

    @staticmethod
    def _encode(value: Union[str, bytes]) -> bytes:
        """
        Return the raw content bytes for ``value``.

        Args:
            value: Text to UTF-8 encode, or bytes which are passed through
                unchanged (no UTF-8 validation is performed on bytes).

        Returns:
            The bytes that follow the length prefix.

        Raises:
            EncodeError: If ``value`` is neither ``str`` nor ``bytes``, or if
                the text cannot be UTF-8 encoded (e.g. lone surrogates).
        """
        if isinstance(value, bytes):
            return value
        if not isinstance(value, str):
            raise EncodeError(0, 0, f"Expected str or bytes, got {type(value)}")
        try:
            return value.encode("utf-8")
        except UnicodeEncodeError as e:
            # Converted here so that every caller (encode_size as well as
            # encode_into) reports the failure as an EncodeError.
            raise EncodeError(0, 0, f"Failed to UTF-8 encode string: {e}") from e

    def encode_size(self, value: Union[str, bytes]) -> int:
        """
        Calculate the number of bytes needed to encode the string.

        The size is the sum of:
            - the length-prefix size reported by ``GeneralCodec().encode_size``
            - the number of UTF-8 encoded content bytes

        Args:
            value: String (or already-encoded bytes) to measure

        Returns:
            Total number of bytes needed

        Raises:
            EncodeError: If the value has the wrong type or cannot be
                UTF-8 encoded
        """
        content_length = len(StringCodec._encode(value))
        return GeneralCodec().encode_size(content_length) + content_length

    def encode_into(self, value: str, buffer: bytearray, offset: int = 0) -> int:
        """
        Encode a string into the provided buffer.

        Args:
            value: String to encode (bytes are rejected here, unlike in
                ``encode_size``)
            buffer: Target buffer
            offset: Starting position in buffer

        Returns:
            Number of bytes written

        Raises:
            EncodeError: If ``value`` is not a ``str`` or cannot be UTF-8
                encoded. Buffer-size problems are reported by
                ``check_buffer_size``.
        """
        if not isinstance(value, str):
            raise EncodeError(0, 0, f"Expected str, got {type(value)}")

        # Encode first: the prefix stores the byte length, which differs from
        # len(value) as soon as the text contains non-ASCII characters.
        encoded_content = StringCodec._encode(value)
        encoded_length = len(encoded_content)

        length_codec = GeneralCodec()
        total_size = length_codec.encode_size(encoded_length) + encoded_length
        check_buffer_size(buffer, total_size, offset)

        # Write the length prefix, then the content immediately after it.
        length_size = length_codec.encode_into(encoded_length, buffer, offset)
        buffer[offset + length_size : offset + total_size] = encoded_content

        return total_size

    @staticmethod
    def decode_from(
        buffer: Union[bytes, bytearray, memoryview], offset: int = 0
    ) -> Tuple[str, int]:
        """
        Decode a string from the provided buffer.

        Args:
            buffer: Source buffer
            offset: Starting position in buffer

        Returns:
            Tuple of (decoded string, bytes read), where bytes read covers
            both the length prefix and the content

        Raises:
            DecodeError: If the content is not valid UTF-8. Truncated input
                is reported by ``GeneralCodec.decode_from`` / ``ensure_size``.
        """
        # Read the length prefix; length_size is how many bytes it occupied.
        length, length_size = GeneralCodec().decode_from(buffer, offset)

        # Ensure the buffer really holds the announced number of bytes.
        total_size = length_size + length
        ensure_size(buffer, total_size, offset)

        content = buffer[offset + length_size : offset + total_size]
        try:
            # bytes(...) materializes memoryview/bytearray slices for decode.
            string = bytes(content).decode("utf-8")
        except UnicodeDecodeError as e:
            raise DecodeError(0, 0, f"Invalid UTF-8 sequence in buffer: {e}") from e

        return string, total_size
# Shared module-level codec instance.
string_codec = StringCodec()