`pixano_inference.utils.media`

Image utilities.

`compress_rle(rle)`

Compress an RLE encoded mask.

Parameters:

Name	Type	Description	Default
`rle`	`dict[str, Any]`	RLE encoded mask as a dictionary.	required

Returns:

Type	Description
`dict[str, Any \| str]`	The RLE dictionary with its `counts` entry compressed into a base64 string.

Source code in pixano_inference/utils/media.py

def compress_rle(rle: dict[str, Any]) -> dict[str, Any | str]:
    """Compress an RLE encoded mask.

    Args:
        rle: RLE encoded mask as a dictionary.

    Returns:
        Compressed RLE encoded mask as a string.
    """
    counts = np.array(rle["counts"], dtype=np.uint32).tobytes()
    rle["counts"] = base64.b64encode(counts).decode("utf-8")
    return rle

`convert_image_pil_to_tensor(image, device, size=None)`

Convert an image in PIL format to a PyTorch tensor and optionally resize it.

Source code in pixano_inference/utils/media.py

def convert_image_pil_to_tensor(image: Image.Image, device: "torch.device", size: int | None = None) -> "Tensor":
    """Convert an image in PIL format to a PyTorch tensor and optionally resize it.

    Args:
        image: PIL image to convert. It is converted to RGB before anything else.
        device: Device on which the resulting tensor is placed.
        size: If given, the image is resized to a ``size`` x ``size`` square
            (the aspect ratio is not preserved).

    Returns:
        Tensor in channel-first layout (3, height, width) with values scaled
        to the [0, 1] range.
    """
    assert_torch_installed()
    rgb_image = image.convert("RGB")
    if size is not None:
        rgb_image = rgb_image.resize((size, size))
    # Dividing the uint8 pixel array by a Python float yields float64 values in [0, 1].
    scaled_pixels = np.array(rgb_image) / 255.0
    # The array is (height, width, channel); permute to (channel, height, width).
    return torch.from_numpy(scaled_pixels).to(device=device).permute(2, 0, 1)

`convert_string_to_image(str_image)`

Convert a string or path to an image.

Parameters:

Name	Type	Description	Default
`str_image`	`str \| Path`	Image as a string or path.	required

Returns:

Type	Description
`Image`	Image.

Source code in pixano_inference/utils/media.py

def convert_string_to_image(str_image: str | Path) -> Image.Image:
    """Convert a string or path to an RGB image.

    A string is interpreted, in this order, as a URL, a base64 data URI
    ("data:image/...;base64,...") or a path to an existing file.

    Args:
        str_image: Image as a URL, base64 string or path.

    Returns:
        The image converted to RGB mode.

    Raises:
        ValueError: If the input is neither a ``Path`` nor a string that is a
            URL, a base64 image or an existing path.
        requests.HTTPError: If the URL answers with an HTTP error status.
    """
    invalid_input = "The image is not a valid path, URL or base64 string."
    if isinstance(str_image, str):
        if is_url(str_image):
            # NOTE(review): no timeout is configured for the download; consider adding one.
            response = requests.get(str_image)
            # Fail on 4xx/5xx instead of handing an error page to the image decoder.
            response.raise_for_status()
            source = BytesIO(response.content)
        elif is_base64_image(str_image):
            source = BytesIO(base64.b64decode(extract_media_from_base64(str_image)))
        elif Path(str_image).exists():
            source = str_image
        else:
            raise ValueError(invalid_input)
    elif isinstance(str_image, Path):
        source = str_image
    else:
        raise ValueError(invalid_input)
    # convert() loads the pixel data and returns a new image, so the underlying
    # file can be closed as soon as the conversion is done.
    with Image.open(source) as image_pil:
        return image_pil.convert("RGB")

`convert_string_video_to_bytes_or_path(str_video)`

Convert a string to a video or video path.

Parameters:

Name	Type	Description	Default
`str_video`	`str \| Path`	Video as a string or path.	required

Returns:

Type	Description
`bytes \| Path`	The video.

Source code in pixano_inference/utils/media.py

def convert_string_video_to_bytes_or_path(str_video: str | Path) -> bytes | Path:
    """Convert a string to video bytes or to a video path.

    A string is interpreted, in this order, as a URL, a base64 data URI
    ("data:video/...;base64,...") or a path to an existing file.

    Args:
        str_video: Video as a URL, base64 string or path.

    Returns:
        The video content as bytes when given a URL or a base64 string, or the
        path to the video when given a path.

    Raises:
        ValueError: If the input is neither a ``Path`` nor a string that is a
            URL, a base64 video or an existing path.
        requests.HTTPError: If the URL answers with an HTTP error status.
    """
    invalid_input = "The video is not a valid path, URL or base64 string."
    if isinstance(str_video, Path):
        return str_video
    if not isinstance(str_video, str):
        raise ValueError(invalid_input)
    if is_url(str_video):
        # NOTE(review): no timeout is configured for the download; consider adding one.
        response = requests.get(str_video)
        response.raise_for_status()
        # Return the downloaded payload itself so the result really is ``bytes``.
        return response.content
    if is_base64_video(str_video):
        return base64.b64decode(extract_media_from_base64(str_video))
    video_path = Path(str_video)
    if video_path.exists():
        return video_path
    raise ValueError(invalid_input)

`decode_rle_to_mask(rle)`

Decode an RLE encoded mask.

Parameters:

Name	Type	Description	Default
`rle`	`dict`	RLE encoded mask as a dictionary.	required

Returns:

Type	Description
`ndarray`	Decoded binary mask of shape (height, width).

Source code in pixano_inference/utils/media.py

def decode_rle_to_mask(rle: dict) -> np.ndarray:
    """Decode an RLE encoded mask.

    The run lengths in ``rle["counts"]`` alternate between background and
    foreground, starting with background, and enumerate the pixels column by
    column.

    Args:
        rle: RLE encoded mask as a dictionary with a ``"size"`` entry
            (height, width) and a ``"counts"`` entry (iterable of run lengths).

    Returns:
        Decoded binary mask of shape (height, width).
    """
    height, width = rle["size"]
    # Zero-initialised so that pixels not covered by the counts are background
    # instead of whatever happened to be in uninitialised memory.
    flat_mask = np.zeros(height * width, dtype=bool)
    start = 0
    value = False  # The first run always describes background pixels.
    for run_length in rle["counts"]:
        flat_mask[start : start + run_length] = value
        start += run_length
        value = not value
    # Runs are stored column-major: rebuild as (width, height), then transpose.
    return flat_mask.reshape(width, height).transpose()

`decompress_rle(rle)`

Decompress a compressed RLE encoded mask.

Parameters:

Name	Type	Description	Default
`rle`	`dict[str, Any]`	Compressed RLE encoded mask as a dictionary whose `counts` entry is a base64 string.	required

Returns:

Type	Description
`dict[str, Any]`	Decompressed RLE encoded mask as a dictionary.

Source code in pixano_inference/utils/media.py

def decompress_rle(rle: dict[str, Any]) -> dict[str, Any]:
    """Decompress the run lengths of a compressed RLE encoded mask.

    The base64 string stored under ``"counts"`` is decoded and read back as
    unsigned 32-bit integers. The dictionary passed in is updated in place
    and returned.

    Args:
        rle: Compressed RLE encoded mask as a dictionary whose ``"counts"``
            entry is a base64 string.

    Returns:
        The same dictionary, with ``"counts"`` replaced by a list of integers.
    """
    raw_counts = base64.b64decode(rle["counts"])
    run_lengths = np.frombuffer(raw_counts, dtype=np.uint32)
    rle["counts"] = run_lengths.tolist()
    return rle

`encode_mask_to_rle(mask)`

Encode a binary mask using RLE.

Parameters:

Name	Type	Description	Default
`mask`	`Tensor`	A binary mask of shape (height, width).	required

Returns:

Type	Description
`dict[str, list[int]]`	RLE encoded mask as a dictionary.

Source code in pixano_inference/utils/media.py

def encode_mask_to_rle(mask: "Tensor") -> dict[str, list[int]]:
    """Encode a binary mask using RLE.

    Pixels are enumerated column by column. The run lengths alternate between
    background and foreground and always start with background, so a mask
    whose first pixel is foreground gets a leading run of length 0.

    Args:
        mask: A binary mask of shape (height, width).

    Returns:
        RLE encoded mask as a dictionary with the run lengths under
        ``"counts"`` and the mask shape under ``"size"``. An empty mask
        yields an empty list of counts.
    """
    assert_torch_installed()
    rle = {"counts": [], "size": list(mask.shape)}
    flat_mask = mask.permute(1, 0).flatten()
    if flat_mask.numel() == 0:
        return rle

    # Positions at which the value differs from the previous pixel.
    change_points = torch.where(torch.diff(flat_mask) != 0)[0] + 1
    # Helper tensors live on the mask's device so that concatenation also
    # works for masks that are not on the CPU.
    run_boundaries = torch.cat(
        (
            torch.tensor([0], device=flat_mask.device),
            change_points,
            torch.tensor([flat_mask.numel()], device=flat_mask.device),
        )
    )
    lengths = torch.diff(run_boundaries)

    # note that the odd counts are always the numbers of zeros
    if flat_mask[0] == 1:
        lengths = torch.cat((torch.tensor([0], device=flat_mask.device), lengths))

    rle["counts"] = lengths.tolist()

    return rle

`extract_media_from_base64(string)`

Extract from a base64 media the actual base64 part.

Source code in pixano_inference/utils/media.py

def extract_media_from_base64(string: str) -> str:
    """Return the base64 payload of a base64 media string.

    The leading header matched by ``match_base64_media`` is stripped and the
    remainder of the string is returned.

    Raises:
        ValueError: If the string does not start with the expected header.
    """
    header_match = match_base64_media(string)
    if header_match is not None:
        header = header_match.group(1)
        return string[len(header) :]
    raise ValueError("The string does not match the expected format.")

`is_base64_image(string)`

Check if a string is a base64 image.

The expected format is "data:image/{image_format};base64,{base64}".

Source code in pixano_inference/utils/media.py

def is_base64_image(string: str) -> bool:
    """Tell whether a string is a base64 encoded image.

    A matching string has the form "data:image/{image_format};base64,{base64}".
    """
    return is_base64_media(string, media="image")

`is_base64_media(string, media)`

Check if a string is a base64 media.

The expected format is "data:{media}/{media_format};base64,{base64}".

Source code in pixano_inference/utils/media.py

def is_base64_media(string: str, media: str | None) -> bool:
    """Tell whether a string is a base64 encoded media of the given kind.

    A matching string has the form "data:{media}/{media_format};base64,{base64}".
    """
    header_match = match_base64_media(string, media)
    return header_match is not None

`is_base64_video(string)`

Check if a string is a base64 video.

The expected format is "data:video/{video_format};base64,{base64}".

Source code in pixano_inference/utils/media.py

def is_base64_video(string: str) -> bool:
    """Tell whether a string is a base64 encoded video.

    A matching string has the form "data:video/{video_format};base64,{base64}".
    """
    return is_base64_media(string, media="video")

`match_base64_media(string, media=None)`

Match a base64 media.

Source code in pixano_inference/utils/media.py

def match_base64_media(string: str, media: str | None = None) -> re.Match[str] | None:
    """Match a base64 media."""
    regex_media_base64 = rf"^(data:{media if media is not None else '[a-zA-Z]'}/[a-zA-Z]+;base64,)"
    return re.match(regex_media_base64, string)