Source code for ads.data_labeling.visualizer.image_visualizer

#!/usr/bin/env python
# -*- coding: utf-8; -*-

# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

"""
The module that helps to visualize Image Dataset.

Methods
-------
    render(items: List[LabeledImageItem], options: Dict = None)
        Renders Labeled Image dataset.

Examples
--------
>>> bbox1 = BoundingBoxItem(bottom_left=(0.3, 0.4),
>>>                        top_left=(0.3, 0.09),
>>>                        top_right=(0.86, 0.09),
>>>                        bottom_right=(0.86, 0.4),
>>>                        labels=['dolphin', 'fish'])

>>> record1 = LabeledImageItem(img_obj1, [bbox1])

>>> bbox2 = BoundingBoxItem(bottom_left=(0.2, 0.4),
>>>                        top_left=(0.2, 0.2),
>>>                        top_right=(0.8, 0.2),
>>>                        bottom_right=(0.8, 0.4),
>>>                        labels=['dolphin'])
>>> bbox3 = BoundingBoxItem(bottom_left=(0.5, 1.0),
>>>                        top_left=(0.5, 0.8),
>>>                        top_right=(0.8, 0.8),
>>>                        bottom_right=(0.8, 1.0),
>>>                        labels=['shark'])

>>> record2 = LabeledImageItem(img_obj2, [bbox2, bbox3])
>>> render(items = [record1, record2], options={"default_color":"blue", "colors": {"dolphin":"blue", "whale":"red"}})
"""

from dataclasses import asdict, dataclass
from typing import Dict, List, Optional, Tuple

from matplotlib.colors import is_color_like
import matplotlib.pyplot as plt
from ads.common import logger
import os
import numpy as np
import pandas as pd
from ads.data_labeling.boundingbox import BoundingBoxItem
from PIL.ImageFile import ImageFile
from ads.data_labeling.constants import AnnotationType

# Fallback annotation color, used when the render options do not specify one.
DEFAULT_COLOR = "white"
# File extensions accepted when a save path points to a file rather than a directory.
IMG_FORMAT = [".jpg", ".jpeg", ".png"]


class WrongEntityFormat(ValueError):
    """Raised when labels cannot be turned into a valid ``BoundingBoxItem``."""

    def __init__(self):
        # The message has no placeholders, so plain string literals are used
        # instead of an f-string.
        super().__init__(
            "Invalid labels from the dataset, "
            "cannot construct a valid BoundingBoxItem."
        )
@dataclass
class LabeledImageItem:
    """A labeled image together with its bounding boxes.

    Attributes
    ----------
    img: ImageFile
        The labeled image object.
    boxes: List[BoundingBoxItem]
        The bounding boxes annotated on the image.
    """

    img: ImageFile
    boxes: List[BoundingBoxItem]

    def __post_init__(self):
        # Validate eagerly so an invalid item can never be constructed.
        self._validate()

    def _validate(self):
        """Checks that the image and the boxes have the expected types.

        Raises
        ------
        ValueError
            If the image is missing or is not a `PIL.ImageFile.ImageFile`.
        WrongEntityFormat
            If any element of `boxes` is not a `BoundingBoxItem`.
        """
        image = self.img
        if image is None:
            raise ValueError("The parameter `img` is required.")
        if not isinstance(image, ImageFile):
            raise ValueError(
                "The parameter `img` must be an object of type `PIL.ImageFile.ImageFile`."
            )
        for box in self.boxes:
            if not isinstance(box, BoundingBoxItem):
                raise WrongEntityFormat()
@dataclass
class RenderOptions:
    """Data class representing render options.

    Attributes
    ----------
    default_color: str
        The color used for labels that have no entry in `colors`.
    colors: Optional[dict]
        Mapping from a label (or a sorted tuple of labels) to a color.
    """

    default_color: str
    colors: Optional[dict]

    @staticmethod
    def _validate(options: dict) -> None:
        """Validates that every color in the options is a valid matplotlib color.

        Parameters
        ----------
        options: dict
            Render options in dictionary format. Recognized keys are
            `default_color` and `colors`.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        ValueError
            If a provided color is not valid.
        """
        if not options:
            return None

        # `colors` may be present but explicitly set to None; treat it as empty
        # (this mirrors the `or {}` fallback in `from_dict`).
        # A list is used instead of a set so that unhashable but valid color
        # specs, such as an RGB list `[1, 0, 0]`, can be validated too.
        color_values = list((options.get("colors") or {}).values())
        if "default_color" in options:
            color_values.append(options["default_color"])

        for color_value in color_values:
            if not is_color_like(color_value):
                raise ValueError(
                    f"{color_value} is not supported. "
                    "Use RGB format for colors. For instance: `'#EEEEEE'` or `'green'`."
                )

    @classmethod
    def from_dict(cls, options: Optional[dict]) -> "RenderOptions":
        """Constructs an instance of RenderOptions from a dictionary.

        Parameters
        ----------
        options: Optional[dict]
            Render options in dictionary format. When empty or None, the
            defaults (`DEFAULT_COLOR` and no per-label colors) are used.

        Returns
        -------
        RenderOptions
            The instance of RenderOptions.

        Raises
        ------
        ValueError
            If a provided color is not valid.
        """
        if not options:
            return cls(default_color=DEFAULT_COLOR, colors={})

        cls._validate(options)
        return cls(
            default_color=options.get("default_color", DEFAULT_COLOR),
            colors=options.get("colors") or {},
        )

    def to_dict(self):
        """Converts RenderOptions instance to dictionary format.

        Returns
        -------
        dict
            The render options in dictionary format.
        """
        return asdict(self)

    def __repr__(self) -> str:
        return repr(self.to_dict())
class ImageLabeledDataFormatter:
    """The ImageRender class to render Image items in a notebook session."""

    @staticmethod
    def _resolve_save_path(path: str) -> str:
        """Returns the file path an annotated image should be saved to.

        Parameters
        ----------
        path: str
            Either an existing directory or a file path with a supported
            image extension.

        Returns
        -------
        str
            `<path>/1.jpg` when `path` is a directory, otherwise `path` itself.

        Raises
        ------
        ValueError
            If `path` is neither a directory nor a supported image file name.
        """
        if os.path.isdir(path):
            # Join instead of concatenating: `path + "1.jpg"` only lands inside
            # the directory when `path` happens to end with a separator.
            return os.path.join(path, "1.jpg")
        if path.lower().endswith(tuple(IMG_FORMAT)):
            return path

        img_format_str = (
            IMG_FORMAT[0]
            if len(IMG_FORMAT) == 1
            else ", ".join(IMG_FORMAT[:-1] + ["and " + IMG_FORMAT[-1]])
        )
        raise ValueError(
            f"Invalid {path}. It is not a directory or the image format "
            f"in {path} is not supported. Currently the support types "
            f"are `{img_format_str}`."
        )

    @staticmethod
    def render_item(
        item: "LabeledImageItem", options: Dict = None, path: str = None
    ) -> None:
        """Renders a single labeled image.

        Parameters
        ----------
        item: LabeledImageItem
            Item to render.
        options: Optional[dict]
            Render options.
        path: str
            Path to save the image with annotations to local directory.
            When it is a directory, the image is saved there as `1.jpg`.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        ValueError
            If item not provided.
            If path is not valid.
        TypeError
            If item provided in a wrong format.
        """
        if not item:
            raise ValueError("The parameter `item` is required.")
        if not isinstance(item, LabeledImageItem):
            raise TypeError(
                "The parameter `item` must be an object of type `LabeledImageItem`."
            )

        render_options = RenderOptions.from_dict(options)
        if path:
            path = ImageLabeledDataFormatter._resolve_save_path(path)

        # draw the image with annotations
        ImageLabeledDataFormatter()._draw_labels(
            item=item, options=render_options, path=path
        )

    def _draw_labels(
        self,
        item: "LabeledImageItem",
        options: "Optional[RenderOptions]" = None,
        path: str = None,
        figure_size: Tuple = (6, 8),
        fontsize: int = 14,
    ):
        """Draw image with annotations.

        Parameters
        ----------
        item: LabeledImageItem
            Item to render.
        options: Optional[RenderOptions]
            Render options. A plain dictionary or None is also accepted and
            converted with `RenderOptions.from_dict`.
        path: str
            Path to save the image with annotations to local directory.
            When given, the figure is saved and closed instead of shown.
        figure_size: Tuple
            Figure size of the rendered image.
        fontsize: int
            Font size of the annotations.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        TypeError
            If image type is not PIL.ImageFile.ImageFile.
        """
        # The colors are read through attributes below, so normalize None and
        # dictionaries into a RenderOptions instance first.
        if options is None or isinstance(options, dict):
            options = RenderOptions.from_dict(options)

        img = item.img
        # using matplotlib to open image return numpy array
        if hasattr(img, "shape"):
            # Only the first two dimensions are needed; slicing also supports
            # 2-D (single channel) arrays.
            im_height, im_width = img.shape[:2]
        elif hasattr(img, "size"):
            im_width, im_height = img.size
        else:
            raise TypeError("The input image type must be `PIL.ImageFile.ImageFile`.")

        fig, ax = plt.subplots(1, 1, figsize=figure_size)
        ax.imshow(img)
        for ent in item.boxes:
            category_name = ", ".join([str(label) for label in ent.labels])
            left, top, width, height = self._calculate_bbx(im_width, im_height, ent)

            if len(ent.labels) > 1:
                # sort the multiple labels and use the tuple as key to look up in colormap
                # note: tuple of labels provided as key in colormap must be sorted.
                color_key = tuple(sorted(ent.labels))
                color = options.colors.get(color_key, options.default_color)
            else:
                color = options.colors.get(category_name, options.default_color)

            rect = plt.Rectangle(
                (left, top),
                width,
                height,
                fill=False,
                linewidth=2,
                edgecolor=color,
            )
            ax.add_patch(rect)
            props = dict(boxstyle="round", facecolor=color, alpha=0.6)
            ax.text(
                left,
                top,
                category_name,
                fontsize=fontsize,
                color="black",
                verticalalignment="top",
                bbox=props,
            )
        ax.axis("off")

        if path:
            plt.savefig(path, bbox_inches="tight")
            logger.info(f"The annotated image file is saved in {path}.")
            # Close the figure so the saved image is not also displayed.
            plt.close(fig)
            return
        plt.show()

    def _calculate_bbx(self, im_width, im_height, bbox):
        """Calculates bounding box coordinates in pixels.

        The corner coordinates of `bbox` are multiplied by the image size,
        i.e. they are treated as fractions of the image width and height.

        Parameters
        ----------
        im_width: float
            width of the image in pixels
        im_height: float
            height of the image in pixels
        bbox: BoundingBoxItem
            The bounding box to convert.

        Returns
        -------
        Tuple
            left, top, width, height of the bounding box
        """
        left = bbox.top_left[0] * im_width
        top = bbox.top_left[1] * im_height
        width = (bbox.top_right[0] - bbox.top_left[0]) * im_width
        height = (bbox.bottom_left[1] - bbox.top_left[1]) * im_height
        return left, top, width, height

    def _convert_pil_to_nparray(self, img):
        """Converts a PIL image object to a numpy array.

        Parameters
        ----------
        img: PIL.ImageFile.ImageFile

        Returns
        -------
        numpy.ndarray
        """
        return np.array(img)
def _df_to_bbox_items( df: pd.DataFrame, content_column="Content", annotations_column: str = "Annotations", categories: List[str] = None, ) -> List[LabeledImageItem]: """Converts pandas dataframe into a list of LabeledImageItem objects. Parameters ---------- df: pd.DataFrame The Pandas dataframe to convert. content_column: Optional[str] The column name with the content data. annotations_column: Optional[str] The column name for the annotations list. categories: Optional List[str] The list of object categories in proper order for model training. Only used when bounding box annotations are in YOLO format. Example: ['cat','dog','horse'] Returns ------- List[LabeledImageItem] The list of LabeledImageItem objects. Raises ------ TypeError If input data is not a pandas dataframe. ValueError If input data has a wrong format. """ if not isinstance(df, pd.DataFrame): raise TypeError("The parameter `df` must be a Pandas dataframe.") if content_column not in list(df.columns): raise ValueError( "Wrong format of input dataframe. It must have " f"`{content_column}` column." ) if annotations_column not in list(df.columns): raise ValueError( "Wrong format of input dataframe. It must have " f"`{annotations_column}` column." ) if df[content_column].isnull().values.any(): logger.warning( "The source Dataframe includes records where content is not loaded. " "Use `materialize=True` to load the content. " "The records with empty content will be ignored." ) result = [] for item in df.T.to_dict().values(): if item[annotations_column] and not isinstance(item[annotations_column], list): raise ValueError( "The parameter `df` is invalid. " f"The column {annotations_column}  must be of type `List[BoundingBoxItem]`." 
) if item[content_column]: if ( isinstance(item[annotations_column][0], list) and item[annotations_column][0][0] and isinstance(item[annotations_column][0][0], Tuple) and len(item[annotations_column][0][0]) == 5 ): bbox_items = [ BoundingBoxItem.from_yolo(bbox, categories) for bbox in item[annotations_column] ] else: bbox_items = item[annotations_column] or [] result.append(LabeledImageItem(item[content_column], bbox_items)) return result
def render(
    items: List["LabeledImageItem"], options: Dict = None, path: str = None
) -> None:
    """Render image dataset.

    Parameters
    ----------
    items: List[LabeledImageItem]
        The list of LabeledImageItem to render.
    options: dict, optional
        The options for rendering.
    path: str
        Path to save the images with annotations to local directory.
        If it is an existing directory, the images are saved inside it as
        `1.jpg`, `2.jpg`, ... If it is a file name with a supported extension,
        `_<n>` is inserted before the extension for the n-th image.

    Returns
    -------
    None
        Nothing.

    Raises
    ------
    ValueError
        If items not provided.
        If path is not valid.
    TypeError
        If items provided in a wrong format.

    Examples
    --------
    >>> bbox1 = BoundingBoxItem(bottom_left=(0.3, 0.4),
    ...                        top_left=(0.3, 0.09),
    ...                        top_right=(0.86, 0.09),
    ...                        bottom_right=(0.86, 0.4),
    ...                        labels=['dolphin', 'fish'])
    >>> record1 = LabeledImageItem(img_obj1, [bbox1])
    >>> render(items = [record1])
    """
    if not items:
        raise ValueError("The parameter `items` is required.")
    if not isinstance(items, list) or not all(
        isinstance(x, LabeledImageItem) for x in items
    ):
        raise TypeError(
            "Wrong format for the items. The items must be `List[LabeledImageItem]`."
        )

    if not path:
        for item in items:
            ImageLabeledDataFormatter.render_item(item, options)
        return

    # The path is validated once, before anything is rendered.
    save_to_dir = os.path.isdir(path)
    if not save_to_dir and not path.lower().endswith(tuple(IMG_FORMAT)):
        img_format_str = (
            IMG_FORMAT[0]
            if len(IMG_FORMAT) == 1
            else ", ".join(IMG_FORMAT[:-1] + ["and " + IMG_FORMAT[-1]])
        )
        raise ValueError(
            f"Invalid {path}. It is not a directory or the image format "
            f"in {path} is not supported. Currently the support "
            f"types are {img_format_str}."
        )

    # Split on the last dot: everything before it is the root, the rest is
    # the extension.
    path_root, _, img_type = path.rpartition(".")
    for idx, item in enumerate(items, start=1):
        if save_to_dir:
            # Join so the file always lands inside the directory, with or
            # without a trailing separator on `path`.
            item_path = os.path.join(path, f"{idx}.jpg")
        else:
            item_path = f"{path_root}_{idx}.{img_type}"
        ImageLabeledDataFormatter.render_item(item, options, path=item_path)