Skip to content

dqm_ml_images.visual_features

Visual feature extraction processor for image quality assessment.

This module contains the VisualFeaturesProcessor class that extracts visual quality features from images including luminosity, contrast, blur, and entropy.

logger = logging.getLogger(__name__) module-attribute

VisualFeaturesProcessor

Bases: DatametricProcessor

Computes basic image quality features per sample.

Features
  • Luminosity: Mean intensity of the image. By default, it is the average gray level mapped to the [0, 1] range.
  • Contrast: RMS contrast, calculated as the standard deviation of the gray level intensities, mapped to the [0, 1] range.
  • Blur: Measured as the variance of the Laplacian of the image. A higher value indicates more edges and higher sharpness (i.e., less blur).
  • Entropy: Shannon entropy of the image's grayscale histogram. Measures the information content or complexity.

This processor operates purely at the feature extraction level (per-sample).

Source code in packages/dqm-ml-images/src/dqm_ml_images/visual_features.py
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
class VisualFeaturesProcessor(DatametricProcessor):
    """
    Computes basic image quality features per sample.

    Features:
      - Luminosity: Mean intensity of the image. By default, it is the average gray level
        mapped to the [0, 1] range.
      - Contrast: RMS contrast, calculated as the standard deviation of the gray level
        intensities, mapped to the [0, 1] range.
      - Blur: Measured as the variance of the Laplacian of the image. A higher value
        indicates more edges and higher sharpness (i.e., less blur).
      - Entropy: Shannon entropy of the image's grayscale histogram. Measures the
        information content or complexity.

    This processor operates purely at the feature extraction level (per-sample).
    """

    # Default mapping from metric name to output column name.
    DEFAULT_OUTPUTS = {
        "luminosity": "m_luminosity",
        "contrast": "m_contrast",
        "blur": "m_blur_level",
        "entropy": "m_entropy",
    }

    def __init__(self, name: str = "visual_metric", config: dict[str, Any] | None = None) -> None:
        """
        Initialize the visual features processor.

        Args:
            name: Unique name of the processor instance.
            config: Configuration dictionary containing:
                - input_columns: List containing the name of the image column (bytes or path).
                - output_features: Mapping of feature names to output column names.
                - grayscale: Whether to convert images to grayscale (default: True).
                - normalize: Whether to normalize pixel values to [0, 1] (default: True).
                - entropy_bins: Number of bins for entropy calculation (default: 256).
                - clip_percentiles: Tuple of (low, high) percentiles for intensity clipping.
                - laplacian_kernel: Size of the Laplacian kernel ('3x3' or '5x5').
                - dataset_root_path: Root directory for relative image paths.

        Raises:
            ValueError: If 'output_features' resolves to something other than a dict.
        """
        super().__init__(name, config)

        # Local view of config for convenience
        cfg = self.config or {}

        # handle relative paths in parquet to a dataset located at dataset_root_path
        self.dataset_root_path = str(cfg.get("dataset_root_path", "undefined"))

        if not hasattr(self, "input_columns") or not self.input_columns:
            self.input_columns = ["image_bytes"]

        if not hasattr(self, "output_features") or not self.output_features:
            # Use config-provided mapping if present, otherwise defaults
            cfg_outputs = cfg.get("output_features")
            self.output_features: dict[str, str] = (
                cfg_outputs.copy() if isinstance(cfg_outputs, dict) else self.DEFAULT_OUTPUTS.copy()
            )

        # Feature-extraction parameters.
        self.grayscale: bool = bool(cfg.get("grayscale", True))
        self.normalize: bool = bool(cfg.get("normalize", True))
        self.entropy_bins: int = int(cfg.get("entropy_bins", 256))

        # Single config lookup; tuple() keeps the (low, high) pair immutable.
        clip = cfg.get("clip_percentiles")
        self.clip_percentiles: tuple[float, float] | None = tuple(clip) if clip is not None else None

        self.laplacian_kernel: str = str(cfg.get("laplacian_kernel", "3x3"))

        # check if the transformation is defined in the processor
        if not isinstance(self.output_features, dict):
            raise ValueError(f"[{self.name}] 'output_features' must be a dict of metric->column_name")
        # Backfill any missing metric -> column entries with the defaults so the
        # feature-computation code can index unconditionally.
        for k in ("luminosity", "contrast", "blur", "entropy"):
            if k not in self.output_features:
                self.output_features[k] = self.DEFAULT_OUTPUTS[k]

    @override
    def compute_features(
        self,
        batch: pa.RecordBatch,
        prev_features: dict[str, pa.Array] | None = None,
    ) -> dict[str, pa.Array]:
        """Compute per-sample image features.

        Args:
            batch: Input batch of data containing image column.
            prev_features: Previously computed features (not used in this processor).

        Returns:
            Dictionary mapping feature names to their computed values.
        """
        if not self.input_columns:
            logger.warning(f"[{self.name}] no input_columns configured")
            return {}

        image_column = self.input_columns[0]
        if image_column not in batch.schema.names:
            logger.warning(f"[{self.name}] column '{image_column}' not found in batch")
            return {}

        values = batch.column(image_column).to_pylist()

        # Decode each sample to grayscale. Failures are kept as None placeholders
        # so the output feature columns stay row-aligned with the batch (NaN rows).
        gray_images: list[Any] = []
        for idx, v in enumerate(values):
            try:
                gray_images.append(self._prepare_gray(v))
            except Exception:
                # logger.exception already records the traceback and message.
                logger.exception(f"[{self.name}] failed to process sample {idx}")
                gray_images.append(None)

        # Compute each feature type with dedicated functions
        return {
            self.output_features["luminosity"]: self._compute_luminosity_feature(gray_images),
            self.output_features["contrast"]: self._compute_contrast_feature(gray_images),
            self.output_features["blur"]: self._compute_blur_feature(gray_images),
            self.output_features["entropy"]: self._compute_entropy_feature(gray_images),
        }

    def _prepare_gray(self, value: Any) -> np.ndarray:
        """Decode one sample to grayscale and apply optional percentile clipping.

        Args:
            value: Raw sample (PIL Image, bytes, path string, or ndarray).

        Returns:
            2D grayscale array; clipped (and re-stretched when normalizing).
        """
        gray = self._to_gray_np(value)
        if self.clip_percentiles is not None:
            p_lo, p_hi = self.clip_percentiles
            lo = np.percentile(gray, p_lo)
            hi = np.percentile(gray, p_hi)
            if hi > lo:
                gray = np.clip(gray, lo, hi)
                if self.normalize:
                    # Re-stretch the clipped range back onto [0, 1].
                    gray = (gray - lo) / max(1e-12, (hi - lo))
        return gray

    @override
    def compute_batch_metric(self, features: dict[str, pa.Array]) -> dict[str, pa.Array]:
        """No-op aggregation: metrics are image-level only.

        Returns:
            Empty dictionary as this processor computes features only.
        """
        return {}

    @override
    def compute(self, batch_metrics: dict[str, pa.Array] | None = None) -> dict[str, pa.Array]:
        """No dataset-level aggregation required for this processor.

        Returns:
            Empty dictionary as features are computed at batch level.
        """
        return {}

    def reset(self) -> None:
        """Reset processor state for new processing run (stateless: nothing to do)."""

    # TODO : Check if it can be vectorized, parallelized

    def _per_image(self, gray_images: list[np.ndarray | None], fn: Any) -> pa.Array:
        """Apply a scalar feature function to each image; None images map to NaN.

        Args:
            gray_images: List of grayscale image arrays (or None for failed images).
            fn: Callable taking a grayscale ndarray and returning a float.

        Returns:
            PyArrow float32 array with one value per input image.
        """
        values = [fn(gray) if gray is not None else float("nan") for gray in gray_images]
        return pa.array(values, type=pa.float32())

    def _compute_luminosity_feature(self, gray_images: list[np.ndarray | None]) -> pa.Array:
        """Compute luminosity (mean gray level) for each image.

        When `self.normalize` is set the pixels are already in [0, 1] (min-max);
        otherwise they are uint8 [0, 255] and rescaled here by /255.

        Args:
            gray_images: List of grayscale image arrays (or None for failed images).

        Returns:
            PyArrow array of luminosity values.
        """
        return self._per_image(
            gray_images,
            lambda g: float(np.mean(g if self.normalize else g / 255.0)),
        )

    def _compute_contrast_feature(self, gray_images: list[np.ndarray | None]) -> pa.Array:
        """Compute contrast (RMS contrast = std of gray) for each image.

        Args:
            gray_images: List of grayscale image arrays (or None for failed images).

        Returns:
            PyArrow array of contrast values.
        """
        return self._per_image(
            gray_images,
            lambda g: float(np.std(g if self.normalize else g / 255.0)),
        )

    def _compute_blur_feature(self, gray_images: list[np.ndarray | None]) -> pa.Array:
        """Compute blur (variance of Laplacian) for each image.

        Args:
            gray_images: List of grayscale image arrays (or None for failed images).

        Returns:
            PyArrow array of blur values.
        """
        return self._per_image(
            gray_images,
            lambda g: float(self._variance_of_laplacian(g)),
        )

    def _compute_entropy_feature(self, gray_images: list[np.ndarray | None]) -> pa.Array:
        """Compute entropy (Shannon entropy) for each image.

        Args:
            gray_images: List of grayscale image arrays (or None for failed images).

        Returns:
            PyArrow array of entropy values.
        """
        return self._per_image(
            gray_images,
            lambda g: float(self._entropy(g)),
        )

    # --- helpers --------------------------------------------------------------

    def _to_gray_np(self, x: Any) -> np.ndarray:
        """Convert various input types to a 2D grayscale numpy array.

        If `self.normalize` is True, returns float32 in [0,1]. Otherwise returns uint8 [0,255].

        Args:
            x: Input data (PIL Image, bytes, string path, or numpy array).

        Returns:
            2D numpy array in grayscale.

        Raises:
            ValueError: If input type is unsupported or path does not exist.
        """
        img: Image.Image | None = None

        if isinstance(x, Image.Image):
            img = x
        elif isinstance(x, (bytes, bytearray)):
            img = Image.open(io.BytesIO(x))
        elif isinstance(x, str):
            img_path = Path(self.dataset_root_path) / x if self.dataset_root_path != "undefined" else Path(x)
            if not img_path.is_file():
                raise ValueError(f"Path does not exist: {img_path}")
            # Load inside a context manager so the OS file handle is released
            # promptly; PIL otherwise keeps the file open for lazy decoding.
            with Image.open(img_path) as handle:
                img = handle.copy()
        elif isinstance(x, np.ndarray):
            arr = x
            if arr.ndim == 2:  # already gray
                gray = arr
            elif arr.ndim == 3 and arr.shape[2] in (3, 4):
                # manual luminance conversion to be independent of PIL for ndarray
                rgb = arr[..., :3].astype(np.float32)
                gray = 0.2126 * rgb[..., 0] + 0.7152 * rgb[..., 1] + 0.0722 * rgb[..., 2]
            else:
                raise ValueError(f"Unsupported ndarray shape {arr.shape}")

            return self._to_float01(gray) if self.normalize else gray.astype(np.uint8)
        else:
            raise ValueError(f"Unsupported type for image input: {type(x)}")

        # Use PIL pipeline
        if self.grayscale and img.mode != "L":
            img = img.convert("L")
        elif not self.grayscale and img.mode not in ("RGB", "L"):
            img = img.convert("RGB")

        gray_np = np.array(img)
        if gray_np.ndim == 3:  # RGB -> gray (Rec. 709 luminance weights)
            gray_np = 0.2126 * gray_np[..., 0] + 0.7152 * gray_np[..., 1] + 0.0722 * gray_np[..., 2]

        if self.normalize:
            # Revert to min-max normalization as required by existing tests
            return self._to_float01(gray_np)
        else:
            return gray_np.astype(np.uint8)

    @staticmethod
    def _to_float01(arr: np.ndarray) -> np.ndarray:
        """Normalize array to [0, 1] range using min-max scaling.

        Args:
            arr: Input numpy array.

        Returns:
            Normalized array with float32 values in [0, 1]; all-zeros when the
            input is constant (vmax == vmin) to avoid division by zero.
        """
        arr = arr.astype(np.float32)
        vmin, vmax = float(arr.min()), float(arr.max())
        arr = (arr - vmin) / (vmax - vmin) if vmax > vmin else np.zeros_like(arr, dtype=np.float32)
        return arr

    def _variance_of_laplacian(self, gray: np.ndarray) -> float:
        """Variance of Laplacian as a blur metric.

        Args:
            gray: Grayscale image array.

        Returns:
            Variance of Laplacian (higher values indicate more edges/sharpness).
        """
        g = gray.astype(np.float32)
        if self.laplacian_kernel == "5x5":
            k = np.array(
                [
                    [0, 0, -1, 0, 0],
                    [0, -1, -2, -1, 0],
                    [-1, -2, 16, -2, -1],
                    [0, -1, -2, -1, 0],
                    [0, 0, -1, 0, 0],
                ],
                dtype=np.float32,
            )
        else:
            # Any other value falls back to the standard 3x3 Laplacian.
            k = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)

        # Use scipy for optimized convolution
        lap = signal.convolve2d(g, k, mode="same")
        return float(np.var(lap))

    def _entropy(self, gray: np.ndarray) -> float:
        """Shannon entropy of the gray histogram (natural log).

        Args:
            gray: Grayscale image array.

        Returns:
            Shannon entropy value. Returns NaN if histogram sum is zero.
        """
        g = gray
        if self.normalize:
            # histogram on [0,1]
            hist, _ = np.histogram(g, bins=self.entropy_bins, range=(0.0, 1.0))
        else:
            # uint8 range
            hist, _ = np.histogram(g, bins=min(256, self.entropy_bins), range=(0, 255))
        p = hist.astype(np.float64)
        s = p.sum()
        if s <= 0:
            return float("nan")
        p /= s
        # avoid log(0)
        p = p[p > 0]
        return float(-(p * np.log(p)).sum())

DEFAULT_OUTPUTS = {'luminosity': 'm_luminosity', 'contrast': 'm_contrast', 'blur': 'm_blur_level', 'entropy': 'm_entropy'} class-attribute instance-attribute

clip_percentiles = tuple(cfg.get('clip_percentiles')) instance-attribute

dataset_root_path = str(cfg.get('dataset_root_path', 'undefined')) instance-attribute

entropy_bins: int = int(cfg.get('entropy_bins', 256)) instance-attribute

grayscale: bool = bool(cfg.get('grayscale', True)) instance-attribute

input_columns = ['image_bytes'] instance-attribute

laplacian_kernel: str = str(cfg.get('laplacian_kernel', '3x3')) instance-attribute

normalize: bool = bool(cfg.get('normalize', True)) instance-attribute

output_features: Any = cfg_outputs.copy() if isinstance(cfg_outputs, dict) else self.DEFAULT_OUTPUTS.copy() instance-attribute

__init__(name: str = 'visual_metric', config: dict[str, Any] | None = None) -> None

Initialize the visual features processor.

Parameters:

Name Type Description Default
name str

Unique name of the processor instance.

'visual_metric'
config dict[str, Any] | None

Configuration dictionary containing: - input_columns: List containing the name of the image column (bytes or path). - output_features: Mapping of feature names to output column names. - grayscale: Whether to convert images to grayscale (default: True). - normalize: Whether to normalize pixel values to [0, 1] (default: True). - entropy_bins: Number of bins for entropy calculation (default: 256). - clip_percentiles: Tuple of (low, high) percentiles for intensity clipping. - laplacian_kernel: Size of the Laplacian kernel ('3x3' or '5x5'). - dataset_root_path: Root directory for relative image paths.

None
Source code in packages/dqm-ml-images/src/dqm_ml_images/visual_features.py
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def __init__(self, name: str = "visual_metric", config: dict[str, Any] | None = None) -> None:
    """
    Initialize the visual features processor.

    Args:
        name: Unique name of the processor instance.
        config: Configuration dictionary containing:
            - input_columns: List containing the name of the image column (bytes or path).
            - output_features: Mapping of feature names to output column names.
            - grayscale: Whether to convert images to grayscale (default: True).
            - normalize: Whether to normalize pixel values to [0, 1] (default: True).
            - entropy_bins: Number of bins for entropy calculation (default: 256).
            - clip_percentiles: Tuple of (low, high) percentiles for intensity clipping.
            - laplacian_kernel: Size of the Laplacian kernel ('3x3' or '5x5').
            - dataset_root_path: Root directory for relative image paths.
    """
    super().__init__(name, config)

    # Local view of config for convenience
    cfg = self.config or {}

    # handle relative paths in parquet to a dataset located at dataset_root_path
    self.dataset_root_path = str(cfg.get("dataset_root_path", "undefined"))

    if not hasattr(self, "input_columns") or not self.input_columns:
        self.input_columns = ["image_bytes"]

    if not hasattr(self, "output_features") or not self.output_features:
        # Use config-provided mapping if present, otherwise defaults
        cfg_outputs = cfg.get("output_features") if isinstance(cfg.get("output_features"), dict) else None
        self.output_features: Any = (
            cfg_outputs.copy() if isinstance(cfg_outputs, dict) else self.DEFAULT_OUTPUTS.copy()
        )

    # param
    self.grayscale: bool = bool(cfg.get("grayscale", True))
    self.normalize: bool = bool(cfg.get("normalize", True))
    self.entropy_bins: int = int(cfg.get("entropy_bins", 256))

    # TODO written to remove noqa 501 and type check error in same line, to be fixed properly later
    if cfg.get("clip_percentiles") is not None:
        self.clip_percentiles = tuple(cfg.get("clip_percentiles"))  # type: ignore
    else:
        self.clip_percentiles = None  # type: ignore

    self.laplacian_kernel: str = str(cfg.get("laplacian_kernel", "3x3"))

    # check if the transformation is defined in the processor
    if not isinstance(self.output_features, dict):
        raise ValueError(f"[{self.name}] 'output_features' must be a dict of metric->column_name")
    for k in ("luminosity", "contrast", "blur", "entropy"):
        if k not in self.output_features:
            self.output_features[k] = self.DEFAULT_OUTPUTS[k]

compute(batch_metrics: dict[str, pa.Array] | None = None) -> dict[str, pa.Array]

No dataset-level aggregation required for this processor.

Returns:

Type Description
dict[str, Array]

Empty dictionary as features are computed at batch level.

Source code in packages/dqm-ml-images/src/dqm_ml_images/visual_features.py
165
166
167
168
169
170
171
172
@override
def compute(self, batch_metrics: dict[str, pa.Array] | None = None) -> dict[str, pa.Array]:
    """No dataset-level aggregation required for this processor.

    Returns:
        Empty dictionary as features are computed at batch level.
    """
    return {}

compute_batch_metric(features: dict[str, pa.Array]) -> dict[str, pa.Array]

No-op aggregation: metrics are image-level only.

Returns:

Type Description
dict[str, Array]

Empty dictionary as this processor computes features only.

Source code in packages/dqm-ml-images/src/dqm_ml_images/visual_features.py
156
157
158
159
160
161
162
163
@override
def compute_batch_metric(self, features: dict[str, pa.Array]) -> dict[str, pa.Array]:
    """No-op aggregation: metrics are image-level only.

    Returns:
        Empty dictionary as this processor computes features only.
    """
    return {}

compute_features(batch: pa.RecordBatch, prev_features: dict[str, pa.Array] | None = None) -> dict[str, pa.Array]

Compute per-sample image features.

Parameters:

Name Type Description Default
batch RecordBatch

Input batch of data containing image column.

required
prev_features dict[str, Array] | None

Previously computed features (not used in this processor).

None

Returns:

Type Description
dict[str, Array]

Dictionary mapping feature names to their computed values.

Source code in packages/dqm-ml-images/src/dqm_ml_images/visual_features.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
@override
def compute_features(
    self,
    batch: pa.RecordBatch,
    prev_features: dict[str, pa.Array] | None = None,
) -> dict[str, pa.Array]:
    """Compute per-sample image features.

    Args:
        batch: Input batch of data containing image column.
        prev_features: Previously computed features (not used in this processor).

    Returns:
        Dictionary mapping feature names to their computed values.
    """
    if not self.input_columns:
        logger.warning(f"[{self.name}] no input_columns configured")
        return {}

    image_column = self.input_columns[0]
    if image_column not in batch.schema.names:
        logger.warning(f"[{self.name}] column '{image_column}' not found in batch")
        return {}

    col = batch.column(image_column)
    values = col.to_pylist()  #
    # Use grayscale image
    gray_images: list[Any] = []
    for idx, v in enumerate(values):
        try:
            gray = self._to_gray_np(v)
            if self.clip_percentiles is not None:
                p_lo, p_hi = self.clip_percentiles
                lo = np.percentile(gray, p_lo)
                hi = np.percentile(gray, p_hi)
                if hi > lo:
                    gray = np.clip(gray, lo, hi)
                    if self.normalize:
                        gray = (gray - lo) / max(1e-12, (hi - lo))
            gray_images.append(gray)
        except Exception as e:
            logger.exception(f"[{self.name}] failed to process sample {idx}: {e}")
            gray_images.append(None)

    # Compute each feature type with dedicated functions
    features = {}
    features[self.output_features["luminosity"]] = self._compute_luminosity_feature(gray_images)
    features[self.output_features["contrast"]] = self._compute_contrast_feature(gray_images)
    features[self.output_features["blur"]] = self._compute_blur_feature(gray_images)
    features[self.output_features["entropy"]] = self._compute_entropy_feature(gray_images)
    return features

reset() -> None

Reset processor state for new processing run.

Source code in packages/dqm-ml-images/src/dqm_ml_images/visual_features.py
174
175
def reset(self) -> None:
    """Reset processor state for new processing run."""