Coverage for packages / dqm-ml-job / src / dqm_ml_job / outputwriter / __init__.py: 100%

10 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-15 10:11 +0000

1"""Output writers module for DQM ML Job. 

2 

3This module contains classes for writing pipeline results (features 

4and metrics) to various storage backends. 

5 

6Classes: 

7 OutputWriter: Protocol for output writer implementations. 

8 ParquetOutputWriter: Writer that saves data to Parquet files. 

9""" 

10 

11from typing import Any, Protocol, runtime_checkable 

12 

13from dqm_ml_job.outputwriter.parquet import ParquetOutputWriter 

14 

15 

16@runtime_checkable 

17class OutputWriter(Protocol): 

18 """ 

19 Protocol for Output Writers. 

20 

21 Defines the interface for writing pipeline results (features or metrics) to storage. 

22 """ 

23 

24 columns: list[str] 

25 name: str 

26 

27 def write_metrics_dict(self, metrics_dict: dict[str, dict[str, Any]]) -> None: 

28 """ """ 

29 

30 def write_table(self, name: str, table: Any, part_index: int | None = None) -> None: 

31 """ 

32 Write a table (features or metrics) to the output. 

33 

34 Args: 

35 name: Name of the dataset or metric. 

36 table: The data to write (usually a pyarrow Table or dict of arrays). 

37 part_index: Index of the data part (for chunked writing). 

38 """ 

39 

40 

# Registry mapping a string key to an output writer class.
# The only entry visible here maps "parquet" to ParquetOutputWriter.
dqml_outputs_registry = {
    "parquet": ParquetOutputWriter,
}


# Public names exported by this package.
__all__ = [
    "OutputWriter",
    "ParquetOutputWriter",
    "dqml_outputs_registry",
]