Skip to content

preprocessor

py3dinterpolations.modelling.preprocessor

Preprocessing pipeline for GridData.

PreprocessingKwargs

Bases: TypedDict

Type-safe kwargs for Preprocessor construction.

Preprocessor(griddata, downsampling_res=None, downsampling_method=DownsamplingStatistic.MEAN, normalize_xyz=True, standardize_v=True)

Preprocess GridData before interpolation.

Supports downsampling, normalization of XYZ, and standardization of V. Calling preprocess() returns a new GridData with the preprocessing params attached.

Parameters:

Name Type Description Default
griddata GridData

Source data to preprocess.

required
downsampling_res float | None

Block resolution for downsampling. None to skip.

None
downsampling_method DownsamplingStatistic | str | Callable[..., DataFrame]

Statistic for downsampling, or a custom callable.

MEAN
normalize_xyz bool

Whether to normalize XYZ to [0, 1].

True
standardize_v bool

Whether to standardize V to mean=0, std=1.

True
Source code in py3dinterpolations/modelling/preprocessor.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def __init__(
    self,
    griddata: GridData,
    downsampling_res: float | None = None,
    downsampling_method: (
        DownsamplingStatistic | str | Callable[..., pd.DataFrame]
    ) = DownsamplingStatistic.MEAN,
    normalize_xyz: bool = True,
    standardize_v: bool = True,
):
    """Store the preprocessing configuration.

    Nothing is computed here; the arguments are only recorded on the
    instance.

    Args:
        griddata: Source data to preprocess.
        downsampling_res: Block resolution for downsampling, or None to
            skip downsampling.
        downsampling_method: Statistic for downsampling, or a custom
            callable.
        normalize_xyz: Whether to normalize XYZ to [0, 1].
        standardize_v: Whether to standardize V to mean=0, std=1.
    """
    # Input data first, then the settings grouped by pipeline stage.
    self.griddata = griddata
    self.downsampling_res, self.downsampling_method = (
        downsampling_res,
        downsampling_method,
    )
    self.normalize_xyz, self.standardize_v = normalize_xyz, standardize_v

preprocess()

Execute the preprocessing pipeline.

Returns:

Type Description
GridData

New GridData with preprocessed data and params attached.

Source code in py3dinterpolations/modelling/preprocessor.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def preprocess(self) -> GridData:
    """Run the configured preprocessing steps and wrap the result.

    Steps run in a fixed order and each one is optional: downsampling
    (only when ``downsampling_res`` is not None), then XYZ normalization,
    then V standardization. The params of every step that ran are
    collected into a ``PreprocessingParams``; skipped steps stay None.

    Returns:
        New GridData with preprocessed data and params attached.
    """
    logger.info("Starting preprocessing")

    # Work on a copy restricted to the columns the pipeline operates on.
    columns = ["ID", "X", "Y", "Z", "V"]
    working = self.griddata.data.copy().reset_index()[columns]

    # Each slot remains None unless its step actually runs below.
    ds_params: DownsamplingParams | None = None
    norm_params: dict[Axis, NormalizationParams] | None = None
    std_params: StandardizationParams | None = None

    if self.downsampling_res is not None:
        working = self._downsample_data(working, statistic=self.downsampling_method)
        ds_params = DownsamplingParams(resolution=self.downsampling_res)

    if self.normalize_xyz:
        working, norm_params = self._normalize_xyz(working)

    if self.standardize_v:
        working, std_params = self._standardize_v(working)

    applied = PreprocessingParams(
        downsampling=ds_params,
        normalization=norm_params,
        standardization=std_params,
    )
    logger.info("Preprocessing complete: %s", applied)
    return GridData(working, preprocessing_params=applied)

reverse_preprocessing(griddata)

Reverse all reversible preprocessing transformations.

Reverses normalization of XYZ and standardization of V. Downsampling cannot be reversed.

Parameters:

Name Type Description Default
griddata GridData

GridData with preprocessing_params set.

required

Returns:

Type Description
GridData

New GridData with reversed transformations.

Raises:

Type Description
ValueError

If no preprocessing params are present.

Source code in py3dinterpolations/modelling/preprocessor.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
def reverse_preprocessing(griddata: GridData) -> GridData:
    """Undo the reversible preprocessing steps recorded on ``griddata``.

    XYZ normalization and V standardization are inverted using the
    params stored on the input. Downsampling cannot be reversed, so it
    is left untouched.

    Args:
        griddata: GridData with preprocessing_params set.

    Returns:
        New GridData with reversed transformations. It is built from the
        data alone; no preprocessing params are passed on.

    Raises:
        ValueError: If no preprocessing params are present.
    """
    recorded = griddata.preprocessing_params
    if recorded is None:
        raise ValueError("No preprocessing has been applied to the data")

    # Operate on a copy so the caller's GridData is not modified.
    restored = griddata.data.copy().reset_index()

    normalization = recorded.normalization
    if normalization is not None:
        for axis in (Axis.X, Axis.Y, Axis.Z):
            column = axis.value
            bounds = normalization[axis]
            # Inverse of min-max scaling: x * (max - min) + min.
            restored[column] = (
                restored[column] * (bounds.max - bounds.min) + bounds.min
            )

    standardization = recorded.standardization
    if standardization is not None:
        # Inverse of z-scoring: v * std + mean.
        restored["V"] = (
            restored["V"] * standardization.std + standardization.mean
        )

    return GridData(restored)