From 7983e57b7eba07982c50695833e65f5a3cdd72eb Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 28 May 2026 16:49:40 +0100 Subject: [PATCH 1/4] dev --- Changelog.rst | 10 + cf/data/dask_regrid.py | 384 +++++++++++++++++++++++--------------- cf/data/data.py | 4 + cf/docstring/docstring.py | 27 ++- cf/field.py | 12 ++ cf/regrid/regrid.py | 53 ++++++ 6 files changed, 334 insertions(+), 156 deletions(-) diff --git a/Changelog.rst b/Changelog.rst index 66efd447c0..adb1acfd88 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -1,3 +1,13 @@ +Version NEXTVERSION +-------------- + +**2026-06-??** + +* New keyword parameter to `cf.Field.regrids` and `cf.Field.regridc`: + ``max_masked`` (https://github.com/NCAS-CMS/cf-python/issues/???) + +---- + Version 3.20.0 -------------- diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 160fe7b209..ab1ef9d765 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -13,164 +13,185 @@ def regrid( axis_order=None, ref_src_mask=None, min_weight=None, + max_masked=0, ): """Regrid an array. - .. versionadded:: 3.14.0 - - .. seealso:: `regrid_weights`, `_regrid`, `cf.Data._regrid` - - :Parameters: - - a: `numpy.ndarray` - The array to be regridded. - - weights_dst_mask: 2-`tuple` - The sparse weights matrix that defines the regridding - operation; and the mask to be applied to the regridded - data (as yet unmodified for the source grid mask). - - **weights** - - The dense weights matrix has J rows and I columns, where J - and I are the total number of cells in the destination and - source grids respectively. - - The weights matrix only describes cells defined by the - regridding dimensions. If the array *a* includes - non-regridding dimensions then, in essence, the regrid - operation is carried out separately for each slice of the - regridding dimensions. 
For instance, if *a* represents T, - Z, Y, X dimensions with shape ``(12, 20, 73, 96)`` and is - to have its Y and X dimension regridded, then the result - may be thought of as the concatenation of the 240 - individual regrids arising from all of the T and Z - dimension combinations. - - Each element w_ji is the multiplicative weight that - defines how much of Vs_i (the value in source grid cell i) - contributes to Vd_j (the value in destination grid cell - j). - - The final value of Vd_j is the sum of w_ji * Vs_i for all - source grid cells i. Note that it is typical that for a - given j most w_ji will be zero, reflecting the fact only a - few source grid cells intersect a particular destination - grid cell. I.e. *weights* is usually a very sparse matrix. - - If the destination grid has masked cells, either because - it spans areas outside of the source grid, or by selection - (such as ocean cells for land-only data), then the - corresponding rows in the weights matrix must be be - entirely missing data. - - For the patch recovery and second-order conservative - regridding methods, the weights matrix will have been - constructed taking into account the mask of the source - grid, which must match the mask of *a* for its regridding - dimensions. - - For all other regridding methods, the weights matrix will - have been constructed assuming that no source grid cells - are masked, and the weights matrix will be modified - on-the-fly to account for any masked elements of *a* in - each regridding slice. - - It is assumed that data-type of the weights matrix is same - as the desired data-type of the regridded data. - - See section 12.3 "Regridding Methods" of - https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc/node1.html - - **dst_mask** - - If a `numpy.ndarray` with shape ``(J,)`` then this is the - reference destination grid mask that was used during the - creation of the weights. If `None` then there are no - reference destination grid masked points. 
- - In either case the reference destination grid mask may get - updated (not in-place) to account for source grid masked - points. - - method: `str` - The name of the regridding method. - - src_shape: sequence of `int` - The shape of the source grid. - - dst_shape: sequence of `int` - The shape of the destination grid. - - axis_order: sequence of `int` - The axis order that transposes *a* so that the regrid axes - become the trailing dimensions, ordered consistently with - the order used to create the weights matrix; and the - non-regrid axes become the leading dimensions. - - *Parameter example:* - If the regrid axes are in positions 2 and 1 for 4-d - data: ``[0, 3, 2, 1]`` - - *Parameter example:* - If the regrid axes are in positions 0 and 3 for 4-d - data: ``[1, 2, 0, 3]`` - - *Parameter example:* - If the regrid axis is in position 0 for 3-d data: ``[1, - 2, 0]`` - - ref_src_mask: `numpy.ndarray` or `None` - If a `numpy.ndarray` with shape *src_shape* then this is - the reference source grid mask that was used during the - creation of the weights matrix given by *weights*, and the - mask of each regrid slice of *a* must therefore be - identical to *ref_src_mask*. If *ref_src_mask* is a scalar - array with value `False`, then this is equivalent to a - reference source grid mask with shape *src_shape* entirely - populated with `False`. - - If `None` (the default), then the weights matrix will have - been created assuming no source grid mask, and the mask of - each regrid slice of *a* is automatically applied to - *weights* prior to the regridding calculation. - - min_weight: float, optional - A very small non-negative number. By default *min_weight* - is ``2.5 * np.finfo("float64").eps``, - i.e. ``5.551115123125783e-16`. It is used during linear - and first-order conservative regridding when adjusting the - weights matrix to account for the data mask. It is ignored - for all other regrid methods, or if data being regridded - has no missing values. 
- - In some cases (described below) for which weights might - only be non-zero as a result of rounding errors, the - *min_weight* parameter controls whether or a not cell in - the regridded field is masked. + .. versionadded:: 3.14.0 - The default value has been chosen empirically as the - smallest value that produces the same masks as esmpy for - the use cases defined in the cf test suite. + .. seealso:: `regrid_weights`, `_regrid`, `cf.Data._regrid` - **Linear regridding** + :Parameters: - Destination grid cell j will only be masked if a) it is - masked in the destination grid definition; or b) ``w_ji >= - min_weight`` for those masked source grid cells i for - which ``w_ji > 0``. - - **Conservative first-order regridding** - - Destination grid cell j will only be masked if a) it is - masked in the destination grid definition; or b) the sum - of ``w_ji`` for all non-masked source grid cells i is - strictly less than *min_weight*. - - :Returns: - - `numpy.ndarray` - The regridded data. + a: `numpy.ndarray` + The array to be regridded. + + weights_dst_mask: 2-`tuple` + The sparse weights matrix that defines the regridding + operation; and the mask to be applied to the regridded + data (as yet unmodified for the source grid mask). + + **weights** + + The dense weights matrix has J rows and I columns, where J + and I are the total number of cells in the destination and + source grids respectively. + + The weights matrix only describes cells defined by the + regridding dimensions. If the array *a* includes + non-regridding dimensions then, in essence, the regrid + operation is carried out separately for each slice of the + regridding dimensions. For instance, if *a* represents T, + Z, Y, X dimensions with shape ``(12, 20, 73, 96)`` and is + to have its Y and X dimension regridded, then the result + may be thought of as the concatenation of the 240 + individual regrids arising from all of the T and Z + dimension combinations. 
+ + Each element w_ji is the multiplicative weight that + defines how much of Vs_i (the value in source grid cell i) + contributes to Vd_j (the value in destination grid cell + j). + + The final value of Vd_j is the sum of w_ji * Vs_i for all + source grid cells i. Note that it is typical that for a + given j most w_ji will be zero, reflecting the fact only a + few source grid cells intersect a particular destination + grid cell. I.e. *weights* is usually a very sparse matrix. + + If the destination grid has masked cells, either because + it spans areas outside of the source grid, or by selection + (such as ocean cells for land-only data), then the + corresponding rows in the weights matrix must be be + entirely missing data. + + For the patch recovery and second-order conservative + regridding methods, the weights matrix will have been + constructed taking into account the mask of the source + grid, which must match the mask of *a* for its regridding + dimensions. + + For all other regridding methods, the weights matrix will + have been constructed assuming that no source grid cells + are masked, and the weights matrix will be modified + on-the-fly to account for any masked elements of *a* in + each regridding slice. + + It is assumed that data-type of the weights matrix is same + as the desired data-type of the regridded data. + + See section 12.3 "Regridding Methods" of + https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc/node1.html + + **dst_mask** + + If a `numpy.ndarray` with shape ``(J,)`` then this is the + reference destination grid mask that was used during the + creation of the weights. If `None` then there are no + reference destination grid masked points. + + In either case the reference destination grid mask may get + updated (not in-place) to account for source grid masked + points. + + method: `str` + The name of the regridding method. + + src_shape: sequence of `int` + The shape of the source grid. 
+ + dst_shape: sequence of `int` + The shape of the destination grid. + + axis_order: sequence of `int` + The axis order that transposes *a* so that the regrid axes + become the trailing dimensions, ordered consistently with + the order used to create the weights matrix; and the + ` non-regrid axes become the leading dimensions. + + *Parameter example:* + If the regrid axes are in positions 2 and 1 for 4-d + data: ``[0, 3, 2, 1]`` + + *Parameter example:* + If the regrid axes are in positions 0 and 3 for 4-d + data: ``[1, 2, 0, 3]`` + + *Parameter example:* + If the regrid axis is in position 0 for 3-d data: ``[1, + 2, 0]`` + + ref_src_mask: `numpy.ndarray` or `None` + If a `numpy.ndarray` with shape *src_shape* then this is + the reference source grid mask that was used during the + creation of the weights matrix given by *weights*, and the + mask of each regrid slice of *a* must therefore be + identical to *ref_src_mask*. If *ref_src_mask* is a scalar + array with value `False`, then this is equivalent to a + reference source grid mask with shape *src_shape* entirely + populated with `False`. + + If `None` (the default), then the weights matrix will have + been created assuming no source grid mask, and the mask of + each regrid slice of *a* is automatically applied to + *weights* prior to the regridding calculation. + + min_weight: float, optional + A very small non-negative number. By default *min_weight* + is ``2.5 * np.finfo("float64").eps``, + i.e. ``5.551115123125783e-16`. It is used during linear + and first-order conservative regridding when adjusting the + weights matrix to account for the data mask. It is ignored + for all other regrid methods, or if data being regridded + has no missing values. + + In some cases (described below) for which weights might + only be non-zero as a result of rounding errors, the + *min_weight* parameter controls whether or a not cell in + the regridded field is masked. 
+ + The default value has been chosen empirically as the + smallest value that produces the same masks as esmpy for + the use cases defined in the cf test suite. + + **Linear regridding** + + Destination grid cell j will only be masked if a) it is + masked in the destination grid definition; or b) the + number of ``w_ji >= min_weight`` for those masked source + grid cells i for which ``w_ji > 0`` exceeds the + *max_masked* parameter. + + **Conservative first-order regridding** + + Destination grid cell j will only be masked if a) it is + masked in the destination grid definition; or b) the sum + of ``w_ji`` for all non-masked source grid cells i is + strictly less than *min_weight*. + + max_masked, `int`, optional + For linear regridding only. Ignored for all other + regridding methods. + + The maximum allow number of masked source cells which are + allowed to be ignored when calculating a non-masked + destination cell. When masked source cells are ignored, + the weights w_ji of non-masked source cells i are adjusted + so that they sum to 1. + + By default *max_masked* is ``0``, meaning that + destination grid cell j will be masked if source cell + i is masked and ``w_ji >= min_weight``. If set to + ``N``, then destination grid cell j will be masked if + more than ``N`` source cells i are masked with ``w_ji + >= min_weight``. + + .. versionadded:: NEXTVERSION + + :Returns: + + `numpy.ndarray` + The regridded data. """ weights, dst_mask = weights_dst_mask @@ -290,7 +311,13 @@ def regrid( # for all slices => all slices can be regridded # simultaneously. a, _, _, _ = _regrid( - a, src_mask, dst_mask, weights, method, min_weight=min_weight + a, + src_mask, + dst_mask, + weights, + method, + min_weight=min_weight, + max_masked=max_masked, ) del _ @@ -366,6 +393,7 @@ def _regrid( prev_dst_mask=None, prev_weights=None, min_weight=None, + max_masked=0, ): """Worker function for `regrid`. @@ -421,6 +449,16 @@ def _regrid( See `regrid` for details. 
+ max_masked: `int`, optional + For linear regridding only. Ignored for all other + regridding methods. The maximum allow number of masked + source cells which are allowed to be ignored when + calculating a non-masked destination cell. + + See `regrid` for details. + + .. versionadded:: NEXTVERSION + method: `str` The name of the regridding method. @@ -543,6 +581,8 @@ def _regrid( else: dst_mask = dst_mask.copy() + weights = weights.copy() + # Note: It is much more efficient to access # 'weights.indptr', 'weights.indices', and # 'weights.data' directly, rather than iterating @@ -556,11 +596,45 @@ def _regrid( data = weights.data for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] - if not count_nonzero(mask): + n_masked = count_nonzero(mask) + if not n_masked: + # There are no masked src cells continue - if where(data[i0:i1][mask] >= min_weight)[0].size: + if n_masked == mask.size: + # There are no non-masked src cells + dst_mask[j] = True + continue + + w = data[i0:i1] + print(j, where(w[mask] >= min_weight)[0], w, mask, max_masked) + if where(w[mask] >= min_weight)[0].size > max_masked: + print(j, "> mm") + # There are more masked src cells than allowed dst_mask[j] = True + else: + # The number of masked src cells does not exceed + # the minimum masked-cells threshold + non_masked_indices = where((~mask) & (w >= min_weight))[0] + print(non_masked_indices) + if non_masked_indices.size == 1: + # There areis exactly one non-masked src cell + # with weight above the minimum weights + # threshold + w[:] = 0 + w[non_masked_indices] = 1 + data[i0:i1] = w + elif non_masked_indices.size: + # There are some non-masked src cells with + # weights above the minimum weights threshold + D_j = w[non_masked_indices].sum() + w = w / D_j + w[mask] = 0 + data[i0:i1] = w + else: + # There are no non-masked src cells with + # weights above the minimum weights threshold + dst_mask[j] = True elif method == "nearest_dtos": # 3) Nearest neighbour 
dtos method: diff --git a/cf/data/data.py b/cf/data/data.py index 83cec9b0df..7a83f9cbf8 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2345,6 +2345,7 @@ def _regrid( regrid_axes=None, regridded_sizes=None, min_weight=None, + max_masked=0, ): """Regrid the data. @@ -2380,6 +2381,8 @@ def _regrid( {{min_weight: float, optional}} + {{max_masked: `int`, optional}} + :Returns: `Data` @@ -2478,6 +2481,7 @@ def _regrid( dst_shape=operator.dst_shape, axis_order=non_regrid_axes + list(regrid_axes), min_weight=min_weight, + max_masked=max_masked, ) # Performance note: diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index 269212ac4f..c7863686d8 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -394,7 +394,8 @@ Destination grid cell ``j`` will only be masked if a) it is masked in the destination grid definition; or b) ``w_ji >= min_weight`` for those masked source grid - cells ``i`` for which ``w_ji > 0``. + cells ``i`` for which ``w_ji > 0`` exceeds the + *max_masked* parameter. **Conservative first-order regridding** @@ -402,6 +403,30 @@ it is masked in the destination grid definition; or b) the sum of ``w_ji`` for all non-masked source grid cells ``i`` is strictly less than *min_weight*.""", + # max_masked + "{{max_masked: `int`, optional}}": """max_masked: `int`, optional + For linear regridding only. Ignored for all other + regridding methods. + + The maximum allow number of masked source cells which + are allowed to be ignored when calculating a + non-masked destination cell. When masked source cells + are ignored, the weights of non-masked source cells i + are adjusted so that they sum to 1. + + Define ``w_ji`` as the multiplicative weight that + defines how much of ``Vs_i`` (the value in source grid + cell ``i``) contributes to ``Vd_j`` (the value in + destination grid cell ``j``). 
+ + By default *max_masked* is ``0``, meaning that + destination grid cell j will be masked if source cell + i is masked and ``w_ji >= min_weight``. If set to + ``N``, then destination grid cell j will be masked if + more than ``N`` source cells i are masked with ``w_ji + >= min_weight``. + + .. versionadded:: NEXTVERSION""", # weights_file "{{weights_file: `str` or `None`, optional}}": """weights_file: `str` or `None`, optional Provide a netCDF file that contains, or will contain, diff --git a/cf/field.py b/cf/field.py index f9b3ca19b6..fb9307f8b6 100644 --- a/cf/field.py +++ b/cf/field.py @@ -13659,6 +13659,7 @@ def regrids( return_operator=False, check_coordinates=False, min_weight=None, + max_masked=0, weights_file=None, src_z=None, dst_z=None, @@ -13863,6 +13864,10 @@ def regrids( .. versionadded:: 3.14.0 + {{max_masked, `int`, optional}} + + .. versionadded:: NEXTVERSION + {{weights_file: `str` or `None`, optional}} Ignored if *dst* is a `RegridOperator`. @@ -14024,6 +14029,7 @@ def regrids( return_operator=return_operator, check_coordinates=check_coordinates, min_weight=min_weight, + max_masked=max_masked, weights_file=weights_file, src_z=src_z, dst_z=dst_z, @@ -14050,6 +14056,7 @@ def regridc( return_operator=False, check_coordinates=False, min_weight=None, + max_masked=0, weights_file=None, src_z=None, dst_z=None, @@ -14191,6 +14198,10 @@ def regridc( .. versionadded:: 3.14.0 + {{max_masked, `int`, optional}} + + .. versionadded:: NEXTVERSION + {{weights_file: `str` or `None`, optional}} .. 
versionadded:: 3.15.2 @@ -14336,6 +14347,7 @@ def regridc( return_operator=return_operator, check_coordinates=check_coordinates, min_weight=min_weight, + max_masked=0, weights_file=weights_file, src_z=src_z, dst_z=dst_z, diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 982f53b32b..9426d72a9f 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -148,6 +148,7 @@ def regrid( return_operator=False, check_coordinates=False, min_weight=None, + max_masked=0, weights_file=None, return_esmpy_regrid_operator=False, dst_grid_partitions=1, @@ -261,6 +262,57 @@ def regrid( performed (checking that the coordinate system, cyclicity and grid shape are the same). + min_weight: float, optional + A very small non-negative number. By default *min_weight* + is ``2.5 * np.finfo("float64").eps``, + i.e. ``5.551115123125783e-16`. It is used during linear + and first-order conservative regridding when adjusting the + weights matrix to account for the data mask. It is ignored + for all other regrid methods, or if data being regridded + has no missing values. + + In some cases (described below) for which weights might + only be non-zero as a result of rounding errors, the + *min_weight* parameter controls whether or a not cell in + the regridded field is masked. + + The default value has been chosen empirically as the + smallest value that produces the same masks as esmpy for + the use cases defined in the cf test suite. + + **Linear regridding** + + Destination grid cell j will only be masked if a) it is + masked in the destination grid definition; or b) the + number of ``w_ji >= min_weight`` for those masked source + grid cells i for which ``w_ji > 0`` exceeds the + *max_masked* parameter. + + **Conservative first-order regridding** + + Destination grid cell j will only be masked if a) it is + masked in the destination grid definition; or b) the sum + of ``w_ji`` for all non-masked source grid cells i is + strictly less than *min_weight*. 
+ + max_masked, `int`, optional + For linear regridding only. Ignored for all other + regridding methods. + + The maximum number of masked source cells that are + allowed to be ignored when calculating a non-masked + destination cell. When masked source cells are ignored, + the weights w_ji of non-masked source cells i are adjusted + so that they sum to 1. + + By default it is ``0``, meaning that destination grid cell + j will be masked if source cell i is masked and ``w_ji >= + min_weight``. If set to ``N``, then destination grid cell + j will be masked if more than ``N`` source cells i are + masked with ``w_ji >= min_weight``. + + .. versionadded:: NEXTVERSION + inplace: `bool`, optional If True then modify *src* in-place and return `None`. @@ -774,6 +826,7 @@ def regrid( regrid_axes=src_grid.axis_indices, regridded_sizes=regridded_axis_sizes, min_weight=min_weight, + max_masked=max_masked, ) # ---------------------------------------------------------------- From 7055117f4e0fdcc5d7c7f892e9c174e38b6e17f2 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 29 May 2026 11:38:12 +0100 Subject: [PATCH 2/4] dev --- Changelog.rst | 2 +- cf/data/dask_regrid.py | 5 +- cf/field.py | 6 +- cf/regrid/regrid.py | 2 +- cf/test/test_regrid.py | 189 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 195 insertions(+), 9 deletions(-) diff --git a/Changelog.rst b/Changelog.rst index adb1acfd88..6fbd34f7d5 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -6,7 +6,7 @@ Version NEXTVERSION * New keyword parameter to `cf.Field.regrids` and `cf.Field.regridc`: ``max_masked`` (https://github.com/NCAS-CMS/cf-python/issues/???) ----- +---- Version 3.20.0 -------------- diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index ab1ef9d765..d0c5e29807 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -169,7 +169,7 @@ def regrid( of ``w_ji`` for all non-masked source grid cells i is strictly less than *min_weight*. 
- max_masked, `int`, optional + max_masked: `int`, optional For linear regridding only. Ignored for all other regridding methods. @@ -607,16 +607,13 @@ def _regrid( continue w = data[i0:i1] - print(j, where(w[mask] >= min_weight)[0], w, mask, max_masked) if where(w[mask] >= min_weight)[0].size > max_masked: - print(j, "> mm") # There are more masked src cells than allowed dst_mask[j] = True else: # The number of masked src cells does not exceed # the minimum masked-cells threshold non_masked_indices = where((~mask) & (w >= min_weight))[0] - print(non_masked_indices) if non_masked_indices.size == 1: # There areis exactly one non-masked src cell # with weight above the minimum weights diff --git a/cf/field.py b/cf/field.py index fb9307f8b6..98ba7eda71 100644 --- a/cf/field.py +++ b/cf/field.py @@ -13864,7 +13864,7 @@ def regrids( .. versionadded:: 3.14.0 - {{max_masked, `int`, optional}} + {{max_masked: `int`, optional}} .. versionadded:: NEXTVERSION @@ -14198,7 +14198,7 @@ def regridc( .. versionadded:: 3.14.0 - {{max_masked, `int`, optional}} + {{max_masked: `int`, optional}} .. versionadded:: NEXTVERSION @@ -14347,7 +14347,7 @@ def regridc( return_operator=return_operator, check_coordinates=check_coordinates, min_weight=min_weight, - max_masked=0, + max_masked=max_masked, weights_file=weights_file, src_z=src_z, dst_z=dst_z, diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 9426d72a9f..59236f2bc3 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -295,7 +295,7 @@ def regrid( of ``w_ji`` for all non-masked source grid cells i is strictly less than *min_weight*. - max_masked, `int`, optional + max_masked: `int`, optional For linear regridding only. Ignored for all other regridding methods. 
diff --git a/cf/test/test_regrid.py b/cf/test/test_regrid.py index f5d4675ca4..58eaa94204 100644 --- a/cf/test/test_regrid.py +++ b/cf/test/test_regrid.py @@ -837,6 +837,195 @@ def test_return_esmpy_regrid_operator(self): self.assertIsInstance(opers, esmpy.api.regrid.Regrid) self.assertIsInstance(operc, esmpy.api.regrid.Regrid) + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_regrids_max_masked(self): + """Test max_masked keyword to regrids.""" + self.assertFalse(cf.regrid_logging()) + + # Source grid + s = cf.example_field(0) + + # Destination grid + d = s[1:, :] + x = d.dimension_coordinate("X") + y = d.dimension_coordinate("Y") + x.del_bounds() + y.del_bounds() + x[...] = [0.0, 45.0, 90.0, 135.0, 180.0, 225.0, 270.0, 315.0] + y[...] = [-60.0, -22.5, 22.5, 60.0] + + # No missing values + x = s.regrids(d, method="linear") + self.assertEqual(x.data.count().array, 32) + + # Create some masked source cells + for i in range(5): + s[i, i:] = cf.masked + + self.assertTrue( + np.array_equal( + s.data.mask, + [ + [True, True, True, True, True, True, True, True], + [False, True, True, True, True, True, True, True], + [False, False, True, True, True, True, True, True], + [False, False, False, True, True, True, True, True], + [False, False, False, False, True, True, True, True], + ], + ) + ) + + x = s.regrids(d, method="linear", use_dst_mask=False, max_masked=0) + self.assertTrue( + np.array_equal( + x.data.mask, + [ + [True, True, True, True, True, True, True, True], + [True, True, True, True, True, True, True, True], + [True, False, True, True, True, True, True, True], + [True, False, False, True, True, True, True, True], + ], + ) + ) + + x = s.regrids(d, method="linear", use_dst_mask=False, max_masked=1) + self.assertTrue( + np.array_equal( + x.data.mask, + [ + [True, True, True, True, True, True, True, True], + [True, False, True, True, True, True, True, True], + [True, False, False, True, True, True, True, True], + [True, False, False, 
False, True, True, True, True], + ], + ) + ) + + x = s.regrids(d, method="linear", use_dst_mask=False, max_masked=2) + self.assertTrue( + np.array_equal( + x.data.mask, + [ + [True, True, True, True, True, True, True, True], + [False, False, True, True, True, True, True, True], + [False, False, False, True, True, True, True, True], + [False, False, False, False, True, True, True, True], + ], + ) + ) + + x = s.regrids(d, method="linear", use_dst_mask=False, max_masked=3) + self.assertTrue( + np.array_equal( + x.data.mask, + [ + [False, False, True, True, True, True, True, True], + [False, False, False, True, True, True, True, True], + [False, False, False, False, True, True, True, True], + [False, False, False, False, False, True, True, True], + ], + ) + ) + + def test_regridc_max_masked(self): + """Test max_masked keyword to regridc.""" + self.assertFalse(cf.regrid_logging()) + + # Source grid + s = cf.example_field(0) + + # Destination grid + d = s[1:, :] + x = d.dimension_coordinate("X") + y = d.dimension_coordinate("Y") + x.del_bounds() + y.del_bounds() + x[...] = [0.0, 45.0, 90.0, 135.0, 180.0, 225.0, 270.0, 315.0] + y[...] 
= [-60.0, -22.5, 22.5, 60.0] + + # No missing values + x = s.regrids(d, method="linear") + self.assertEqual(x.data.count().array, 32) + + # Create some masked source cells + for i in range(5): + s[i, i:] = cf.masked + + self.assertTrue( + np.array_equal( + s.data.mask, + [ + [True, True, True, True, True, True, True, True], + [False, True, True, True, True, True, True, True], + [False, False, True, True, True, True, True, True], + [False, False, False, True, True, True, True, True], + [False, False, False, False, True, True, True, True], + ], + ) + ) + + axes = ["Y", "X"] + + x = s.regridc( + d, axes=axes, method="linear", use_dst_mask=False, max_masked=0 + ) + self.assertTrue( + np.array_equal( + x.data.mask, + [ + [True, True, True, True, True, True, True, True], + [True, True, True, True, True, True, True, True], + [True, False, True, True, True, True, True, True], + [True, False, False, True, True, True, True, True], + ], + ) + ) + + x = s.regridc( + d, axes=axes, method="linear", use_dst_mask=False, max_masked=1 + ) + self.assertTrue( + np.array_equal( + x.data.mask, + [ + [True, True, True, True, True, True, True, True], + [True, False, True, True, True, True, True, True], + [True, False, False, True, True, True, True, True], + [True, False, False, False, True, True, True, True], + ], + ) + ) + + x = s.regridc( + d, axes=axes, method="linear", use_dst_mask=False, max_masked=2 + ) + self.assertTrue( + np.array_equal( + x.data.mask, + [ + [True, True, True, True, True, True, True, True], + [True, False, True, True, True, True, True, True], + [True, False, False, True, True, True, True, True], + [True, False, False, False, True, True, True, True], + ], + ) + ) + + x = s.regridc( + d, axes=axes, method="linear", use_dst_mask=False, max_masked=3 + ) + self.assertTrue( + np.array_equal( + x.data.mask, + [ + [True, False, True, True, True, True, True, True], + [True, False, False, True, True, True, True, True], + [True, False, False, False, True, True, True, True], 
+ [True, False, False, False, False, True, True, True], + ], + ) + ) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) From eaff45a7f8719397d6765d3e3ec748404c299774 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 29 May 2026 12:17:50 +0100 Subject: [PATCH 3/4] dev --- cf/data/data.py | 2 ++ cf/docstring/docstring.py | 8 +++--- cf/regrid/regrid.py | 6 +++++ cf/test/test_regrid.py | 55 ++++++++++++++++++++++----------------- 4 files changed, 42 insertions(+), 29 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index 7a83f9cbf8..fc6a3a2157 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2383,6 +2383,8 @@ def _regrid( {{max_masked: `int`, optional}} + .. versionadded:: NEXTVERSION + :Returns: `Data` diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index c7863686d8..38b6c86f44 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -411,8 +411,8 @@ The maximum allow number of masked source cells which are allowed to be ignored when calculating a non-masked destination cell. When masked source cells - are ignored, the weights of non-masked source cells i - are adjusted so that they sum to 1. + are ignored, the weights of the non-masked source + cells are adjusted so that they sum to 1. Define ``w_ji`` as the multiplicative weight that defines how much of ``Vs_i`` (the value in source grid @@ -424,9 +424,7 @@ i is masked and ``w_ji >= min_weight``. If set to ``N``, then destination grid cell j will be masked if more than ``N`` source cells i are masked with ``w_ji - >= min_weight``. - - .. 
versionadded:: NEXTVERSION""", + >= min_weight``.""", # weights_file "{{weights_file: `str` or `None`, optional}}": """weights_file: `str` or `None`, optional Provide a netCDF file that contains, or will contain, diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 59236f2bc3..d04829b9a6 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -399,6 +399,12 @@ def regrid( """ debug = is_log_level_debug(logger) + if not isinstance(max_masked, int) or max_masked < 0: + raise ValueError( + "The max_masked keyword must be a non-negative integer. " + f"Got: {max_masked!r}" + ) + if not inplace: src = src.copy() diff --git a/cf/test/test_regrid.py b/cf/test/test_regrid.py index 58eaa94204..ae7a03a761 100644 --- a/cf/test/test_regrid.py +++ b/cf/test/test_regrid.py @@ -914,18 +914,24 @@ def test_regrids_max_masked(self): ) ) - x = s.regrids(d, method="linear", use_dst_mask=False, max_masked=3) - self.assertTrue( - np.array_equal( - x.data.mask, - [ - [False, False, True, True, True, True, True, True], - [False, False, False, True, True, True, True, True], - [False, False, False, False, True, True, True, True], - [False, False, False, False, False, True, True, True], - ], + for n in (3, 4, 5): + x = s.regrids(d, method="linear", use_dst_mask=False, max_masked=n) + self.assertTrue( + np.array_equal( + x.data.mask, + [ + [False, False, True, True, True, True, True, True], + [False, False, False, True, True, True, True, True], + [False, False, False, False, True, True, True, True], + [False, False, False, False, False, True, True, True], + ], + ) ) - ) + + # Check bad values of max_masked + for n in (-1, 3.14, "string", None): + with self.assertRaises(ValueError): + x = s.regrids(d, method="linear", max_masked=n) def test_regridc_max_masked(self): """Test max_masked keyword to regridc.""" @@ -1011,20 +1017,21 @@ def test_regridc_max_masked(self): ) ) - x = s.regridc( - d, axes=axes, method="linear", use_dst_mask=False, max_masked=3 - ) - self.assertTrue( - 
np.array_equal( - x.data.mask, - [ - [True, False, True, True, True, True, True, True], - [True, False, False, True, True, True, True, True], - [True, False, False, False, True, True, True, True], - [True, False, False, False, False, True, True, True], - ], + for n in (3, 4, 5): + x = s.regridc( + d, axes=axes, method="linear", use_dst_mask=False, max_masked=n + ) + self.assertTrue( + np.array_equal( + x.data.mask, + [ + [True, False, True, True, True, True, True, True], + [True, False, False, True, True, True, True, True], + [True, False, False, False, True, True, True, True], + [True, False, False, False, False, True, True, True], + ], + ) ) - ) if __name__ == "__main__": From 333b92765c0156240187db56d37567c82374052b Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 29 May 2026 12:51:19 +0100 Subject: [PATCH 4/4] dev --- Changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Changelog.rst b/Changelog.rst index 6fbd34f7d5..2b3874df67 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -4,7 +4,7 @@ Version NEXTVERSION **2026-06-??** * New keyword parameter to `cf.Field.regrids` and `cf.Field.regridc`: - ``max_masked`` (https://github.com/NCAS-CMS/cf-python/issues/???) + ``max_masked`` (https://github.com/NCAS-CMS/cf-python/issues/949) ----