Source code for remote_sensing_processor.common.normalize.normalize

"""Normalize raster."""

from pydantic import validate_call
from typing import Optional, Union, cast

import warnings

import xarray as xr

from remote_sensing_processor.common.common_functions import create_path, persist
from remote_sensing_processor.common.common_raster import (
    check_dtype,
    load_dataset,
    prepare_nodata,
    restore_nodata_from_nan,
    set_nodata_to_nan,
    write_dataset,
)
from remote_sensing_processor.common.dataset import check_output, postprocess_dataset, read_dataset
from remote_sensing_processor.common.types import DirectoryPath, FilePath, NewPath, PystacItem


@validate_call
def min_max(
    input_path: Union[FilePath, DirectoryPath, PystacItem],
    minimum: Optional[Union[int, float]],
    maximum: Optional[Union[int, float]],
    output_path: Optional[Union[FilePath, DirectoryPath, NewPath]] = None,
    clip_values: Optional[bool] = False,
    nodata: Optional[Union[int, float]] = None,
    write_stac: Optional[bool] = True,
) -> NewPath:
    """
    Applies min-max normalization to an input file.

    Every value is transformed as ``(value - minimum) / (maximum - minimum)``.
    Recommended if you want your data in the 0-1 range.

    Parameters
    ----------
    input_path : string or STAC Item
        Path to input file, directory or STAC dataset or a STAC Item (e.g., from Planetary Computer).
    minimum : int or float
        Min value.
    maximum : int or float
        Max value.
    output_path : string (optional)
        Path to an output file, directory, or STAC dataset. If not set, then will overwrite the input files.
        Must be set if input is a remote STAC Item.
    clip_values: bool (default = False)
        If True, limits output values to min-max range. Otherwise, it will just normalize the values and
        if there are values outside the min-max range, they would be larger than 1 or smaller than 0.
    nodata : int or float (optional)
        Nodata value. If not set, then is read from inputs.
    write_stac : bool (default = True)
        If True, then output metadata is saved to a STAC file.

    Returns
    -------
    pathlib.Path
        Path where output raster is saved. This is the path of the JSON metadata file
        when ``write_stac`` is True, otherwise the output path itself.

    Raises
    ------
    ValueError
        If ``minimum`` is not smaller than ``maximum``.

    Warns
    -----
    UserWarning
        If the first band of the data contains values above ``maximum`` and/or below ``minimum``.
        The two conditions are checked independently, so both warnings can be issued.

    Examples
    --------
        >>> import remote_sensing_processor as rsp
        >>> rsp.normalize.min_max(
        ...     input_path="/home/rsp_test/mosaics/sentinel/B1.tif",
        ...     minimum=0,
        ...     maximum=10000,
        ...     output_path="/home/rsp_test/mosaics/sentinel/B1_norm.tif",
        ... )
        '/home/rsp_test/mosaics/sentinel/B1_norm.json'
    """
    # Validate the arguments before any data is read, so that a bad range fails fast
    if not minimum < maximum:
        raise ValueError("minimum should be smaller than maximum")

    output_path = check_output(input_path, output_path)
    dataset = read_dataset(input_path)
    img = load_dataset(dataset)

    img, nodata = prepare_nodata(img, nodata)

    # Replacing nodata with nan
    img = set_nodata_to_nan(img)

    # Range checks are done on the first variable of the dataset only.
    # The explicit nodata mask is kept from the original implementation.
    first_band = img[next(iter(img.keys()))]
    valid = first_band.where(first_band != nodata)
    data_max = valid.max().values.item()
    data_min = valid.min().values.item()

    # Two independent checks: the data can exceed the range on both sides at once
    if data_max > maximum:
        warnings.warn(
            f"Max value of the data ({data_max}) is higher than specified max value ({maximum})",
            stacklevel=1,
        )
    if data_min < minimum:
        warnings.warn(
            f"Min value of the data ({data_min}) is lower than specified min value ({minimum})",
            stacklevel=1,
        )

    # Normalization
    img = (img - minimum) / (maximum - minimum)
    img = cast("xr.Dataset", cast("object", img))

    # Clipping values
    if clip_values:
        img = img.clip(0, 1)

    # Setting nodata to 0
    img, _ = prepare_nodata(img, 0)

    # Restoring nodata values
    img = restore_nodata_from_nan(img)

    img = check_dtype(img)

    img = persist(img)

    # Creating an output folder
    create_path(output_path)

    # Creating final STAC dataset
    dataset, json_path = postprocess_dataset(dataset, img, output_path)

    # Write
    write_dataset(img, dataset, json_path)

    if write_stac:
        # Writing JSON metadata file
        dataset.save_object(dest_href=json_path.as_posix())
        return json_path
    return output_path
@validate_call
def z_score(
    input_path: Union[FilePath, DirectoryPath, PystacItem],
    mean: Optional[Union[int, float]],
    stddev: Optional[Union[int, float]],
    output_path: Optional[Union[FilePath, DirectoryPath, NewPath]] = None,
    nodata: Optional[Union[int, float]] = None,
    write_stac: Optional[bool] = True,
) -> NewPath:
    """
    Applies z-score normalization to an input file.

    Every value is transformed as ``(value - mean) / stddev``.
    Recommended if you want all of your data to have the same distribution,
    but the data will not be limited to 0-1 range.

    Parameters
    ----------
    input_path : string or STAC Item
        Path to input file, directory or STAC dataset or a STAC Item (e.g., from Planetary Computer).
    mean : int or float
        Mean value.
    stddev : int or float
        Standard deviation value. Must be greater than 0.
    output_path : string (optional)
        Path to an output file, directory, or STAC dataset. If not set, then will overwrite the input files.
        Must be set if input is a remote STAC Item.
    nodata : int or float (optional)
        Nodata value. If not set, then is read from inputs.
    write_stac : bool (default = True)
        If True, then output metadata is saved to a STAC file.

    Returns
    -------
    pathlib.Path
        Path where output raster is saved. This is the path of the JSON metadata file
        when ``write_stac`` is True, otherwise the output path itself.

    Raises
    ------
    ValueError
        If ``stddev`` is not greater than 0.

    Warns
    -----
    UserWarning
        If ``mean`` lies outside the value range of the first band of the data.

    Examples
    --------
        >>> import remote_sensing_processor as rsp
        >>> rsp.normalize.z_score(
        ...     input_path="/home/rsp_test/mosaics/sentinel/B1.tif",
        ...     mean=302,
        ...     stddev=173,
        ...     output_path="/home/rsp_test/mosaics/sentinel/B1_norm.tif",
        ... )
        '/home/rsp_test/mosaics/sentinel/B1_norm.json'
    """
    # stddev is the divisor of the normalization: zero would silently fill the output
    # with inf/NaN, so reject it (and negative values) before any data is read
    if not stddev > 0:
        raise ValueError("stddev should be greater than 0")

    output_path = check_output(input_path, output_path)
    dataset = read_dataset(input_path)
    img = load_dataset(dataset)

    img, nodata = prepare_nodata(img, nodata)

    # Replacing nodata with nan
    img = set_nodata_to_nan(img)

    # Sanity checks are done on the first variable of the dataset only.
    # The explicit nodata mask is kept from the original implementation.
    first_band = img[next(iter(img.keys()))]
    valid = first_band.where(first_band != nodata)

    # The two conditions are mutually exclusive, so the minimum is only computed when needed
    data_max = valid.max().values.item()
    if data_max < mean:
        warnings.warn(
            f"Max value of the data ({data_max}) is lower than specified mean value ({mean})",
            stacklevel=1,
        )
    else:
        data_min = valid.min().values.item()
        if data_min > mean:
            warnings.warn(
                f"Min value of the data ({data_min}) is higher than specified mean value ({mean})",
                stacklevel=1,
            )

    # Normalization
    img = (img - mean) / stddev
    img = cast("xr.Dataset", cast("object", img))

    # Setting nodata to 0
    img, _ = prepare_nodata(img, 0)

    # Restoring nodata values
    img = restore_nodata_from_nan(img)

    img = check_dtype(img)

    img = persist(img)

    # Creating an output folder
    create_path(output_path)

    # Creating final STAC dataset
    dataset, json_path = postprocess_dataset(dataset, img, output_path)

    # Write
    write_dataset(img, dataset, json_path)

    if write_stac:
        # Writing JSON metadata file
        dataset.save_object(dest_href=json_path.as_posix())
        return json_path
    return output_path
@validate_call
def dynamic_world(
    input_path: Union[FilePath, DirectoryPath, PystacItem],
    percentile1: Optional[Union[int, float]],
    percentile2: Optional[Union[int, float]],
    output_path: Optional[Union[FilePath, DirectoryPath, NewPath]] = None,
    clip_values: Optional[bool] = True,
    nodata: Optional[Union[int, float]] = None,
    write_stac: Optional[bool] = True,
) -> NewPath:
    """
    Applies log-transform + sigmoid normalization to an input file.

    The data is log-transformed, linearly rescaled so that ``percentile1`` maps to 0 and
    ``percentile2`` maps to 1, and then passed through a sigmoid.
    This normalization method is similar to a method described in https://doi.org/10.1038/s41597-022-01307-4.
    Recommended if you want to have your data in 0-1 range and handle outliers well.

    Parameters
    ----------
    input_path : string or STAC Item
        Path to input file, directory or STAC dataset or a STAC Item (e.g., from Planetary Computer).
    percentile1 : int or float
        First log-transformed data percentile.
    percentile2 : int or float
        Second log-transformed data percentile.
    output_path : string (optional)
        Path to an output file, directory, or STAC dataset. If not set, then will overwrite the input files.
        Must be set if input is a remote STAC Item.
    clip_values: bool (default = True)
        If True, every value below 1e-05 (zeros and negative values) is raised to 1e-05 before the log.
        Otherwise, only exact zeros are replaced with 1e-05 and negative values will become nodata
        because ln(neg) is a nan.
    nodata : int or float (optional)
        Nodata value. If not set, then is read from inputs.
    write_stac : bool (default = True)
        If True, then output metadata is saved to a STAC file.

    Returns
    -------
    pathlib.Path
        Path where output raster is saved. This is the path of the JSON metadata file
        when ``write_stac`` is True, otherwise the output path itself.

    Raises
    ------
    ValueError
        If ``percentile1`` is not smaller than ``percentile2``.

    Examples
    --------
        >>> import remote_sensing_processor as rsp
        >>> rsp.normalize.dynamic_world(
        ...     input_path="/home/rsp_test/mosaics/sentinel/B1.tif",
        ...     percentile1=5.49,
        ...     percentile2=5.78,
        ...     output_path="/home/rsp_test/mosaics/sentinel/B1_norm.tif",
        ... )
        '/home/rsp_test/mosaics/sentinel/B1_norm.json'
    """
    # Validate the arguments before any data is read, so that bad percentiles fail fast
    if not percentile1 < percentile2:
        raise ValueError("percentile1 should be smaller than percentile2")

    output_path = check_output(input_path, output_path)
    dataset = read_dataset(input_path)
    img = load_dataset(dataset)

    img, nodata = prepare_nodata(img, nodata)

    # Replacing nodata with nan
    img = set_nodata_to_nan(img)

    # Normalization
    # Turning 0 (and, when clipping, negative values) to small positive values to log safely
    img = img.clip(1e-05) if clip_values else img.where(img != 0, 1e-05)

    # Log transform
    img = xr.ufuncs.log(img)

    # Linearly rescale log values so that percentile1 maps to 0 and percentile2 maps to 1
    img = (img - percentile1) / (percentile2 - percentile1)

    # Apply sigmoid to map into (0, 1).
    # 1 / (1 + exp(-x)) is algebraically the same as exp(x) / (exp(x) + 1), but when the
    # exponential overflows it saturates to 1 or 0 instead of producing inf / inf = NaN,
    # which would otherwise end up as nodata in the output.
    img = 1 / (1 + xr.ufuncs.exp(-img))

    # Setting nodata to 0
    img, _ = prepare_nodata(img, 0)

    # Restoring nodata values
    img = restore_nodata_from_nan(img)

    img = check_dtype(img)

    img = persist(img)

    # Creating an output folder
    create_path(output_path)

    # Creating final STAC dataset
    dataset, json_path = postprocess_dataset(dataset, img, output_path)

    # Write
    write_dataset(img, dataset, json_path)

    if write_stac:
        # Writing JSON metadata file
        dataset.save_object(dest_href=json_path.as_posix())
        return json_path
    return output_path