Source code for remote_sensing_processor.segmentation

import os
import warnings

import numpy as np
import xarray

from remote_sensing_processor.common.torch_test import cuda_test

from remote_sensing_processor.segmentation.segmentation import segmentation_train, segmentation_test
from remote_sensing_processor.segmentation.tiles import get_ss_tiles
from remote_sensing_processor.segmentation.mapping import predict_map_from_tiles


def generate_tiles(
    x,
    y,
    tile_size=128,
    classification=True,
    shuffle=False,
    split=None,
    split_names=None,
    x_output=None,
    y_output=None,
    x_dtype=None,
    y_dtype=None,
    x_nodata=None,
    y_nodata=None,
):
    """
    Cut rasters into tiles.

    Parameters
    ----------
    x : path as a string or list of paths as strings
        Rasters to use as training data.
    y : path as a string or list of paths as strings
        Raster or multiple rasters to use as target variable. Can be set to None if target value is not needed.
    tile_size : int (default = 128)
        Size of tiles to generate (tile_size x tile_size). Must be greater than 8.
    classification : bool (default = True)
        If True then tiles will be prepared for classification (e.g. semantic segmentation) task,
        else will be prepared for regression task.
    shuffle : bool (default = False)
        Is random shuffling of samples needed.
    split : list of ints or floats (optional)
        Splitting data in subsets. Is a list of integers defining proportions of every subset.
        [3, 1, 1] will generate 3 subsets in proportion 3 to 1 to 1. If not defined then [1] is used.
    split_names : list of strings (optional)
        Names of split subsets. Must have the same length as `split`. If not defined then ['train'] is used.
    x_output : path as a string (optional)
        Path to save generated output x data. Data is saved in .zarr format.
    y_output : path as a string or list of paths as strings (optional)
        Path or list of paths to save generated output y data. Data is saved in .zarr format.
        Must contain one path for each y raster.
    x_dtype : dtype definition as a string (optional)
        If you run out of memory, you can try to convert your data to less memory consuming format.
    y_dtype : dtype definition as a string (optional)
        If you run out of memory, you can try to convert your data to less memory consuming format.
    x_nodata : int or float (optional)
        You can define which value in x raster corresponds to nodata and areas that contain nodata in x raster
        will be ignored while training and testing. Tiles that contain only nodata in both x and y will be omitted.
        If not defined then nodata of first x file will be used.
    y_nodata : int or float (optional)
        You can define which value in y raster corresponds to nodata and areas that contain nodata in y raster
        will be ignored while training and testing. Tiles that contain only nodata in both x and y will be omitted.
        If not defined then nodata of y file will be used.

    Returns
    -------
    tuple:

        xarray.DataArray
            Array with generated x data.
        xarray.DataArray or list of xarray.DataArray or None
            List of arrays with generated y data - one array for each y raster.

    Raises
    ------
    TypeError
        If an argument has an unsupported type.
    ValueError
        If `tile_size` is not greater than 8, if `split` and `split_names` differ in length,
        or if `y_output` does not match `y`.
    OSError
        If one of the x or y paths does not exist.

    Examples
    --------
        >>> x = ['/home/rsp_test/mosaics/sentinel/B1.tif',
        ... '/home/rsp_test/mosaics/sentinel/B2.tif',
        ... '/home/rsp_test/mosaics/sentinel/B3.tif',
        ... '/home/rsp_test/mosaics/sentinel/B4.tif',
        ... '/home/rsp_test/mosaics/sentinel/B5.tif',
        ... '/home/rsp_test/mosaics/sentinel/B6.tif',
        ... '/home/rsp_test/mosaics/sentinel/B7.tif',
        ... '/home/rsp_test/mosaics/sentinel/B8.tif',
        ... '/home/rsp_test/mosaics/sentinel/B8A.tif',
        ... '/home/rsp_test/mosaics/sentinel/B9.tif',
        ... '/home/rsp_test/mosaics/sentinel/B11.tif',
        ... '/home/rsp_test/mosaics/sentinel/B12.tif']
        >>> y = ['/home/rsp_test/mosaics/landcover.tif',
        ... '/home/rsp_test/mosaics/forest_types.tif']
        >>> x_file = '/home/rsp_test/model/x.zarr'
        >>> y_files = ['/home/rsp_test/model/y_landcover.zarr',
        ... '/home/rsp_test/model/y_forest_types.zarr']
        >>> x_out, y_out = rsp.segmentation.generate_tiles(
        ...     x,
        ...     y,
        ...     tile_size=256,
        ...     shuffle=True,
        ...     split=[3, 1, 1],
        ...     split_names=['train', 'val', 'test'],
        ...     x_output=x_file,
        ...     y_output=y_files,
        ...     x_nodata=0,
        ...     y_nodata=0
        ... )
        >>> print(x_out.shape)
        (12, 8704, 6912)
        >>> y_landcover = y_out[0]
        >>> print(y_landcover.shape)
        (8704, 6912)
        >>> y_forest_types = y_out[1]
        >>> print(y_forest_types.shape)
        (8704, 6912)
    """
    # Type checking
    if isinstance(x, str):
        x = [x]
    elif isinstance(x, list):
        for i in x:
            if not isinstance(i, str):
                raise TypeError("x must be a string or a list of strings")
    else:
        raise TypeError("x must be a string or a list of strings")
    for i in x:
        if not os.path.exists(i):
            raise OSError(i + " does not exist")
    if isinstance(y, str):
        y = [y]
    elif isinstance(y, list):
        for i in y:
            if not isinstance(i, str):
                raise TypeError("y must be a string or a list of strings")
    elif y is not None:
        raise TypeError("y must be a string or a list of strings")
    if isinstance(y, list):
        for i in y:
            if not os.path.exists(i):
                raise OSError(i + " does not exist")
    if not isinstance(tile_size, int):
        if tile_size is None:
            tile_size = 128
        else:
            raise TypeError("tile_size must be an integer")
    elif tile_size <= 8:
        raise ValueError("tile_size must be > 8")
    if not isinstance(classification, bool):
        if classification is None:
            classification = True
        else:
            raise TypeError("classification must be boolean")
    if not isinstance(shuffle, bool):
        if shuffle is None:
            shuffle = False
        else:
            raise TypeError("shuffle must be boolean")
    # None is the signature default (instead of a shared mutable list) and means "a single subset".
    if split is None:
        split = [1]
    elif isinstance(split, list):
        for i in split:
            if not isinstance(i, int) and not isinstance(i, float):
                raise TypeError("split must be a list of ints or floats")
    else:
        raise TypeError("split must be a list of ints or floats")
    if split_names is None:
        split_names = ['train']
    elif isinstance(split_names, list):
        for i in split_names:
            if not isinstance(i, str):
                raise TypeError("split_names must be a list of strings")
    else:
        raise TypeError("split_names must be a list of strings")
    # Explicit raise instead of assert: asserts are stripped when Python runs with -O.
    if len(split) != len(split_names):
        raise ValueError("split and split_names must have the same length")
    if not isinstance(x_output, str) and x_output is not None:
        raise TypeError("x_output must be a string")
    if isinstance(y_output, str):
        y_output = [y_output]
    if isinstance(y_output, list):
        for i in y_output:
            if not isinstance(i, str):
                raise TypeError("y_output must be a string or a list of strings")
        # y can legitimately be None, in which case there is nothing to save as y.
        if y is None:
            raise ValueError("y_output is defined but y is None")
        if len(y_output) != len(y):
            raise ValueError("y_output must contain one path for each y raster")
    elif y_output is not None:
        raise TypeError("y_output must be a list of strings")
    # np.dtype() is called only for validation: it raises if the definition is not a valid dtype.
    if x_dtype is not None:
        np.dtype(x_dtype)
    if y_dtype is not None:
        np.dtype(y_dtype)
    if not isinstance(x_nodata, int) and not isinstance(x_nodata, float) and x_nodata is not None:
        raise TypeError("x_nodata must be integer or float")
    if not isinstance(y_nodata, int) and not isinstance(y_nodata, float) and y_nodata is not None:
        raise TypeError("y_nodata must be integer or float")

    x, y = get_ss_tiles(
        x=x,
        y=y,
        tile_size=tile_size,
        classification=classification,
        shuffle=shuffle,
        split=split,
        split_names=split_names,
        x_output=x_output,
        y_output=y_output,
        x_dtype=x_dtype,
        y_dtype=y_dtype,
        x_nodata=x_nodata,
        y_nodata=y_nodata,
    )
    return x, y
def _validate_dataset(dataset):
    """
    Check that one dataset is a valid [x, y, names] triple.

    x and y must each be either a path to an existing file or an xarray.DataArray,
    names must be a string or a list.

    Raises
    ------
    ValueError
        If dataset does not consist of exactly 3 elements.
    TypeError
        If x, y or names has an unsupported type.
    OSError
        If x or y is a path that does not exist.
    """
    if len(dataset) != 3:
        raise ValueError("Every dataset must consist of x, y and names")
    for label, data in (("x", dataset[0]), ("y", dataset[1])):
        if isinstance(data, str):
            if not os.path.exists(data):
                raise OSError(data + " does not exist")
        elif not isinstance(data, xarray.DataArray):
            raise TypeError(label + " in dataset must be a string or xarray.DataArray")
    if not isinstance(dataset[2], str) and not isinstance(dataset[2], list):
        raise TypeError("name in dataset must be a string or a list")


def train(
    train_datasets,
    val_datasets,
    model_file,
    model,
    backbone=None,
    checkpoint=None,
    weights=None,
    epochs=5,
    batch_size=32,
    repeat=1,
    augment=False,
    less_metrics=False,
    lr=1e-3,
    num_workers=0,
    classification=None,
    num_classes=None,
    y_nodata=None,
    **kwargs
):
    """
    Trains segmentation model.

    Parameters
    ----------
    train_datasets : list or list of lists
        Training data generated by generate_tiles() function. Each dataset is a list of 3 elements:
        training data (x): file path or xarray.DataArray,
        target variable (y): file path or xarray.DataArray,
        split_names: string or list of strings or 'all' if you need to use the whole dataset.
        You can provide a list of datasets to train model on multiple datasets.
        The lists passed in are not modified.
    val_datasets : list or list of lists or None
        Validation data generated by generate_tiles() function. Each dataset is a list of 3 elements:
        training data (x): file path or xarray.DataArray,
        target variable (y): file path or xarray.DataArray,
        split_names: string or list of strings or 'all' if you need to use the whole dataset.
        You can provide a list of datasets to validate model on multiple datasets.
        Can be set to None if no validation needed.
    model_file : path as a string
        Checkpoint file where model will be saved after training.
        File extension must be *.ckpt for neural networks and *.joblib for scikit-learn models.
    model : str
        Name of model architecture.
    backbone : str (optional)
        Backbone, solver or kernel of a model, if multiple backbones are supported.
    checkpoint : path as a string (optional)
        Checkpoint file (*.ckpt or *.joblib) of a pre-trained model to fine-tune.
    weights : str (optional)
        Name of pre-trained weights to fine-tune. Only works for neural networks.
    epochs : int (default = 5)
        Number of training epochs. Only works for neural networks and multilayer perceptron.
    batch_size : int (default = 32)
        Number of training samples used in one iteration. Only works for neural networks.
    repeat : int (default = 1)
        Increase size of a dataset by repeating it n times.
    augment : bool (default = False)
        Apply augmentations to dataset.
    less_metrics : bool (default = False)
        Sometimes Torchmetrics can freeze while calculating precision, recall and IOU.
        If it happens, try restarting with `less_metrics = True`.
    lr : float (default = 1e-3)
        Learning rate of a model. Lower value results usually in better model convergence,
        but much slower training.
    num_workers: int or 'auto' (default = 0)
        Number of parallel workers that will load the data. Set 'auto' to let RSP choose the optimal
        number of workers, set 0 to disable multiprocessing. Can increase training speed,
        but can also cause errors (e.g. pickling errors).
    classification : bool (default = None)
        If True then perform classification (e.g. semantic segmentation) task, else perform regression task.
        If not defined then is read from train dataset.
    num_classes: int (optional)
        Number of classes for classification task. If not defined then is read from train dataset.
    y_nodata : int or float (optional)
        You can define which value in y raster corresponds to nodata and areas that contain nodata in y raster
        will be ignored while training and testing. If not defined then is read from train dataset.
    **kwargs
        Additional keyword arguments that are used to initialise model.
        They are different for every model, so read the documentation.

    Returns
    -------
    torch.nn model or SklearnModel
        Trained model.

    Raises
    ------
    TypeError
        If an argument has an unsupported type.
    ValueError
        If a dataset list is empty, a dataset does not consist of 3 elements, or `repeat` is less than 1.
    OSError
        If a dataset path or the checkpoint file does not exist.

    Examples
    --------
        >>> x_out, y_out = rsp.segmentation.generate_tiles(
        ...     x,
        ...     y,
        ...     tile_size=256,
        ...     shuffle=True,
        ...     split=[3, 1, 1],
        ...     split_names=['train', 'val', 'test']
        ... )
        >>> train_ds = [x_out, y_out[0], 'train']
        >>> val_ds = [x_out, y_out[0], 'val']
        >>> model = rsp.segmentation.train(
        ...     train_ds,
        ...     val_ds,
        ...     model='UperNet',
        ...     backbone='ConvNeXTV2',
        ...     model_file='/home/rsp_test/model/upernet.ckpt',
        ...     epochs=10,
        ...     batch_size=32
        ... )
        GPU available: True (cuda), used: True
        TPU available: False, using: 0 TPU cores
        IPU available: False, using: 0 IPUs
        HPU available: False, using: 0 HPUs
        LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
          | Name    | Type                           | Params
        -----------------------------------------------------------
        0 | model   | UperNetForSemanticSegmentation | 59.8 M
        1 | loss_fn | CrossEntropyLoss               | 0
        -----------------------------------------------------------
        59.8 M    Trainable params
        0         Non-trainable params
        59.8 M    Total params
        239.395   Total estimated model params size (MB)
        Epoch 9: 100% ############################################# 223/223 [1:56:20<00:00, 31.30s/it, v_num=54,
        train_loss_step=0.326, train_acc_step=0.871, train_auroc_step=0.796, train_iou_step=0.655,
        val_loss_step=0.324, val_acc_step=0.869, val_auroc_step=0.620, val_iou_step=0.678,
        val_loss_epoch=0.334, val_acc_epoch=0.807, val_auroc_epoch=0.795, val_iou_epoch=0.688,
        train_loss_epoch=0.349, train_acc_epoch=0.842, train_auroc_epoch=0.797, train_iou_epoch=0.648]
        `Trainer.fit` stopped: `max_epochs=10` reached.

        >>> x_mo = '/home/rsp_test/model/x_montana.zarr'
        >>> y_mo = '/home/rsp_test/model/y_montana.zarr'
        >>> x_id = '/home/rsp_test/model/x_idaho.zarr'
        >>> y_id = '/home/rsp_test/model/y_idaho.zarr'
        >>> # Training on two different datasets - one from Montana and one from Idaho
        >>> train_datasets = [[x_mo, y_mo, ['area_1', 'area_2']], [x_id, y_id, ['area_3', 'area_6', 'area8']]]
        >>> val_datasets = [[x_mo, y_mo, ['area_3', 'area_4']], [x_id, y_id, ['area_1']]]
        >>> model = rsp.segmentation.train(
        ...     train_datasets,
        ...     val_datasets,
        ...     model='UperNet',
        ...     backbone='ConvNeXTV2',
        ...     model_file='/home/rsp_test/model/upernet.ckpt',
        ...     epochs=100,
        ...     batch_size=32
        ... )
        GPU available: True (cuda), used: True
        TPU available: False, using: 0 TPU cores
        IPU available: False, using: 0 IPUs
        HPU available: False, using: 0 HPUs
        LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
          | Name    | Type                           | Params
        -----------------------------------------------------------
        0 | model   | UperNetForSemanticSegmentation | 59.8 M
        1 | loss_fn | CrossEntropyLoss               | 0
        -----------------------------------------------------------
        59.8 M    Trainable params
        0         Non-trainable params
        59.8 M    Total params
        239.395   Total estimated model params size (MB)
        Epoch 99: 100% ############################################# 223/223 [1:56:20<00:00, 31.30s/it, v_num=54,
        train_loss_step=0.326, train_acc_step=0.871, train_auroc_step=0.796, train_iou_step=0.655,
        val_loss_step=0.324, val_acc_step=0.869, val_auroc_step=0.620, val_iou_step=0.678,
        val_loss_epoch=0.334, val_acc_epoch=0.807, val_auroc_epoch=0.795, val_iou_epoch=0.688,
        train_loss_epoch=0.349, train_acc_epoch=0.842, train_auroc_epoch=0.797, train_iou_epoch=0.648]
        `Trainer.fit` stopped: `max_epochs=100` reached.
    """
    # Type checking
    if len(train_datasets) == 0:
        raise ValueError("train_datasets must not be empty")
    # A single dataset [x, y, names] is wrapped so the code below always handles a list of datasets.
    if not isinstance(train_datasets[0], list):
        train_datasets = [train_datasets]
    # Build new lists instead of assigning into the caller's datasets, so the arguments are left untouched.
    normalised_datasets = []
    for dataset in train_datasets:
        _validate_dataset(dataset)
        names = dataset[2]
        # A single split name is wrapped into a list; the special value 'all' is passed through as is.
        if isinstance(names, str) and names != 'all':
            names = [names]
        normalised_datasets.append([dataset[0], dataset[1], names])
    train_datasets = normalised_datasets
    if val_datasets is not None:
        if len(val_datasets) == 0:
            raise ValueError("val_datasets must not be empty")
        if not isinstance(val_datasets[0], list):
            val_datasets = [val_datasets]
        for dataset in val_datasets:
            _validate_dataset(dataset)
    if not isinstance(model_file, str):
        raise TypeError("model_file must be a string")
    if not isinstance(model, str):
        raise TypeError("model must be a string")
    if not isinstance(backbone, str) and backbone is not None:
        raise TypeError("backbone must be a string")
    if isinstance(checkpoint, str):
        if not os.path.exists(checkpoint):
            raise OSError(checkpoint + " does not exist")
    elif checkpoint is not None:
        raise TypeError("checkpoint must be a string")
    if not isinstance(weights, str) and weights is not None:
        raise TypeError("weights must be a string")
    if not isinstance(epochs, int):
        if epochs is None:
            epochs = 5
        else:
            raise TypeError("epochs must be an integer")
    if not isinstance(batch_size, int):
        if batch_size is None:
            batch_size = 32
        else:
            raise TypeError("batch_size must be an integer")
    if not isinstance(repeat, int):
        if repeat is None:
            repeat = 1
        else:
            raise TypeError("repeat must be an integer")
    elif repeat < 1:
        raise ValueError("repeat must be >= 1")
    if not isinstance(augment, bool):
        if augment is None:
            augment = False
        else:
            raise TypeError("augment must be boolean")
    if not isinstance(less_metrics, bool):
        if less_metrics is None:
            less_metrics = False
        else:
            raise TypeError("less_metrics must be boolean")
    if not isinstance(lr, float):
        if lr is None:
            lr = 1e-3
        else:
            raise TypeError("lr must be float")
    if (
        (not isinstance(num_workers, int) and num_workers != 'auto')
        or (isinstance(num_workers, int) and num_workers < 0)
    ):
        # An explicit None falls back to 'auto' (not to the signature default of 0).
        if num_workers is None:
            num_workers = 'auto'
        else:
            raise TypeError("num_workers must be non-negative integer or 'auto'")
    if not isinstance(classification, bool) and classification is not None:
        raise TypeError("classification must be boolean or None")
    if not isinstance(num_classes, int) and num_classes is not None:
        raise TypeError("num_classes must be int or None")
    if not isinstance(y_nodata, int) and not isinstance(y_nodata, float) and y_nodata is not None:
        raise TypeError("y_nodata must be int or float or None")

    cuda = cuda_test()
    # Explicit comparison kept on purpose: the return type of cuda_test() is not visible from this module.
    if cuda == False:  # noqa: E712
        warnings.warn('CUDA or MPS is not available. Training on CPU could be very slow.')

    model = segmentation_train(
        train_datasets=train_datasets,
        val_datasets=val_datasets,
        model=model,
        backbone=backbone,
        checkpoint=checkpoint,
        weights=weights,
        model_file=model_file,
        epochs=epochs,
        batch_size=batch_size,
        augment=augment,
        repeat=repeat,
        classification=classification,
        num_classes=num_classes,
        y_nodata=y_nodata,
        less_metrics=less_metrics,
        lr=lr,
        num_workers=num_workers,
        **kwargs
    )
    return model
def test(test_datasets, model, batch_size=32, num_workers=0):
    """
    Tests segmentation model.

    Parameters
    ----------
    test_datasets : list or list of lists
        Test data generated by generate_tiles() function. Each dataset is a list of 3 elements:
        training data (x): file path or xarray.DataArray,
        target variable (y): file path or xarray.DataArray,
        split_names: string or list of strings or 'all' if you need to use the whole dataset.
        You can provide a list of datasets to test model on multiple datasets.
    model : torch.nn model or SklearnModel or path to a model file
        Model to test. You can pass the model object returned by `train()` function
        or file (*.ckpt or *.joblib) where model is stored.
    batch_size : int (default = 32)
        Number of samples used in one iteration.
    num_workers: int or 'auto' (default = 0)
        Number of parallel workers that will load the data. Set 'auto' to let RSP choose the optimal
        number of workers, set 0 to disable multiprocessing. Can increase training speed,
        but can also cause errors (e.g. pickling errors).

    Raises
    ------
    TypeError
        If an argument has an unsupported type.
    ValueError
        If `test_datasets` is empty or a dataset does not consist of 3 elements.
    OSError
        If a dataset path does not exist.

    Examples
    --------
        >>> x_out, y_out = rsp.segmentation.generate_tiles(
        ...     x,
        ...     y,
        ...     tile_size=256,
        ...     shuffle=True,
        ...     split=[3, 1, 1],
        ...     split_names=['train', 'val', 'test']
        ... )
        >>> train_ds = [x_out, y_out[0], 'train']
        >>> val_ds = [x_out, y_out[0], 'val']
        >>> test_ds = [x_out, y_out[0], 'test']
        >>> model = rsp.segmentation.train(
        ...     train_ds,
        ...     val_ds,
        ...     model='UperNet',
        ...     backbone='ConvNeXTV2',
        ...     model_file='/home/rsp_test/model/upernet.ckpt',
        ...     epochs=10,
        ...     batch_size=32
        ... )
        ...
        >>> rsp.segmentation.test(test_ds, model=model, batch_size=32)
        ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
        ┃        Test metric        ┃       DataLoader 0        ┃
        ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
        │      test_acc_epoch       │    0.8231202960014343     │
        │     test_auroc_epoch      │    0.7588028311729431     │
        │      test_iou_epoch       │    0.69323649406433105    │
        │      test_loss_epoch      │    0.40799811482429504    │
        │   test_precision_epoch    │    0.8231202960014343     │
        │     test_recall_epoch     │    0.8231202960014343     │
        └───────────────────────────┴───────────────────────────┘
    """
    # Type checking
    if len(test_datasets) == 0:
        raise ValueError("test_datasets must not be empty")
    # A single dataset [x, y, names] is wrapped so the loop below always handles a list of datasets.
    if not isinstance(test_datasets[0], list):
        test_datasets = [test_datasets]
    for dataset in test_datasets:
        if len(dataset) != 3:
            raise ValueError("Every dataset must consist of x, y and names")
        if isinstance(dataset[0], str):
            if not os.path.exists(dataset[0]):
                raise OSError(dataset[0] + " does not exist")
        elif not isinstance(dataset[0], xarray.DataArray):
            raise TypeError("x in dataset must be a string or xarray.DataArray")
        if isinstance(dataset[1], str):
            if not os.path.exists(dataset[1]):
                raise OSError(dataset[1] + " does not exist")
        elif not isinstance(dataset[1], xarray.DataArray):
            raise TypeError("y in dataset must be a string or xarray.DataArray")
        if not isinstance(dataset[2], str) and not isinstance(dataset[2], list):
            raise TypeError("name in dataset must be a string or a list")
    if not isinstance(batch_size, int):
        if batch_size is None:
            batch_size = 32
        else:
            raise TypeError("batch_size must be an integer")
    if (
        (not isinstance(num_workers, int) and num_workers != 'auto')
        or (isinstance(num_workers, int) and num_workers < 0)
    ):
        # An explicit None falls back to 'auto' (not to the signature default of 0).
        if num_workers is None:
            num_workers = 'auto'
        else:
            raise TypeError("num_workers must be non-negative integer or 'auto'")

    cuda = cuda_test()
    # Explicit comparison kept on purpose: the return type of cuda_test() is not visible from this module.
    if cuda == False:  # noqa: E712
        warnings.warn('CUDA or MPS is not available. Testing on CPU could be very slow.')

    segmentation_test(
        test_datasets=test_datasets,
        model=model,
        batch_size=batch_size,
        num_workers=num_workers,
    )
def generate_map(
    x,
    y,
    reference,
    model,
    output,
    batch_size=32,
    num_workers=0,
    nodata=None
):
    """
    Create map using pre-trained model.

    Parameters
    ----------
    x : path as a string or xarray.DataArray
        Training data (x) generated by generate_tiles() function that will be used for prediction.
    y : path as a string or xarray.DataArray
        Target variable data (y) generated by generate_tiles() function that was used to train the model.
    reference : path as a string
        Raster that will be used as a reference raster to get size, transform and crs for a map.
        Use one of the rasters that were used for tile generation.
    model : torch.nn model or SklearnModel or path to a model file
        Pre-trained model to predict target values. You can pass the model object returned by `train()` function
        or file (*.ckpt or *.joblib) where model is stored.
    output : path as a string
        Path where to write output map.
    batch_size : int (default = 32)
        Number of samples used in one iteration.
    num_workers: int or 'auto' (default = 0)
        Number of parallel workers that will load the data. Set 'auto' to let RSP choose the optimal
        number of workers, set 0 to disable multiprocessing. Can increase training speed,
        but can also cause errors (e.g. pickling errors).
    nodata : int or float (optional)
        Nodata value. If not defined then nodata value of y dataset will be used.

    Raises
    ------
    TypeError
        If an argument has an unsupported type.
    OSError
        If the x, y or reference path does not exist.

    Examples
    --------
        >>> x_out, y_out = rsp.segmentation.generate_tiles(
        ...     x,
        ...     y,
        ...     tile_size=256,
        ...     shuffle=True,
        ...     split=[3, 1, 1],
        ...     split_names=['train', 'val', 'test']
        ... )
        >>> train_ds = [x_out, y_out[0], 'train']
        >>> val_ds = [x_out, y_out[0], 'val']
        >>> model = rsp.segmentation.train(
        ...     train_ds,
        ...     val_ds,
        ...     model='UperNet',
        ...     backbone='ConvNeXTV2',
        ...     model_file='/home/rsp_test/model/upernet.ckpt',
        ...     epochs=10,
        ...     batch_size=32
        ... )
        ...
        >>> reference = '/home/rsp_test/mosaics/landcover.tif'
        >>> output_map = '/home/rsp_test/prediction.tif'
        >>> rsp.segmentation.generate_map(x_out, y_out[0], reference, model, output_map)
        Predicting: 100% #################### 372/372 [32:16, 1.6s/it]

        >>> x_file = '/home/rsp_test/model/x.zarr'
        >>> y_file = '/home/rsp_test/model/y.zarr'
        >>> model = '/home/rsp_test/model/upernet.ckpt'
        >>> reference = '/home/rsp_test/mosaics/landcover.tif'
        >>> output_map = '/home/rsp_test/prediction.tif'
        >>> rsp.segmentation.generate_map(x_file, y_file, reference, model, output_map)
        Predicting: 100% #################### 372/372 [32:16, 1.6s/it]

        >>> # Train model on data from Montana
        >>> x_montana_files = glob('/home/rsp_test/mosaics/landsat_montana/*')
        >>> y_montana_files = '/home/rsp_test/mosaics/landcover_montana/landcover.tif'
        >>> x_montana, y_montana = rsp.segmentation.generate_tiles(
        ...     x_montana_files,
        ...     y_montana_files,
        ...     tile_size=256,
        ...     shuffle=True,
        ...     split=[3, 1, 1],
        ...     split_names=['train', 'val', 'test']
        ... )
        >>> train_ds = [x_montana, y_montana[0], 'train']
        >>> val_ds = [x_montana, y_montana[0], 'val']
        >>> model_montana = rsp.segmentation.train(
        ...     train_ds,
        ...     val_ds,
        ...     model='UperNet',
        ...     backbone='ConvNeXTV2',
        ...     model_file='/home/rsp_test/model/upernet.ckpt',
        ...     epochs=10,
        ...     batch_size=32
        ... )
        ...
        >>> # Use model to map landcover of Idaho
        >>> x_idaho_files = glob('/home/rsp_test/mosaics/landsat_idaho/*')
        >>> x_idaho, _ = rsp.segmentation.generate_tiles(x_idaho_files, None, tile_size=256)
        >>> reference = x_idaho_files[0]
        >>> output_map = '/home/rsp_test/prediction_idaho.tif'
        >>> rsp.segmentation.generate_map(x_idaho, y_montana, reference, model_montana, output_map)
        Predicting: 100% #################### 372/372 [32:16, 1.6s/it]
    """
    # Type checking
    if isinstance(x, str):
        if not os.path.exists(x):
            raise OSError(x + " does not exist")
    elif not isinstance(x, xarray.DataArray):
        raise TypeError("x must be a string or xarray.DataArray")
    if isinstance(y, str):
        if not os.path.exists(y):
            # Report the y path itself (x may be a DataArray and is not the missing file).
            raise OSError(y + " does not exist")
    elif not isinstance(y, xarray.DataArray):
        raise TypeError("y must be a string or xarray.DataArray")
    if not isinstance(reference, str):
        raise TypeError("reference must be a string")
    elif not os.path.exists(reference):
        raise OSError(reference + " does not exist")
    if not isinstance(output, str):
        raise TypeError("output must be a string")
    if not isinstance(batch_size, int):
        if batch_size is None:
            batch_size = 32
        else:
            raise TypeError("batch_size must be an integer")
    if (
        (not isinstance(num_workers, int) and num_workers != 'auto')
        or (isinstance(num_workers, int) and num_workers < 0)
    ):
        # An explicit None falls back to 'auto' (not to the signature default of 0).
        if num_workers is None:
            num_workers = 'auto'
        else:
            raise TypeError("num_workers must be non-negative integer or 'auto'")
    if not isinstance(nodata, int) and not isinstance(nodata, float) and nodata is not None:
        raise TypeError("nodata must be integer or float")

    cuda = cuda_test()
    # Explicit comparison kept on purpose: the return type of cuda_test() is not visible from this module.
    if cuda == False:  # noqa: E712
        warnings.warn('CUDA or MPS is not available. Prediction on CPU could be very slow.')

    predict_map_from_tiles(
        x=x,
        y=y,
        reference=reference,
        model=model,
        output=output,
        nodata=nodata,
        batch_size=batch_size,
        num_workers=num_workers,
    )