Source code for soilstats.soilcollect

import random
import pandas as pd
from .analysis.analysis import Analyse
from .soildata import SoilData


class SoilCollect(Analyse):
    """Class for collecting soil data for multiple locations within a grid."""

    def __init__(self, properties, depths, values, lat_bounds, lon_bounds, n=50):
        """Set up SoilGrids data query for multiple points within an area.

        For available properties, depths, and values to return: consult the
        SoilGrids API documentation
        (https://rest.isric.org/soilgrids/v2.0/docs#/default/query_layer_properties_properties_query_get).

        Args:
            properties (str or list): soil properties to query;
                e.g. ["clay", "sand"]
            depths (str or list): depth range(s); e.g. ["0-5cm", "0-30cm"]
            values (str or list): values to return; e.g. "mean"
            lat_bounds (list[floats]): boundaries for latitude; e.g. [50, 55]
            lon_bounds (list[floats]): boundaries for longitude; e.g. [10, 15]
            n (int, optional): number of locations to sample within the
                boundaries. Defaults to 50.

        Raises:
            IndexError: if ``lat_bounds`` or ``lon_bounds`` does not contain
                exactly two values.
        """
        self.properties = properties
        self.depths = depths
        self.values = values
        # Stored sorted as [min, max] so add_points() can reuse them later.
        self.latitude = [min(lat_bounds), max(lat_bounds)]
        self.longitude = [min(lon_bounds), max(lon_bounds)]
        # _random_points validates the bounds before sampling.
        self._locations = self._random_points(lat_bounds, lon_bounds, n)
        self._sdpoints = self._init_soildata()

    def get_data(self):
        """Return data from the SoilGrids API as a data frame.

        Calls ``get_data()`` on every stored SoilData object, prints a short
        summary of how many locations returned data, and concatenates the
        per-location results.

        Returns:
            pandas.DataFrame: the concatenated results with a fresh index, or
            an empty data frame when the object holds no locations.
        """
        dfs = [sd.get_data() for sd in self._sdpoints]
        empty = sum(df.empty for df in dfs)
        pointswdata = len(dfs) - empty
        print(f"Data from {pointswdata} points. {empty} out of {len(dfs)} locations returned no data.")
        print("Run .add_points(n) to add more points to the SoilCollect object.")
        if not dfs:
            # pd.concat raises ValueError("No objects to concatenate") on an
            # empty sequence; return an empty frame instead of crashing.
            return pd.DataFrame()
        return pd.concat(dfs, ignore_index=True)

    def add_points(self, n):
        """Add more datapoints to the existing SoilCollect object.

        Args:
            n (int): number of additional random locations to sample within
                the latitude/longitude bounds stored on this object.
        """
        new_locations = self._random_points(self.latitude, self.longitude, n)
        self._locations += new_locations
        # Only build SoilData objects for the new locations; existing ones
        # are kept as they are.
        new_datapoints = self._init_soildata(new_locations)
        self._sdpoints += new_datapoints

    def _init_soildata(self, points=None):
        """Initialize SoilData objects for each location.

        Args:
            points (list[tuple], optional): (lat, lon) pairs to initialize.
                Defaults to all locations stored on this object.

        Returns:
            list: one SoilData object per location, configured with this
            object's properties, depths, and values.
        """
        if points is None:
            points = self._locations
        return [
            SoilData(
                lat,
                lon,
                properties=self.properties,
                depths=self.depths,
                values=self.values,
            )
            for lat, lon in points
        ]

    @property
    def locations(self):
        """Return locations."""
        return self._locations

    @property
    def soildatapoints(self):
        """Return SoilData objects."""
        # TODO return them in a nice & logical way
        return self._sdpoints

    @classmethod
    def _random_points(cls, lat_bounds, lon_bounds, n):
        """Generate random locations within the specified boundaries.

        Latitude and longitude are each drawn independently with
        ``random.uniform`` between the verified bounds.

        Args:
            lat_bounds (list[floats]): two latitude boundaries.
            lon_bounds (list[floats]): two longitude boundaries.
            n (int): number of locations to generate.

        Returns:
            list[tuple]: ``n`` (lat, lon) pairs.

        Raises:
            IndexError: if either bounds argument does not hold two values.
        """
        min_lat, max_lat = cls._verify_bounds(lat_bounds, "lat")
        min_lon, max_lon = cls._verify_bounds(lon_bounds, "lon")
        lats = [random.uniform(min_lat, max_lat) for _ in range(n)]
        lons = [random.uniform(min_lon, max_lon) for _ in range(n)]
        return list(zip(lats, lons))

    @classmethod
    def _verify_bounds(cls, bounds, datatype):
        """Check a pair of boundaries and return them as (min, max).

        Args:
            bounds (list[floats]): the two boundary values, in any order.
            datatype (str): "lat" or "lon"; used in the error message and
                passed on to ``SoilData._verify``.

        Returns:
            tuple: the results of ``SoilData._verify`` for the smaller and
            the larger boundary (presumably the validated values themselves;
            confirm against SoilData).

        Raises:
            IndexError: if ``bounds`` does not contain exactly two values.
        """
        # TODO boundaries should also work on spherical earth!
        if len(bounds) != 2:
            raise IndexError(f"{datatype}_bounds requires 2 values, got {len(bounds)}")
        minvalue = SoilData._verify(min(bounds), datatype)
        maxvalue = SoilData._verify(max(bounds), datatype)
        return minvalue, maxvalue