I am trying to extract ERA5 forcing data for a small spatial extent (3×3 cells). Below is part of the function code. When I run the main file that calls this function, I get a "User memory limit exceeded" error. When I tried the bounding-box method instead, I got an error like **111532 points x 8 bands x 643843 images > 1048576**.
def fromGEE_to_df(dates, era5_land, era_poi):
    """Download hourly ERA5-Land forcing around a point and return it as a DataFrame.

    Parameters
    ----------
    dates : sequence of str
        Chunk boundaries (e.g. yearly); each request covers [dates[i], dates[i+1]).
        Chunking keeps every single getRegion call under the GEE memory limit.
    era5_land : ee.ImageCollection
        The "ECMWF/ERA5_LAND/HOURLY" collection.
    era_poi : ee.Geometry.Point
        Point of interest (degrees).

    Returns
    -------
    pandas.DataFrame
        One row per (pixel, timestamp) with a 'datetime' column plus the bands below.
    """
    # Export scale in meters.
    scale = 1000
    bands = ['temperature_2m',
             'dewpoint_temperature_2m',
             'surface_solar_radiation_downwards_hourly',
             'surface_thermal_radiation_downwards_hourly',
             'total_precipitation_hourly',
             'u_component_of_wind_10m',
             'v_component_of_wind_10m',
             'surface_pressure']
    # Storage for the accumulated chunks.
    era5_df = pd.DataFrame(columns=['datetime'] + bands)
    # BUG FIX: ee.Geometry.buffer() takes a distance in METERS (the second
    # argument is maxError, not units) — buffer(1.5, scale) was a 1.5 m buffer.
    # Express the intended ~1.5-degree half-width in meters (~111.32 km/deg).
    # NOTE(review): ±1.5 deg at a 1000 m scale yields ~111k points per image,
    # which matches the "111532 points" error — shrink this half-width (e.g. to
    # ~0.15 deg for a 3x3 block of 0.1-deg ERA5-Land cells) if that is the goal.
    half_width_m = 1.5 * 111320
    region = era_poi.buffer(half_width_m).bounds()
    for dat in range(len(dates) - 1):
        i_dt_tmp = dates[dat]
        f_dt_tmp = dates[dat + 1]
        # BUG FIX: the original never used the chunk dates, so every request
        # spanned the WHOLE collection (~643k hourly images) — the direct cause
        # of "User memory limit exceeded". Filter to the current chunk first.
        era5_selc = era5_land.filterDate(i_dt_tmp, f_dt_tmp).select(bands)
        era5_data = era5_selc.getRegion(region, scale).getInfo()
        # getRegion returns [header, row, row, ...]; the header starts with
        # id, longitude, latitude, time (ms since epoch), then the bands.
        header = era5_data[0]
        chunk = pd.DataFrame(era5_data[1:], columns=header)
        chunk['datetime'] = pd.to_datetime(chunk['time'], unit='ms')
        era5_df = pd.concat([era5_df, chunk[['datetime'] + bands]],
                            ignore_index=True)
    return era5_df
import ee
import myfuns as myf
# Output forcing file name.
nc_name = "MuSA_forz.nc"
# Period of interest: initial date inclusive, final date exclusive.
i_date = "2018-09-30"
f_date = "2019-07-31"
# Cell-center coordinates (degrees); the lists are parallel, one entry per cell.
era_lon = [-106.98980772326122, -106.98980772326123, -106.98980772326124]
era_lat = [38.932945228166, 38.932945228167, 38.932945228168]
###
# ------- Hopefully it will not be necessary to touch under this line ------- #
###
# Create the netCDF container that will receive the forcing.
myf.init_netcdf(nc_name, era_lon, era_lat, i_date, f_date)
# Start the Earth Engine session.
ee.Initialize()
# Hourly ERA5-Land collection.
era5_land = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")
# Split the period into sub-ranges so each request stays under the GEE
# memory limit (avoids EEException: User memory limit exceeded).
dates = myf.seq_dates(i_date, f_date)
# Guard: coordinate lists must have matching lengths.
if len(era_lon) != len(era_lat):
    raise Exception('check coordinates')
# Process one grid cell at a time.
n_cells = len(era_lon)
for n, (lon, lat) in enumerate(zip(era_lon, era_lat)):
    print("Solving cell: " + str(n + 1) + " of " + str(n_cells))
    # Build the point geometry for this cell.
    era_poi = ee.Geometry.Point([lon, lat])
    # Pull the forcing from GEE as a pandas DataFrame.
    era5df = myf.fromGEE_to_df(dates, era5_land, era_poi)
    # Reshape the columns into the layout MuSA expects.
    MuSA_era5 = myf.format_forz(era5df)
    myf.store_era_nc(nc_name, MuSA_era5, n)
One solution would be to switch to Google Cloud computing to remove this limitation and download the big dataset, but I was wondering whether there is a way to improve the code so I can download the forcing file for a small spatial extent.