"User memory limit exceeded" error with the GEE API

112 Views Asked by At

I am trying to extract ERA5 forcing data for a 3 by 3 spatial extent. Below is part of the function code. When I run the main file that calls the function, I get the error "User memory limit exceeded". When I tried the bounding-box method instead, I got an error like **111532 points x 8 bands x 643843 images > 1048576**.

def fromGEE_to_df(dates, era5_land, era_poi):
    """Query eight ERA5-Land bands around a point for consecutive date pairs.

    NOTE: this is a partial listing. The function body is cut off inside the
    loop, so what happens to ``era5_data`` and what is returned is not
    visible here.

    Parameters
    ----------
    dates
        Sequence of date boundaries; the loop walks consecutive pairs
        ``(dates[k], dates[k + 1])``.
    era5_land
        Object exposing ``select(...)``; presumably the
        ``ee.ImageCollection`` built by the calling script -- confirm.
    era_poi
        Object exposing ``buffer(...)``; presumably the
        ``ee.Geometry.Point`` built by the calling script -- confirm.
    """

    # scale in meters
    # NOTE(review): this value is passed to getRegion as the sampling scale.
    # A scale much finer than the dataset's native grid multiplies the number
    # of sampled points and is a likely contributor to the reported
    # "points x bands x images" limit error -- confirm the native resolution.
    scale = 1000

    # initialize storage df
    # (not used again in the visible part of the function)
    era5_df = pd.DataFrame(columns=['datetime',
                                    'temperature_2m',
                                    'dewpoint_temperature_2m',
                                    'surface_solar_radiation_downwards_hourly',
                                    'surface_thermal_radiation_downwards_hourly',
                                    'total_precipitation_hourly',
                                    'u_component_of_wind_10m',
                                    'v_component_of_wind_10m',
                                    'surface_pressure'])

    # Walk consecutive (start, end) pairs of the date sequence.
    for dat in range(len(dates)-1):

        # NOTE(review): i_dt_tmp / f_dt_tmp are not used in the visible code.
        # Presumably they are meant to restrict the collection to this date
        # window (e.g. via a date filter); without that, every request covers
        # the whole collection -- confirm against the full function.
        i_dt_tmp = dates[dat]
        f_dt_tmp = dates[dat+1]

        # Select the eight forcing bands. No date filtering happens here.
        # NOTE(review): this selection does not depend on the loop variable,
        # so it could be hoisted out of the loop.
        era5_selc = era5_land.select('temperature_2m',
                                     'dewpoint_temperature_2m',
                                     'surface_solar_radiation_downwards_hourly',
                                     'surface_thermal_radiation_downwards_hourly',
                                     'total_precipitation_hourly',
                                     'u_component_of_wind_10m',
                                     'v_component_of_wind_10m',
                                     'surface_pressure')
        
        # Create a bounding box around the point of interest
        #bbox = ee.Geometry.Rectangle([era_poi.getInfo()["coordinates"][0] - 1.5,
                                      #era_poi.getInfo()["coordinates"][1] - 1.5,
                                      #era_poi.getInfo()["coordinates"][0] + 1.5,
                                      #era_poi.getInfo()["coordinates"][1] + 1.5])
      
        #era5_data = era5_selc.getRegion(bbox, scale).getInfo()
        
        #era5_data = era5_selc.getRegion(era_poi, scale).getInfo()
        # Fetch the sampled values for the buffered point to the client.
        # NOTE(review): the first argument of Geometry.buffer is presumably a
        # distance in meters and the second a maximum error, so this would be
        # a 1.5 m buffer with a 1000 m error margin rather than a 1.5-degree
        # neighbourhood -- confirm against the Earth Engine documentation.
        era5_data = era5_selc.getRegion(era_poi.buffer(1.5, scale), scale).getInfo()
import ee
import myfuns as myf

# Name of the NetCDF forcing file that is created and filled below.
nc_name = "MuSA_forz.nc"

# Initial date of interest (inclusive).
i_date = "2018-09-30"

# Final date of interest (exclusive).
f_date = "2019-07-31"

# Define the locations of interest in degrees.
# era_lon[k] is paired with era_lat[k]; both lists must have the same length.
#era_lon = [-106.98980772326122]
#era_lat = [38.932945228166]
era_lon = [-106.98980772326122, -106.98980772326123, -106.98980772326124]
era_lat = [38.932945228166, 38.932945228167, 38.932945228168]

###
# ------- Hopefully it will not be necessary to touch under this line ------- #
###

# Validate the coordinate lists first, so that a bad configuration is rejected
# before the output file is created or the Earth Engine client is initialized.
if len(era_lon) != len(era_lat):
    raise ValueError('check coordinates')

# Create netcdf
myf.init_netcdf(nc_name, era_lon, era_lat, i_date, f_date)

# Initialize the library.
ee.Initialize()

# Import the ERA5_LAND collection.
era5_land = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")

# Separate dates in seq years to avoid EEException: User memory limit exceeded.
dates = myf.seq_dates(i_date, f_date)

# Loop over coordinates; n is the cell index used when storing the result.
for n, (lon, lat) in enumerate(zip(era_lon, era_lat)):

    print(f"Solving cell: {n + 1} of {len(era_lon)}")

    # Construct a point from coordinates.
    era_poi = ee.Geometry.Point([lon, lat])

    # Get a pandas df from GEE for this cell.
    era5df = myf.fromGEE_to_df(dates, era5_land, era_poi)

    # Prepare columns for MuSA.
    MuSA_era5 = myf.format_forz(era5df)

    # Write this cell's forcing into the netcdf at index n.
    myf.store_era_nc(nc_name, MuSA_era5, n)

One option would be to switch to Google Cloud computing to remove this limitation and download the large dataset, but I was wondering whether there is a way to improve the code so that I can download the forcing file for a small spatial extent.

0

There are 0 best solutions below