Retrieve data from a .grib file for a specific latitude/longitude and convert it to a .csv file

33 Views Asked by At

I am working with a .grib file in python. My goal is to create a function that would take a latitude and a longitude as an input and would retrieve all the data in the .grib file (temperature, wind speed/Direction, humidity at several times and pressure levels) for a specific point in the grid (i.e a specific latitude and longitude) and would convert this in a .csv file.

Here the code that I wrote but I get an error:

import xarray as xr
import cfgrib

#Loading the dataset
DATA = xr.open_dataset(data_path, engine='cfgrib')

lat = DATA.latitude
lon = DATA.longitude 

#Create dictionary for latitude
lat_list = [value for value in lat.values.tolist()]
lat_index = list(range(len(lat_list)))
lat_dict = dict(zip(lat_list,lat_index))

#Create dictionary for longitude 
lon_list = [value for value in lon.values.tolist()]
lon_index = list(range(len(lon_list)))
lon_dict = dict(zip(lon_list,lon_index))

def grid_point_data(data, latitude, longitude, csv_path):

    #Longitude goes from 50.0 to 25.0 by increment of 0.5
    #Latitude goes from -125.0 to -64.0 by increment of 0.25
    
    formated_latitude = min(lat_list, key=lambda x:abs(x-latitude))
    formated_longitude = min(lon_list, key=lambda x:abs(x-longitude))
    print(formated_latitude,formated_longitude)
    
    index_latitude = lat_dict[formated_latitude]
    index_longitude = lon_dict[formated_longitude]
    
    wind_u = data.u[:,:,index_latitude,index_longitude]
    wind_v = data.v[:,:,index_latitude,index_longitude]
    wind_w = data.w[:,:,index_latitude,index_longitude]
    relative_humidity = data.q[:,:,index_latitude,index_longitude]
    temperature = data.t[:,:,index_latitude,index_longitude]

    df = wind_u.to_dataframe()
    df.to_csv(csv_path)

    return(f'Data has been transfered to {csv_path})

The shape of the DATA file is the following:

<xarray.Dataset>
Dimensions:        (time: 456, isobaricInhPa: 37, latitude: 101, longitude: 245)
Coordinates:
    number         int32 ...
  * time           (time) datetime64[ns] 2015-09-22 ... 2015-10-31T23:00:00
    step           timedelta64[ns] ...
  * isobaricInhPa  (isobaricInhPa) float64 1e+03 975.0 950.0 ... 3.0 2.0 1.0
  * latitude       (latitude) float64 50.0 49.75 49.5 49.25 ... 25.5 25.25 25.0
  * longitude      (longitude) float64 -125.0 -124.8 -124.5 ... -64.25 -64.0
    valid_time     (time) datetime64[ns] ...
Data variables:
    q              (time, isobaricInhPa, latitude, longitude) float32 ...
    t              (time, isobaricInhPa, latitude, longitude) float32 ...
    u              (time, isobaricInhPa, latitude, longitude) float32 ...
    v              (time, isobaricInhPa, latitude, longitude) float32 ...
    w              (time, isobaricInhPa, latitude, longitude) float32 ...
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2024-03-11T16:08 GRIB to CDM+CF via cfgrib-0.9.1...

What is interesting is that if I have only one latitude and longitude in my file I can run the following code to create a csv:

def transfer_grib_to_csv(grib_path, csv_path, month):      
    ds = xr.open_dataset(grib_path, engine='cfgrib')
    df = ds.to_dataframe()
    #new_csv_path = f"{csv_path}{month}.csv"
    df.to_csv(csv_path, index=True)
    print(f"Données transférées avec succès vers {csv_path}")

which gives me a csv file with the following format: csv_file for 1 latitude and 1 longitude (BTW, this is the shape I would like to achieve)

The shape of the file here is: <xarray.Dataset> Dimensions: (time: 408, isobaricInhPa: 32, latitude: 1, longitude: 1) Coordinates: number int32 ...

  • time (time) datetime64[ns] 2015-09-11 ... 2015-09-30T23:00:00 step timedelta64[ns] ...
  • isobaricInhPa (isobaricInhPa) float64 875.0 850.0 825.0 ... 3.0 2.0 1.0
  • latitude (latitude) float64 33.61
  • longitude (longitude) float64 -84.46 valid_time (time) datetime64[ns] ... Data variables: q (time, isobaricInhPa, latitude, longitude) float32 ... crwc (time, isobaricInhPa, latitude, longitude) float32 ... t (time, isobaricInhPa, latitude, longitude) float32 ... u (time, isobaricInhPa, latitude, longitude) float32 ... v (time, isobaricInhPa, latitude, longitude) float32 ... w (time, isobaricInhPa, latitude, longitude) float32 ... Attributes: GRIB_edition: 1 GRIB_centre: ecmf GRIB_centreDescription: European Centre for Medium-Range Weather Forecasts GRIB_subCentre: 0 Conventions: CF-1.7 institution: European Centre for Medium-Range Weather Forecasts history: 2024-03-11T16:09 GRIB to CDM+CF via cfgrib-0.9.1...

But the shape for 'wind_u' in the first function is:

<xarray.Dataset>
Dimensions:        (time: 456, isobaricInhPa: 37)
Coordinates:
    number         int32 ...
  * time           (time) datetime64[ns] 2015-09-22 ... 2015-10-31T23:00:00
    step           timedelta64[ns] ...
  * isobaricInhPa  (isobaricInhPa) float64 1e+03 975.0 950.0 ... 3.0 2.0 1.0
    latitude       float64 36.75
    longitude      float64 -79.25
    valid_time     (time) datetime64[ns] ...
Data variables:
    u              (time, isobaricInhPa) float32 ...

It doesn't specify latitude and longitude as a dimension anymore.

Here is the error I get when I ran "grid_point_data(DATA,36.6844,-79.295, csv_path)":

  File "c:\Users\tberberian3\Documents\scripts\grid_point_data.py", line 108, in <module>
    grid_point_data(DATA,36.6844,-79.295, csv_path)
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\Documents\scripts\grid_point_data.py", line 70, in grid_point_data
    df = wind_u.to_dataframe()
         ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\xarray\core\dataarray.py", line 3776, in to_dataframe
    df = ds._to_dataframe(ordered_dims)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\xarray\core\dataset.py", line 6253, in _to_dataframe
    data = [
           ^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\xarray\core\dataset.py", line 6254, in <listcomp>
    self._variables[k].set_dims(ordered_dims).values.reshape(-1)
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\xarray\core\variable.py", line 1721, in set_dims
    expanded_data = self.data
                    ^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\xarray\core\variable.py", line 435, in data
    return self._data.get_duck_array()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\xarray\core\indexing.py", line 696, in get_duck_array
    self._ensure_cached()
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\xarray\core\indexing.py", line 690, in _ensure_cached
    self.array = as_indexable(self.array.get_duck_array())
                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\xarray\core\indexing.py", line 664, in get_duck_array
    return self.array.get_duck_array()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\xarray\core\indexing.py", line 551, in get_duck_array
    array = self.array[self.key]
            ~~~~~~~~~~^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\cfgrib\xarray_plugin.py", line 155, in __getitem__
    return xr.core.indexing.explicit_indexing_adapter(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\xarray\core\indexing.py", line 858, in explicit_indexing_adapter
    result = raw_indexing_method(raw_key.tuple)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\cfgrib\xarray_plugin.py", line 164, in _getitem
    return self.array[key]
           ~~~~~~~~~~^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\cfgrib\dataset.py", line 358, in __getitem__
    message = self.index.get_field(message_ids[0])  # type: ignore
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\cfgrib\messages.py", line 484, in get_field
    return ComputedKeysAdapter(self.fieldset[message_id], self.computed_keys)
                               ~~~~~~~~~~~~~^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\cfgrib\messages.py", line 344, in __getitem__
    return self.message_from_file(file, offset=item)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\cfgrib\messages.py", line 340, in message_from_file
    return Message.from_file(file, offset, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tberberian3\AppData\Local\anaconda3\envs\sosenv\Lib\site-packages\cfgrib\messages.py", line 93, in from_file
    file.seek(offset)
OSError: [Errno 22] Invalid argument

I also tried to use wind_u.values.tolist() method but it did not work

Thank you very much for your help.

0

There are 0 best solutions below