I have the following xarray that contains the Spearman's Rank correlation at each grid point over Africa:
<xarray.DataArray (lat: 162, lon: 162)>
array([[ nan, nan, nan, ..., nan, nan,
nan],
[ nan, nan, nan, ..., nan, nan,
nan],
[ nan, nan, nan, ..., nan, nan,
nan],
...,
[ nan, nan, nan, ..., 0.40315942, 0.37931034,
0.43181313],
[ nan, nan, nan, ..., 0.37156525, 0.36329255,
0.38091 ],
[ nan, nan, nan, ..., 0.3579931 , 0.33615128,
0.34620091]])
Coordinates:
* lon (lon) float32 -20.25 -19.75 -19.25 -18.75 ... 59.25 59.75 60.25
* lat (lat) float32 -40.25 -39.75 -39.25 -38.75 ... 39.25 39.75 40.25
I would like to calculate the field significance using a bootstrap method to then only plot grid cells where the test is locally significant at the 5% level of field significance. I have the following code, which I think is doing the right thing:
# Number of bootstrap samples
n_bootstrap_samples = 1000
# Define a function to calculate field significance using bootstrap
def calculate_field_significance(data, n_bootstrap_samples):
# Get the dimensions of the data
lat, lon = data['lat'], data['lon']
n_lat, n_lon = len(lat), len(lon)
# Initialize an array to store bootstrap results
field_significance = np.empty((n_lat, n_lon))
for i in range(n_lat):
for j in range(n_lon):
# Select the data at the current grid cell
current_grid_cell_data = data[i, j].values # Access the numpy array
# Initialize an array to store bootstrap sample results
bootstrap_results = np.empty(n_bootstrap_samples)
for k in range(n_bootstrap_samples):
# Resample the data with replacement
bootstrap_sample = np.random.choice(current_grid_cell_data, len(current_grid_cell_data))
# Calculate Spearman's rank correlation coefficient for the bootstrap sample
bootstrap_statistic = xs.spearman_r(bootstrap_sample, current_grid_cell_data)
# Store the bootstrap sample statistic
bootstrap_results[k] = bootstrap_statistic
# Calculate the Spearman's rank correlation coefficient for the observed data
observed_statistic = xs.spearman_r(current_grid_cell_data, current_grid_cell_data)
# Calculate the p-value by comparing the observed statistic with the bootstrap distribution
p_value = (np.sum(bootstrap_results >= observed_statistic) + 1) / (n_bootstrap_samples + 1)
# Store the p-value as the field significance at the current grid cell
field_significance[i, j] = p_value
return xr.DataArray(field_significance, coords={'lat': lat, 'lon': lon}, dims=['lat', 'lon'])
# Calculate field significance for your dataset
field_significance = calculate_field_significance(cru_djf_cor, n_bootstrap_samples)
However, I get the following error when I run this code:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_16096/3424556375.py in <module>
43
44 # Calculate field significance for your dataset
---> 45 field_significance = calculate_field_significance(cru_djf_cor, n_bootstrap_samples)
46
47 # You can now access field_significance to see the field significance values for each grid cell.
~\AppData\Local\Temp/ipykernel_16096/3424556375.py in calculate_field_significance(data, n_bootstrap_samples)
23 for k in range(n_bootstrap_samples):
24 # Resample the data with replacement
---> 25 bootstrap_sample = np.random.choice(current_grid_cell_data, len(current_grid_cell_data))
26
27 # Calculate Spearman's rank correlation coefficient for the bootstrap sample
TypeError: len() of unsized object
Please advise on why I am getting this error? Does the code look right?