FutureWarning: Dropping of nuisance columns in DataFrame reductions with df.median

2.2k Views Asked by At

I have tried lots of things and can't figure out how to compute the medians of the columns I need. I don't know why they are considered "nuisance columns".

The warning is:

FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.

Here is my code:

def readcsv(folder, ICs):
    """Read both tables from every matching results file below a folder.

    The tree rooted at ``data_dir + folder`` is walked recursively. A file
    matches when its name contains ``'local-results-' + ICs``. Each matching
    file is parsed twice with ``pd.read_csv``: once skipping the first 5
    lines and reading at most 13 rows, and once skipping the first 20 lines
    and reading at most 19 rows. The second table is indexed by its 'Aq'
    column and transposed.

    Returns a list with one ``[df, medsT, filename]`` entry per matching file.
    """
    marker = 'local-results-' + ICs
    collected = []
    for current_dir, _subdirs, filenames in os.walk(data_dir + folder):
        for name in filenames:
            if marker not in name:
                continue
            full_path = os.path.join(current_dir, name)
            table = pd.read_csv(full_path, skiprows=np.arange(5), nrows=13)
            raw_medians = pd.read_csv(full_path, skiprows=np.arange(20), nrows=19)
            transposed = raw_medians.set_index('Aq').transpose()
            collected.append([table, transposed, name])
    return collected


def find_results():
    """Load the V50 and V10 result sets via ``readcsv``.

    Returns a tuple ``(V50, V10)``; each element is the list returned by
    ``readcsv`` for that run, i.e. ``[[df, medians, filename], ...]``.
    """
    runs = (
        ("Adorian_Springhill_Lala-Full_Test_V50Beta0.5/", "V50Beta0.5"),
        ("Adorian_Springhill_Lala-Full_Test_V10Beta0.5/", "V10Beta0.5"),
    )
    v50_results, v10_results = (readcsv(folder, tag) for folder, tag in runs)
    return v50_results, v10_results


def exclude_outliers(datapoint):
    """Drop outlier rows from one datapoint's table and return its medians.

    ``datapoint`` is a ``[data, med, filename]`` triple. ``data`` is the
    per-row table, ``med`` is a frame whose 'V' and 'Beta' columns hold the
    reference values (only the first entry of each is used), and characters
    10-15 of ``filename`` are taken as the timestamp.

    A row is an outlier when its relative deviation from the reference
    exceeds 0.3 for 'V' or 0.5 for 'Beta'. Outlier rows are removed from
    ``data`` in place, so the caller's DataFrame is modified.

    Returns ``(true_med, timestamp)``: the median of the numeric columns of
    the remaining rows (NaNs skipped) and the timestamp as an int.

    Raises ``ValueError`` if the timestamp slice is not an integer literal.
    """
    data, med, filename = datapoint[0], datapoint[1], datapoint[2]
    timestamp = filename[10:16]
    print('plotting datapoint: ', timestamp)

    # Explicit positional access: a bare [0] on a label-indexed Series relies
    # on a positional fallback that pandas has deprecated.
    ref_beta = med['Beta'].iloc[0]
    ref_v = med['V'].iloc[0]

    dev_beta = ((ref_beta - data['Beta']) / ref_beta).abs()
    dev_v = ((ref_v - data['V']) / ref_v).abs()
    is_outlier = ((dev_v > 0.3) | (dev_beta > 0.5)).to_numpy()

    # Translate the positional mask into index labels before dropping, so the
    # right rows are removed even when the index is not 0..n-1 (for example
    # after an earlier in-place drop on the same frame).
    data.drop(index=data.index[is_outlier], inplace=True)

    # NOTE(review): numeric_only=True leaves out every non-numeric column. If
    # expected columns are missing from the result they were presumably read
    # as text; confirm their dtype and convert them before calling this.
    true_med = data.median(numeric_only=True)  # skipna=True by default
    print(true_med)
    return true_med, int(timestamp)


def plotmedians(all_data):
    """Plot the outlier-filtered medians of every datapoint against time.

    One figure is created with three axes: 'V' (top left), 'Alpha' * 100
    (top right) and 'ChiSq' (bottom row, full width).

    ``all_data`` is a sequence of datasets, matched by position to the keys
    of the module-level ``legend_dict``. The first two items of each
    ``legend_dict`` value are used as the colour and marker size. Each
    dataset is a sequence of datapoints accepted by ``exclude_outliers``.

    Error bars are drawn from 'ErrV' and 'ErrBeta' when the medians contain
    them and omitted otherwise. The 'ChiSq' point is skipped when the medians
    have no such entry. Missing 'V' or 'Alpha' raises ``KeyError``.
    """
    fig = plt.figure()
    # Raw string: "\c" is not a valid escape sequence in an ordinary literal.
    plt.suptitle(r"Velocity, Motility and $\chi^2$ against Time", size='xx-large')
    gs = gridspec.GridSpec(2, 2)
    axv = fig.add_subplot(gs[0, 0])  # row, column
    axmot = fig.add_subplot(gs[0, 1])
    axchi = fig.add_subplot(gs[1, :])

    bar_style = dict(capsize=3, capthick=2, elinewidth=1, zorder=2, fmt='^')

    for j, key in enumerate(legend_dict):
        col, s = legend_dict[key][0], legend_dict[key][1]
        for i, datapoint in enumerate(all_data[j]):
            # The timestamp returned alongside the medians is not used; the
            # x position is the datapoint's order in the dataset times 5.
            med, _ = exclude_outliers(datapoint)
            time = i * 5

            # Resolve the optional error columns up front so each point is
            # drawn exactly once and unrelated errors are not swallowed.
            err_v = med.get('ErrV')
            err_beta = med.get('ErrBeta')
            err_mot = None if err_beta is None else err_beta * 100

            axv.errorbar(time, med['V'], yerr=err_v, ecolor=col, c=col, ms=s,
                         **bar_style)
            axmot.errorbar(time, med['Alpha'] * 100, yerr=err_mot, ecolor=col,
                           c=col, ms=s, **bar_style)
            if 'ChiSq' in med:
                axchi.scatter(time, med['ChiSq'], c=col, marker='^', s=s * 10)


# Script entry point: load both result sets and plot their medians.
plotmedians(find_results())

The columns that are missing from the result after computing the median are these:

ErrAq            1.9194410627
ErrBq            0.0176539434
ErrV             0.0649970090
ErrS             6.1035284556
ErrA0            0.0509019872
ErrW0            0.5507963763
ErrBeta          0.0469381723
ErrD             0.1453676077
ErrF0            0.0876619659
lmfitObj                  NaN

I only really need 5 columns of the whole data frame, but I have tried filtering for the ones I need and it still doesn't work.

1

There is 1 best solution below

2
On

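The warning is raised because silently dropping nuisance columns (columns on which the reduction cannot be computed, such as non-numeric ones) in DataFrame reductions is deprecated. You can read more about it here. If you want to get rid of it, you should change this line of code: true_med = data.median(numeric_only=True).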

Have you tried selecting columns first?

# Select the needed columns first so the median is taken over those columns only.
data[["ErrAq","ErrBq","ErrV","ErrS","ErrA0"]].median()