How do I fix "TypeError: cannot perform reduce with flexible type" when using sklearn's preprocessing.scale?
I read a CSV file using "import pyspark.pandas as pd" and use "from sklearn import preprocessing" for preprocessing. Everything runs without error until the line "df_scaled = preprocessing.scale(df).astype(np.float)". Here is the code:
import numpy as np
import pyspark.pandas as pd
import warnings
from sklearn import preprocessing
# Silence all warnings for the rest of the script.
warnings.filterwarnings("ignore")
# Load the dataset. Note that `pd` is pyspark.pandas here (see the imports),
# not plain pandas.
df = pd.read_csv('diabetes.csv')
# Preview the first rows and the summary statistics.
print(df.head())
print(df.describe())
# Report, for every column, how many rows hold the value 0.
print("Number of rows with 0 values for each variable")
# NOTE(review): the two statements after each `for` in this script are
# presumably the loop body; their indentation appears to have been lost
# when the code was pasted.
for col in df.columns:
missing_rows = df.loc[df[col] == 0].shape[0]
print(col + ": " + str(missing_rows))
# Replace 0 in these five columns with the column mean. The mean is taken
# from the column as it is before the replacement, so the zeros are included.
df['Glucose'] = df['Glucose'].replace(0, df['Glucose'].mean())
df['BloodPressure'] = df['BloodPressure'].replace(0, df['BloodPressure'].mean())
df['SkinThickness'] = df['SkinThickness'].replace(0, df['SkinThickness'].mean())
df['Insulin'] = df['Insulin'].replace(0, df['Insulin'].mean())
df['BMI'] = df['BMI'].replace(0, df['BMI'].mean())
# Repeat the zero count to show the effect of the replacement.
print("Number of rows with 0 values for each variable")
for col in df.columns:
missing_rows = df.loc[df[col] == 0].shape[0]
print(col + ": " + str(missing_rows))
print(df.head())
# Cast a copy to float; df_prescaled is not used again in the code shown.
# NOTE(review): np.float is a deprecated alias of the builtin float
# (deprecated in NumPy 1.20, removed in 1.24) - confirm the NumPy version.
df_prescaled = df.copy().astype(np.float)
# This is the statement the traceback below reports as the failure point.
# NOTE(review): `df` is a pyspark.pandas DataFrame, not a NumPy array or a
# plain pandas DataFrame; presumably scale() ends up with a non-numeric
# ("flexible") array when it converts it - TODO confirm, e.g. by passing
# df.to_numpy() instead.
df_scaled = preprocessing.scale(df).astype(np.float)
# Wrap the scaled values in a DataFrame again, reusing the original column names.
df_scaled = pd.DataFrame(df_scaled, columns=df.columns)
# Overwrite the scaled Outcome column with the unscaled one from the original frame.
df_scaled['Outcome'] = df['Outcome']
# From here on `df` refers to the scaled frame.
df = df_scaled
print(df.head())
And the result is:
Traceback (most recent call last):
File "D:\project1\Spark.py", line 25, in <module>
df_scaled = preprocessing.scale(df).astype(np.float)
File "D:\Python3.6\lib\site-packages\sklearn\utils\validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "D:\Python3.6\lib\site-packages\sklearn\preprocessing\_data.py", line 179, in scale
mean_ = np.nanmean(X, axis)
File "<__array_function__ internals>", line 6, in nanmean
File "D:\Python3.6\lib\site-packages\numpy\lib\nanfunctions.py", line 939, in nanmean
return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
File "<__array_function__ internals>", line 6, in mean
File "D:\Python3.6\lib\site-packages\numpy\core\fromnumeric.py", line 3373, in mean
out=out, **kwargs)
File "D:\Python3.6\lib\site-packages\numpy\core\_methods.py", line 160, in _mean
ret = umr_sum(arr, axis, dtype, out, keepdims)
TypeError: cannot perform reduce with flexible type
Process finished with exit code 1