How create blending ensemble properly and call as ussual Libary, how to integrate in below my model

23 Views Asked by At

Objective is create ensemble model including tuning params from weak learner and integrate with my model always problem

TypeError                                 Traceback (most recent call last)
Input In [19], in <cell line: 1>()
----> 1 ensemble.train_and_predict('MK1', model_name='Blending', param_grid=param_grid_bl)

Input In [15], in EnsembleRegressor.train_and_predict(self, sheet, model_name, param_grid)
     30     raise ValueError(f"Invalid model name: {model_name}")
     32 # Parameter tuning
---> 33 best_params, results = self.tune_parameters(model_class, x_train, y_train, param_grid)
     35 # Train the model with the best parameters
     36 model = model_class(**best_params)

Input In [15], in EnsembleRegressor.tune_parameters(self, model_class, X, y, param_grid)
     59 results = []
     61 for params in self.grid_search(param_grid):
---> 62     model = model_class(**params)
     63     model.fit(X, y)
     64     y_pred = model.predict(X)

TypeError: __call__() got an unexpected keyword argument 'knn_k'

This is my BlendingModel :

class BlendingEnsemble():
    def __init__(self, knn_k=5, rf_n_estimators=100, svr_c=1.0):
        self.models = []
        self.blender = None
        self.knn_k = knn_k
        self.rf_n_estimators = rf_n_estimators
        self.svr_c = svr_c

    def get_models(self):
        models = []
        models.append(('knn', KNeighborsRegressor(n_neighbors=self.knn_k)))
        models.append(('rf', RandomForestRegressor(n_estimators=self.rf_n_estimators)))
        models.append(('svm', SVR(C=self.svr_c)))
        return models


    def fit(self, X_train, y_train):
        self.models = self.get_models()

        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.33, random_state=1)

        # Fit all models on the training set and predict on the holdout set
        meta_X = []
        for name, model in self.models:
            # Fit model on the training set
            model.fit(X_train, y_train)
            # Predict on the holdout set
            yhat = model.predict(X_val)
            # Reshape predictions into a matrix with one column
            yhat = np.atleast_2d(yhat).T
            # Store predictions as input for blending
            meta_X.append(yhat)

        # Create 2D array from predictions, each set is an input feature
        meta_X = np.hstack(meta_X)

        # Define blending model
        blender = RandomForestRegressor()

        # Fit on predictions from base models
        blender.fit(meta_X, y_val)
        self.blender = blender

        return blender

    def predict(self, X_test):
        # Make predictions with base models
        meta_X = []
        for _, model in self.models:
            # Predict with base model
            yhat = model.predict(X_test)
            # Reshape predictions into a matrix with one column
            yhat = np.atleast_2d(yhat).T
            # Store prediction
            meta_X.append(yhat)

        # Create 2D array from predictions, each set is an input feature
        meta_X = np.hstack(meta_X)

        # Predict
        return self.blender.predict(meta_X)
    def evaluate_r2(self, y_true, y_pred):
        return r2_score(y_true, y_pred)

    def evaluate_rmse(self, y_true, y_pred):
        return np.sqrt(mean_squared_error(y_true, y_pred))
    def __call__(self, X):
        return self.predict(X)

I want to integrate with my modeling regression like below :

class EnsembleRegressor:
    def __init__(self, data_source_file, result_file,params_file):
        self.data_source_file = data_source_file
        self.result_file = result_file
        self.params_file = params_file
        blending = BlendingEnsemble()
        self.models = {
            'Blending':blending ,
            'XGBoost': xgb.XGBRegressor,
            'RandomForest': RandomForestRegressor
        }
    

    def train_and_predict(self, sheet, model_name='Blending', param_grid=None):
        # Splitting features and label
        data = pd.read_excel(self.data_source_file, sheet_name=sheet)
        X = data.drop(columns='Earned Value')
        y = data['Earned Value']

        # Splitting data into training and testing sets
        x_train, x_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=58)
        x_train = x_train.sort_index(ascending=True)
        y_train = y_train.sort_index(ascending=True)
        x_test = x_test.sort_index(ascending=True)
        y_test = y_test.sort_index(ascending=True)

        # Model selection
        model_class = self.models.get(model_name)
        if model_class is None:
            raise ValueError(f"Invalid model name: {model_name}")

        # Parameter tuning
        best_params, results = self.tune_parameters(model_class, x_train, y_train, param_grid)

        # Train the model with the best parameters
        model = model_class(**best_params)
        model.fit(x_train, y_train)
        
        # Predict on the test set
        x_test.reset_index(drop=True, inplace=True)

        ev_pred = model.predict(x_test)

        # Create a DataFrame with predictions
        perform = pd.DataFrame({'AT':x_test.iloc[:,0].values,'Planned Value':x_test.iloc[:,1].values,
                                'Actual': y_test.values, 'EV_Pred': ev_pred, 'Model': model_name})
        results ['Model']=model_name
        # Save the results to Excel files
        self.to_excel(perform, self.result_file, sheet)
        self.to_excel(results, self.params_file, sheet)

    def tune_parameters(self, model_class, X, y, param_grid=None):
        if param_grid is None:
            param_grid = {}  

        # Grid search to find the best parameters
        best_params = None
        best_score = float('inf')
        results = []

        for params in self.grid_search(param_grid):
            model = model_class(**params)
            model.fit(X, y)
            y_pred = model.predict(X)
            r2 = self.evaluate_r2(y, y_pred)
            rmse = self.evaluate_rmse(y, y_pred)

            results.append({**params, 'R2': r2, 'RMSE': rmse})

            if rmse < best_score:
                best_score = rmse
                best_params = params

        results_df = pd.DataFrame(results)
        return best_params, results_df

    @staticmethod
    def grid_search(param_grid):
        keys, values = zip(*param_grid.items())
        for combination in itertools.product(*values):
            yield dict(zip(keys, combination))

    @staticmethod
    def evaluate_r2(y_true, y_pred):
        return r2_score(y_true, y_pred)

    @staticmethod
    def evaluate_rmse(y_true, y_pred):
        return np.sqrt(mean_squared_error(y_true, y_pred))
    
    @staticmethod
    def to_excel(df, file, sheet_name):
        try:
            book = load_workbook(file)
            writer = pd.ExcelWriter(file, engine='openpyxl')
            writer.book = book
            writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

            if sheet_name in writer.sheets:
                sheet = writer.sheets[sheet_name]
                last_row = sheet.max_row
            else:
                last_row = 0

            if last_row < 1:
                df.to_excel(writer, sheet_name=sheet_name, index=False)
            else:
                df.to_excel(writer, sheet_name=sheet_name, index=False, header=True, startrow=last_row)

            writer.save()
        except FileNotFoundError:
            df.to_excel(file, sheet_name=sheet_name, index=False)

Please some one to help me, stuck for finding the solution

i try for connecting my blendingmodel as a normal ML modeling like fit and predict. i just expecting my blendingensemble can connect with my regression modeling

0

There are 0 best solutions below