import numpy as np

from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
# Split features/labels out of the frame, holding back 20% of the rows
# for final evaluation; the fixed seed keeps the split reproducible.
X = df.drop("target", axis=1).values
y = df["target"].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=21
)

# A Pipeline chains sequential stages: every step except the last must be
# a transformer (fit/transform); only the final step is the estimator.
pipeline = Pipeline([
    ("scale", StandardScaler()),
    ("knn", KNeighborsClassifier()),
])
# Cross-validate the pipeline on the training split (10 folds).
# FIX: make_scorer defaults to greater_is_better=True, which would make any
# model selection using this scorer prefer the LARGEST mean squared error.
# Passing greater_is_better=False negates the scores so they follow the same
# convention as the built-in "neg_mean_squared_error" scorer (higher = better).
custom_scorer = make_scorer(mean_squared_error, greater_is_better=False)
scores = cross_val_score(pipeline, X_train, y_train, scoring=custom_scorer, cv=10)
# Grid-search the pipeline's hyper-parameters. Parameter-grid keys use the
# "<step_name>__<param_name>" convention, so "knn__n_neighbors" tunes the
# n_neighbors of the 'knn' step. (FIX: `np` was used without importing
# numpy anywhere in the file; the top-of-file imports now provide it.)
parameters = {"knn__n_neighbors": np.arange(1, 50)}
cv = GridSearchCV(pipeline, param_grid=parameters)

# Fit runs the full search, then refits the best estimator on all of X_train.
cv.fit(X_train, y_train)

# Predictions come from the refit best estimator.
y_pred = cv.predict(X_test)
### You can break down the pipeline and add the results of each step in the output
# A FeatureUnion concatenates the outputs of multiple transformers along the
# second (column) axis, so each sample's feature vector is the scaled values,
# the polynomial expansions, and the PCA components side by side.
# FIX: `scaler`, `poly_features`, and `pca` were referenced but never defined
# (NameError as written) — instantiate them here.
scaler = StandardScaler()
poly_features = PolynomialFeatures(degree=2, include_bias=False)
pca = PCA(n_components=2)  # assumes >= 2 input features — TODO confirm for df

combined_features = FeatureUnion([
    ('scaler', scaler),
    ('poly_features', poly_features),
    ('pca', pca),
])

# Define the classifier (fixed seed for reproducible forests).
classifier = RandomForestClassifier(random_state=42)

# Create a pipeline with the FeatureUnion as the transform stage and the
# classifier as the final estimator. NOTE: this rebinds `pipeline`, replacing
# the KNN pipeline defined earlier in the file.
pipeline = Pipeline([
    ('features', combined_features),
    ('classifier', classifier),
])