xxxxxxxxxx
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as a test set. Stratifying on y keeps the label
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
xxxxxxxxxx
from sklearn.model_selection import train_test_split
# Reserve 33% of the samples for testing; the seed pins the shuffle order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
xxxxxxxxxx
from sklearn.model_selection import train_test_split
# Reserve a quarter of the samples for testing; the seed pins the shuffle order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
xxxxxxxxxx
# import packages
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the dataset and take a first look at its size, first rows and columns
df = pd.read_csv('prediction.csv')
print(df.shape)
print(df.head())
print(df.columns)

# Feature column and target column
X = df['area']
y = df['prices']

# Shuffle the rows and hold out 25% of them for testing (seeded, so repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=104,
    shuffle=True,
)

# Report the sizes of the four resulting pieces
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
# Alternative approach: split the DataFrame by hand instead of train_test_split
split_ratio = 0.8  # fraction of rows kept for training; the rest is the test set

# frac=1 samples every row, i.e. yields all rows in a random (seeded) order
df_shuffled = df.sample(frac=1, random_state=42)

# Row position where the training portion ends (int() truncates the product)
split_index = int(split_ratio * len(df_shuffled))

# Rows before the cut form the training set, the remaining rows the test set
train_df, test_df = df_shuffled.iloc[:split_index], df_shuffled.iloc[split_index:]
xxxxxxxxxx
# Toy inputs: one label per sample, and five samples with two features each
y = range(5)
X = np.arange(10).reshape(5, 2)
from sklearn.model_selection import train_test_split
# Split the toy data, asking for 33% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
xxxxxxxxxx
>>> import numpy as np
>>> from sklearn.model_selection import train_test_split
>>> X, y = np.arange(10).reshape((5, 2)), range(5)
>>> X
array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])
>>> list(y)
[0, 1, 2, 3, 4]
>>> X_train, X_test, y_train, y_test = train_test_split(
...     X, y, test_size=0.33, random_state=42)
>>> X_train
array([[4, 5],
       [0, 1],
       [6, 7]])
>>> y_train
[2, 0, 3]
>>> X_test
array([[2, 3],
       [8, 9]])
>>> y_test
[1, 4]
>>> train_test_split(y, shuffle=False)
[[0, 1, 2], [3, 4]]
xxxxxxxxxx
from sklearn.model_selection import train_test_split
# Keep 33% of the samples aside for testing; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)