xxxxxxxxxx
# Minimal scikit-learn logistic regression: fit on the training split, then
# predict labels for the test split.
# X_train, y_train and X_test are not defined in this snippet; they must
# already exist when it runs.
# import the class
from sklearn.linear_model import LogisticRegression
# instantiate the model (using the default parameters)
logreg = LogisticRegression()
# fit the model with data
logreg.fit(X_train,y_train)
# predict labels for the test set; the result is kept in y_pred
y_pred=logreg.predict(X_test)
xxxxxxxxxx
# Train a default logistic regression on a 60/40 train/test split and report
# how well it classifies the held-out data.
# X (features) and y (labels) are not defined here; they must already exist.
# Import the necessary modules
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
# train_test_split is called below, so it is imported here rather than relying
# on some other snippet having imported it first.
from sklearn.model_selection import train_test_split
# Create training and test sets (40% held out; random_state fixes the shuffle)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
# Create the classifier: logreg
logreg = LogisticRegression()
# Fit the classifier to the training data
logreg.fit(X_train, y_train)
# Predict the labels of the test set: y_pred
y_pred = logreg.predict(X_test)
# Compute and print the confusion matrix and classification report
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
xxxxxxxxxx
# End-to-end logistic regression on columns A..G of `df` predicting `target`:
# split, standardize, fit, predict, and print several evaluation metrics.
# `df` is not defined in this snippet; it must already exist when it runs.
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import jaccard_score
from sklearn.metrics import log_loss
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
# Specify independent and dependent features
X = np.asarray(df[['A', 'B', 'C', 'D', 'E', 'F', 'G']])
y = np.asarray(df['target'])
# Split into train and test set BEFORE scaling, so that no statistic of the
# test rows can influence the preprocessing (avoids data leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)
# Preprocess dataset: learn mean/std from the training rows only, then apply
# the same transformation to both splits. X itself is left unscaled.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# Train the model
LR = LogisticRegression(C=0.01, solver='liblinear')
LR.fit(X_train, y_train)
# Predict the test set
y_pred = LR.predict(X_test)
# See classification report and confusion matrix. The results are printed
# explicitly; a bare expression is silently discarded outside the last line
# of a notebook cell.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred, labels=[1, 0]))
# Predicted probability on the test set for the second entry of LR.classes_
# (presumably the positive/target class, label 1 -- confirm against the data)
y_pred_prob = LR.predict_proba(X_test)[:, 1]
# Evaluate the model
print(jaccard_score(y_test, y_pred, pos_label=0))
print(log_loss(y_test, y_pred_prob))
print(roc_auc_score(y_test, y_pred_prob))
xxxxxxxxxx
# Build the confusion matrix of true vs. predicted test labels.
# y_test and y_pred are not defined in this snippet; they must already exist.
# import the metrics class
from sklearn import metrics
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
# Bare expression: shows the matrix when this is the last line of a notebook
# cell; it has no effect when run as a plain script.
cnf_matrix
xxxxxxxxxx
# Logistic regression of `target` on `x_var` with the statsmodels formula API,
# plus plots of the fitted curve, the odds, and the confusion matrix.
# `df`, `sns`, `pd`, `np` and `plt` are not defined in this snippet; they must
# already exist when it runs.
from statsmodels.formula.api import logit
model = logit("target ~ x_var", data=df).fit()
print(model.params)
# Visualize logistic model
sns.regplot(x="x_var", y="target", data=df, ci=None, logistic=True)
# Finish this figure here so the odds plot below (and its log y-scale) is
# drawn on its own axes instead of on top of the fitted curve.
plt.show()
# Grid of explanatory values to predict on
X_test = pd.DataFrame({"x_var": np.arange(-1, 6.25, 0.25)})
y_pred_prob = model.predict(X_test)
# Most likely outcome: round the predicted probability to 0 or 1
y_pred = np.round(y_pred_prob)
# Odds (named odds_ratio here): p/(1-p), the probability of something
# happening over the probability of it not happening
odds_ratio = y_pred_prob / (1 - y_pred_prob)
# Visualize odds / log odds. The odds were computed for the rows of X_test,
# not for df, so plot from X_test with the odds attached as a column.
sns.lineplot(x="x_var", y="odds_ratio", data=X_test.assign(odds_ratio=odds_ratio))
plt.axhline(y=1, linestyle="dotted")
# Log-scaled y axis; alternatively plot np.log(odds_ratio) on a linear axis
# to get a straight line.
plt.yscale("log")
plt.show()
# Confusion matrix: rows are indexed by the observed class and columns by the
# predicted class, which is how the four cells are read below.
conf_matrix = model.pred_table()
TN = conf_matrix[0,0]
TP = conf_matrix[1,1]
FN = conf_matrix[1,0]
FP = conf_matrix[0,1]
# Visualize confusion matrix
from statsmodels.graphics.mosaicplot import mosaic
mosaic(conf_matrix)
plt.show()
xxxxxxxxxx
import numpy as np
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Attributes:
        learning_rate: Step size applied to every gradient update.
        num_iterations: Number of full-batch gradient steps run by ``fit``.
        weights: Per-feature coefficients; ``None`` until ``fit`` is called.
        bias: Intercept term; ``None`` until ``fit`` is called.
    """

    def __init__(self, learning_rate=0.01, num_iterations=10000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples target values (0 or 1). A column
                vector is flattened so it lines up with the predictions.
        """
        # Coerce to float arrays so plain lists work, and flatten y so that an
        # (n, 1) target cannot broadcast against the (n,) predictions.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        n_samples = X.shape[0]
        # initialize weights and bias to zero
        self.weights = np.zeros(X.shape[1])
        self.bias = 0
        # The 1/n averaging factor does not change between iterations.
        inv_n = 1 / n_samples
        # gradient descent
        for _ in range(self.num_iterations):
            z = np.dot(X, self.weights) + self.bias
            error = self.sigmoid(z) - y
            # gradients of the mean log-loss w.r.t. weights and bias
            dw = inv_n * np.dot(X.T, error)
            db = inv_n * np.sum(error)
            # update weights and bias
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        """Return the predicted class (0.0 or 1.0) for each row of ``X``.

        The sigmoid output is rounded with ``np.round``, so a probability of
        exactly 0.5 maps to 0.0.
        """
        z = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return np.round(self.sigmoid(z))

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise."""
        # Clip so np.exp cannot overflow for large-magnitude inputs; within
        # [-500, 500] the result is identical to the unclipped formula.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))