# --- Linear regression with statsmodels ---
# You want a regression line: y = mx + c
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
# Assumes a DataFrame df with columns "y", "x_num" (numeric), and "x_cat" (categorical)
# Formula "y ~ x_num" fits y = m*x + c; the slope m and intercept c are found automatically for the best fit
num_model = ols("y ~ x_num", data=df).fit()
# "+ 0" removes the intercept, so each category's coefficient is that category's mean (relative to 0 rather than to a baseline category)
cat_model = ols("y ~ x_cat + 0", data=df).fit()
# See model params (for the no-intercept categorical model these are the per-category means)
print(cat_model.params)
# For num_model, the params are the intercept c and slope m
print(num_model.params)
# See prediction on original fitted data
print(num_model.fittedvalues)
# See residuals
print(num_model.resid)
# Model summary
num_model.summary()
# R-squared
print(num_model.rsquared)
# Residual standard error (RSE): the square root of the mean squared residual
rse = np.sqrt(num_model.mse_resid)
# Create test data
test_data = pd.DataFrame({"x_num": np.arange(20, 41)})
# Predict on test data
print(num_model.predict(test_data))
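# A quick visual check (a sketch, assuming the df and test_data defined above):
# scatter the original data and overlay the model's predictions on the test range
plt.scatter(df["x_num"], df["y"])
plt.plot(test_data["x_num"], num_model.predict(test_data), color="red")
plt.show()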
# Influence measures for each observation
summary_df = num_model.get_influence().summary_frame()
# Leverage
df["leverage"] = summary_df["hat_diag"]
# Cook's distance
df["cooks_dist"] = summary_df["cooks_d"]
# Residual plot (the lowess line should stay near zero if the fit is good)
sns.residplot(x="x_num", y="y", data=df, lowess=True)
plt.show()
# QQ plot (points should follow the 45-degree line if residuals are normal)
from statsmodels.api import qqplot
qqplot(data=num_model.resid, fit=True, line="45")
plt.show()
# Scale-location plot: sqrt of |standardized residuals| vs fitted values
standardized_resid = num_model.get_influence().resid_studentized_internal
sqrt_abs_resid = np.sqrt(np.abs(standardized_resid))
sns.regplot(x=num_model.fittedvalues, y=sqrt_abs_resid, ci=None, lowess=True)
plt.show()
# NOTE: You can transform X and/or y before fitting. Predictions from such a model are on the transformed scale, so back-transform them if y was transformed.
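# A minimal sketch of the transform idea, assuming y > 0 so a log transform is valid:
log_model = ols("np.log(y) ~ x_num", data=df).fit()
pred_y = np.exp(log_model.predict(test_data))  # back-transform predictions to the original y scale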
# --- Linear regression with scikit-learn ---
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Assumes a feature matrix X, target vector y, and a DataFrame df with a "target" column
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Construct model
lm = LinearRegression()
# Simple linear regression uses 1 feature column: y = m*x + c
# Multiple linear regression uses several feature columns, e.g. z = m*x + n*y + c
# Fit the model
lm.fit(X_train, y_train)
# Predicted estimation
y_pred = lm.predict(X_test)
# Intercept c of the line (also known as the bias coefficient)
intercept = lm.intercept_
# Slope m of the line y = mx + c (one coefficient per feature)
slope = lm.coef_
# R-squared: proportion of the variance in the target explained by the features
rsquared = lm.score(X_test, y_test)
# RMSE: root mean squared error, the typical size of a prediction error
rmse = mean_squared_error(y_test, y_pred, squared=False)
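# Newer scikit-learn (an assumption: version >= 1.4) exposes this directly:
# from sklearn.metrics import root_mean_squared_error
# rmse = root_mean_squared_error(y_test, y_pred)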
# Predict over a specific range
new_x = np.arange(1, 101, 1).reshape(-1, 1)  # or build a DataFrame instead
new_pred_y = lm.predict(new_x)
# Do k-fold cross validation
kf = KFold(n_splits=6, shuffle=True, random_state=42)
cv_results = cross_val_score(lm, X_train, y_train, cv=kf)
# Mean, standard deviation, and a 95% interval of the cross-validation scores
print(np.mean(cv_results), np.std(cv_results), np.quantile(cv_results, [0.025, 0.975]))
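# cross_val_score uses the estimator's default score (R-squared here); a sketch of
# cross-validating RMSE instead (the scorer is negated by sklearn convention):
rmse_scores = -cross_val_score(lm, X_train, y_train, cv=kf, scoring="neg_root_mean_squared_error")
print(rmse_scores)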
# Visualize feature importance via coefficients (comparable only if features are on the same scale)
names = df.drop("target", axis=1).columns
importance = lm.fit(X, y).coef_
plt.bar(names, importance)
plt.xticks(rotation=45)
plt.show()
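# If features are on different scales, standardize before comparing coefficients.
# A minimal sketch using a pipeline (StandardScaler is an assumption, one common choice):
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
scaled_lm = make_pipeline(StandardScaler(), LinearRegression())
scaled_lm.fit(X, y)
print(scaled_lm.named_steps["linearregression"].coef_)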
# --- Quick regression plot with seaborn ---
import seaborn as sb
from matplotlib import pyplot as plt
df = sb.load_dataset('tips')
sb.regplot(x = "total_bill", y = "tip", data = df)
plt.show()
# --- Linear regression with scipy ---
from scipy import stats
import numpy as np
x = np.random.random(10)
y = np.random.random(10)
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
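# The returned values can be used directly; r_value**2 gives R-squared:
print("y = {:.3f}*x + {:.3f}, R^2 = {:.3f}".format(slope, intercept, r_value**2))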
# --- Simple linear regression from scratch ---
import numpy as np
import matplotlib.pyplot as plt
def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)
    # mean of the x and y vectors
    m_x = np.mean(x)
    m_y = np.mean(y)
    # calculating cross-deviation and deviation about x
    SS_xy = np.sum(y * x) - n * m_y * m_x
    SS_xx = np.sum(x * x) - n * m_x * m_x
    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x
    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plotting the actual points as a scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)
    # predicted response vector
    y_pred = b[0] + b[1] * x
    # plotting the regression line
    plt.plot(x, y_pred, color="g")
    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')
    # show the plot
    plt.show()

def main():
    # observations / data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
    # plotting the regression line
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
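# Sanity check (a sketch): NumPy's degree-1 polynomial fit should match b_1 and b_0 above
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
slope_np, intercept_np = np.polyfit(x, y, 1)  # returns highest-degree coefficient first
print(intercept_np, slope_np)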