In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(color_codes =True)
%matplotlib inline
In [2]:
# Location of the Excel workbook with the regression data.
# NOTE(review): absolute, machine-specific path - adjust it to wherever the file lives locally.
DATA_PATH = "F:/2019 GB Python/Regression.xlsx"
Sales = pd.read_excel(DATA_PATH)
In [3]:
# Preview the first five rows to check the columns that were loaded.
Sales.head(n=5)
Out[3]:
Sales Enquires LaborDeployed AvgResponseTime NoofStockOuts
0 20.6 5193 48 96.325071 2
1 12.4 4995 37 193.276852 8
2 18.6 5229 48 162.271452 4
3 19.8 5222 47 132.756201 4
4 7.5 4614 21 153.479144 8
In [4]:
# Histogram of the target column, without the kernel-density overlay.
# NOTE(review): distplot is deprecated in newer seaborn releases (histplot replaces it there);
# kept here because the notebook was run on an older seaborn - confirm before upgrading.
sns.distplot(Sales["Sales"], kde=False)
C:\Users\Neil\Anaconda3\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x99f21b0>
In [5]:
# Sales (y) against Enquires (x); columns are looked up by name in the frame.
sns.scatterplot(x="Enquires", y="Sales", data=Sales)
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x9a4b0f0>
In [6]:
# Pairwise plots limited to the target and the Enquires column.
pair_columns = ['Sales', 'Enquires']
sns.pairplot(data=Sales, vars=pair_columns)
Out[6]:
<seaborn.axisgrid.PairGrid at 0x9d51a30>
In [7]:
# Pairwise plots across the target and all four candidate predictors.
pair_columns = [
    'Sales',
    'Enquires',
    'LaborDeployed',
    'AvgResponseTime',
    'NoofStockOuts',
]
sns.pairplot(data=Sales, vars=pair_columns)
Out[7]:
<seaborn.axisgrid.PairGrid at 0x9e91630>
In [8]:
# Pearson correlation coefficient for every pair of columns in the frame.
cor = Sales.corr(method="pearson")
# Show the matrix as the cell's output.
cor
Out[8]:
Sales Enquires LaborDeployed AvgResponseTime NoofStockOuts
Sales 1.000000 0.693551 0.609308 -0.146156 -0.777959
Enquires 0.693551 1.000000 0.910704 -0.125756 -0.176915
LaborDeployed 0.609308 0.910704 1.000000 -0.077711 -0.110578
AvgResponseTime -0.146156 -0.125756 -0.077711 1.000000 0.115509
NoofStockOuts -0.777959 -0.176915 -0.110578 0.115509 1.000000
In [9]:
# Correlations lie in [-1, 1], so pin the colour scale to that range and centre it on 0.
# With a diverging palette, sign and strength are then comparable across cells; the default
# scale is derived from the data range and is not symmetric around zero.
# annot/fmt print each coefficient (two decimals) inside its cell.
sns.heatmap(cor, annot=True, fmt=".2f", vmin=-1, vmax=1, center=0, cmap="coolwarm")
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0xa789b30>

Independent variables correlated with Sales: 'Enquires' (0.69), 'LaborDeployed' (0.61), 'NoofStockOuts' (-0.78). 'AvgResponseTime' (-0.15) is only weakly correlated with Sales and is left out.

In [10]:
import statsmodels.api as sm
In [11]:
# Candidate predictors for the first regression fit.
inputs = ['Enquires', 'LaborDeployed', 'NoofStockOuts']
# All rows, only the predictor columns.
X = Sales.loc[:, inputs]
X
Out[11]:
Enquires LaborDeployed NoofStockOuts
0 5193 48 2
1 4995 37 8
2 5229 48 4
3 5222 47 4
4 4614 21 8
5 4856 30 5
6 5029 30 4
7 4483 24 7
8 5203 48 9
9 5369 48 2
10 4923 29 4
11 4923 25 7
12 5242 50 2
13 4796 31 10
14 5018 38 3
15 4677 22 4
16 5048 33 5
17 4940 31 6
18 4968 34 2
19 5162 41 8
20 5293 43 2
21 4917 36 7
22 5121 37 4
23 5163 39 4
24 4803 23 4
25 4532 23 6
26 5126 47 7
27 5036 30 5
28 4851 26 9
29 5369 48 7
... ... ... ...
70 5094 43 2
71 5150 42 3
72 5126 41 6
73 4676 21 5
74 4948 37 6
75 4686 22 1
76 4786 24 2
77 4878 29 9
78 5224 47 2
79 5292 47 9
80 5036 37 4
81 5085 40 4
82 4875 28 9
83 4960 32 8
84 4822 35 7
85 4640 20 2
86 5038 37 4
87 5143 39 8
88 5071 43 4
89 5025 38 6
90 5026 41 1
91 5052 46 5
92 4807 27 2
93 4861 30 4
94 4703 20 8
95 4694 27 6
96 5442 50 4
97 4825 26 2
98 5110 42 9
99 4764 25 1

100 rows × 3 columns

In [12]:
# Response variable for the regression.
y = Sales.loc[:, 'Sales']
y
Out[12]:
0     20.6
1     12.4
2     18.6
3     19.8
4      7.5
5     13.4
6     16.5
7      8.0
8     14.4
9     21.2
10    15.7
11    11.8
12    20.9
13     9.6
14    17.9
15    14.0
16    15.3
17    13.5
18    17.4
19    14.0
20    20.2
21    11.5
22    17.1
23    17.5
24    14.2
25     8.7
26    16.3
27    16.0
28     7.4
29    17.0
      ... 
70    19.1
71    18.6
72    14.6
73    13.2
74    15.1
75    15.1
76    16.7
77     7.7
78    22.0
79    15.1
80    17.5
81    16.5
82    11.9
83    10.1
84    11.3
85    13.0
86    15.9
87    10.8
88    17.0
89    12.8
90    17.5
91    16.8
92    16.2
93    13.9
94     7.6
95     9.8
96    19.9
97    17.5
98    10.9
99    17.1
Name: Sales, Length: 100, dtype: float64
In [14]:
# Add an intercept column; sm.OLS does not include one unless it is part of the design matrix.
X = sm.add_constant(X)
# Ordinary least squares of Sales on the intercept plus the selected predictors.
model = sm.OLS(endog=y, exog=X).fit()
# In-sample predictions for the same rows the model was fitted on.
predictions = model.predict(exog=X)
# Coefficient table and fit diagnostics as the cell's output.
model.summary()
Out[14]:
OLS Regression Results
Dep. Variable: Sales R-squared: 0.925
Model: OLS Adj. R-squared: 0.923
Method: Least Squares F-statistic: 395.2
Date: Fri, 31 May 2019 Prob (F-statistic): 7.21e-54
Time: 15:25:04 Log-Likelihood: -147.93
No. Observations: 100 AIC: 303.9
Df Residuals: 96 BIC: 314.3
Df Model: 3
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const -26.0446 5.199 -5.009 0.000 -36.366 -15.724
Enquires 0.0090 0.001 7.394 0.000 0.007 0.011
LaborDeployed 0.0298 0.029 1.037 0.302 -0.027 0.087
NoofStockOuts -0.9852 0.041 -23.777 0.000 -1.067 -0.903
Omnibus: 1.090 Durbin-Watson: 2.200
Prob(Omnibus): 0.580 Jarque-Bera (JB): 1.182
Skew: -0.204 Prob(JB): 0.554
Kurtosis: 2.657 Cond. No. 2.39e+05


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.39e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
In [19]:
# Reduced predictor set. LaborDeployed is dropped: its coefficient was not significant in the
# three-predictor fit (p = 0.302) and it is strongly correlated with Enquires (r = 0.91).
inputs = ['Enquires','NoofStockOuts']
X = Sales.loc[:, inputs]
In [20]:
# Add an intercept column; sm.OLS does not include one unless it is part of the design matrix.
X = sm.add_constant(X)
# Refit ordinary least squares of Sales on the intercept plus the current predictors.
model = sm.OLS(endog=y, exog=X).fit()
# In-sample predictions for the same rows the model was fitted on.
predictions = model.predict(exog=X)
# Coefficient table and fit diagnostics as the cell's output.
model.summary()
Out[20]:
OLS Regression Results
Dep. Variable: Sales R-squared: 0.924
Model: OLS Adj. R-squared: 0.923
Method: Least Squares F-statistic: 591.8
Date: Fri, 31 May 2019 Prob (F-statistic): 4.46e-55
Time: 15:27:19 Log-Likelihood: -148.48
No. Observations: 100 AIC: 303.0
Df Residuals: 97 BIC: 310.8
Df Model: 2
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const -30.7459 2.549 -12.064 0.000 -35.804 -25.688
Enquires 0.0101 0.001 20.212 0.000 0.009 0.011
NoofStockOuts -0.9798 0.041 -23.824 0.000 -1.061 -0.898
Omnibus: 0.765 Durbin-Watson: 2.175
Prob(Omnibus): 0.682 Jarque-Bera (JB): 0.890
Skew: -0.169 Prob(JB): 0.641
Kurtosis: 2.685 Cond. No. 1.17e+05


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.17e+05. This might indicate that there are
strong multicollinearity or other numerical problems.

Fitted model: Sales = -30.7459 + 0.0101 × Enquires - 0.9798 × NoofStockOuts

In [ ]: