In [1]:
import numpy as np
import pandas as pd
In [4]:
# Load the workbook into the DataFrame `CS`, which every later cell reads.
# NOTE(review): this is an absolute path on a Windows F: drive, so the cell
# only runs on a machine where that exact location exists.
CS = pd.read_excel(io="F:/2019 GB Python/Descstats.xlsx")
In [5]:
# Preview the first five rows to check the columns and their value types.
CS.head(n=5)
Out[5]:
Sample 1 Sample 2 Group
0 28.686942 0.627870 A
1 30.811276 50.558054 A
2 28.477675 54.593256 A
3 31.689752 19.613195 A
4 31.132768 48.658675 A
In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns; the quartiles are spelled out rather than left implicit.
CS.describe(percentiles=[0.25, 0.5, 0.75])
Out[6]:
Sample 1 Sample 2
count 1000.000000 1000.000000
mean 30.012077 30.233962
std 1.041527 29.067057
min 26.160543 0.154764
25% 29.314089 8.721795
50% 30.014408 20.740185
75% 30.730071 43.013396
max 33.757718 194.102611
In [7]:
# Sample skewness of every numeric column.
# numeric_only=True keeps the string-valued 'Group' column out of the
# reduction: pandas >= 2.0 no longer drops non-numeric columns silently and
# raises TypeError instead. Older pandas accepts the flag as well, so the
# result is the same per-column Series either way.
CS.skew(numeric_only=True)
Out[7]:
Sample 1    0.029793
Sample 2    1.701568
dtype: float64
In [8]:
# Skewness of a single column, returned as a plain float; missing values
# are skipped (the pandas default, written out explicitly).
CS["Sample 1"].skew(skipna=True)
Out[8]:
0.02979292669727891
In [9]:
# Kurtosis of every numeric column, as reported by pandas' `kurt`.
# numeric_only=True keeps the string-valued 'Group' column out of the
# reduction: pandas >= 2.0 no longer drops non-numeric columns silently and
# raises TypeError instead. Older pandas accepts the flag as well, so the
# result is the same per-column Series either way.
CS.kurt(numeric_only=True)
Out[9]:
Sample 1    0.093034
Sample 2    3.714773
dtype: float64
In [10]:
import matplotlib.pyplot as plt
import seaborn as sns
In [11]:
%matplotlib inline
In [12]:
# Apply seaborn's default styling to all later figures; color_codes=True
# remaps matplotlib's one-letter colour codes ('b', 'g', 'r', ...) to the
# seaborn palette.
sns.set(color_codes=True)
In [13]:
# Histogram with a KDE curve overlaid for 'Sample 1'. Both layers are
# distplot's defaults; they are written out so the next cells, which switch
# one of them off, are easy to compare against.
# NOTE(review): distplot is deprecated in seaborn >= 0.11 (histplot/displot
# replace it); kept to match the seaborn release this notebook was run on.
sns.distplot(CS["Sample 1"], hist=True, kde=True)
C:\Users\Neil\Anaconda3\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x975c9b0>
In [18]:
# Same column, histogram bars only: the KDE curve is switched off.
sns.distplot(CS["Sample 1"], hist=True, kde=False)
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x7cf2a90>
In [22]:
# Same column, KDE curve only: the histogram bars are switched off.
sns.distplot(CS["Sample 1"], hist=False, kde=True)
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x9ee5ad0>
In [23]:
# Distribution plot of 'Sample 1' with a title. distplot returns the Axes it
# drew on, so the title is set on that object directly; the resulting Text
# is the cell's displayed value.
sns.distplot(CS["Sample 1"]).set_title('Sample 1 Histogram')
Out[23]:
Text(0.5, 1.0, 'Sample 1 Histogram')
In [24]:
# Distribution plot of 'Sample 2' with a title. distplot returns the Axes it
# drew on, so the title is set on that object directly; the resulting Text
# is the cell's displayed value.
sns.distplot(CS["Sample 2"]).set_title('Sample 2 Histogram')
Out[24]:
Text(0.5, 1.0, 'Sample 2 Histogram')
In [26]:
# Two distribution plots side by side in a 1x2 grid at the default figure
# size. Each panel is created with plt.subplot and handed straight to
# distplot through `ax=`.

# Left panel: 'Sample 1'.
sns.distplot(CS["Sample 1"], ax=plt.subplot(1, 2, 1)).set_title('Sample 1 Histogram')

# Right panel: 'Sample 2'. The Text returned here is the cell's output.
sns.distplot(CS["Sample 2"], ax=plt.subplot(1, 2, 2)).set_title('Sample 2 Histogram')
Out[26]:
Text(0.5, 1.0, 'Sample 2 Histogram')
In [27]:
# Same 1x2 comparison on a wide 20x6 inch canvas so the panels are not
# cramped. The figure and both Axes are created together and each plot is
# drawn on its Axes explicitly.
fig, (left_ax, right_ax) = plt.subplots(1, 2, figsize=(20, 6))

# Left panel: 'Sample 1'.
sns.distplot(CS["Sample 1"], ax=left_ax)
left_ax.set_title('Sample 1 Histogram')

# Right panel: 'Sample 2'. The Text returned by set_title is the cell's output.
sns.distplot(CS["Sample 2"], ax=right_ax)
right_ax.set_title('Sample 2 Histogram')
Out[27]:
Text(0.5, 1.0, 'Sample 2 Histogram')
In [28]:
# Plain pandas histogram of 'Sample 1' with the default ten bins (stated
# explicitly).
CS["Sample 1"].hist(bins=10)
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x98240d0>
In [29]:
# Histogram of 'Sample 1' split by the 'Group' column: pandas draws one
# panel per distinct group value and returns the array of Axes.
CS["Sample 1"].hist(by=CS["Group"], bins=10)
Out[29]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x0995B870>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x09995910>],
      dtype=object)
In [30]:
# Histogram of 'Sample 2' split by the 'Group' column: pandas draws one
# panel per distinct group value and returns the array of Axes.
CS["Sample 2"].hist(by=CS["Group"], bins=10)
Out[30]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x099E6710>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x09A14FF0>],
      dtype=object)
In [ ]: