Besides pandas and numpy, there are other Python libraries for statistics, such as scipy, sklearn, matplotlib (for data visualization), and statsmodels, which provides classes and functions for estimating many different statistical models, conducting statistical tests, and exploring statistical data. Let’s see how to use these options by practicing with AI.
Start by creating lists from data loaded from a CSV file (a sample file is available for download here: https://heart4datascience.com/2020/12/20/pandas/)
#stats lib
import numpy as np # pip install numpy
import pandas as pd # pip install pandas
from scipy import stats # pip install scipy
import matplotlib.pyplot as plt # pip install matplotlib
import seaborn as sns # pip install seaborn
import statsmodels.api as sm # pip install statsmodels
# Load the sample CSV that the exercises below work on.
df = pd.read_csv('MW.csv')
# Extract the 'IQ' and 'Age' columns, then convert each one to a plain list.
IQ_Column, Age_Column = df['IQ'], df['Age']
IQ_List, Age_List = IQ_Column.tolist(), Age_Column.tolist()
# Print both lists, followed by the length of each (one value per line).
print(IQ_List, Age_List, sep='\n')
print(len(IQ_List), len(Age_List), sep='\n')

And now let’s compare the results of basic statistical operations, such as the mean or the median, as computed by each library’s methods.
# AVERAGE MEASURES
# numpy: mean, median and standard deviation of the IQ values.
np_data = IQ_List
IQ_mean, IQ_median, IQ_standard_dev = (
    np.mean(np_data),
    np.median(np_data),
    np.std(np_data),  # np.std defaults to ddof=0 (population standard deviation)
)
print(IQ_mean, IQ_median, IQ_standard_dev)
# result: 112.35 113.0 23.31903728716089
# pandas: the same three measures, computed through a Series.
pd_data = pd.Series(IQ_List)
IQ_mean, IQ_median, IQ_standard_dev = (
    pd_data.mean(),
    pd_data.median(),
    pd_data.std(),  # Series.std defaults to ddof=1 (sample standard deviation)
)
print(IQ_mean, IQ_median, IQ_standard_dev)
# result: 112.35 113.0 23.616107063199742
# scipy: mean, median, mode and standard deviation of the first six IQ values.
scp_data = IQ_List[:6]
scp_mean = stats.tmean(scp_data)
# The median is the 50th percentile. Passing 20 here would return the
# 20th percentile (123.0 for this data), which is not the median.
scp_median = stats.scoreatpercentile(scp_data, 50)
scp_mode = stats.mode(scp_data)
scp_dev = stats.tstd(scp_data)  # sample standard deviation (ddof=1)
print(scp_data) # [132, 150, 123, 129, 132, 90]
print(scp_mean) # 126.0
print(scp_median) # 130.5
print(scp_mode) # ModeResult(mode=132, count=2)
print(scp_dev) # 19.809088823063014
# statsmodels
# Arithmetic mean of all ages via statsmodels' public descriptive-statistics
# class. (tmean and gmean are scipy.stats functions; the attribute path
# sm.tsa.stattools.stats merely exposes the scipy.stats module that
# statsmodels imports internally, so it is not relied on here.)
sm_tmean = sm.stats.DescrStatsW(Age_List).mean
# 47.85
# Geometric mean of all ages: a scipy.stats function, called through the
# file's own `stats` import.
sm_gmean = stats.gmean(Age_List)
# 40.90597080827608
print(sm_tmean)
print(sm_gmean)
# seaborn and matplotlib
# Draw a histogram of the ages with seaborn, then display the figure.
sns.histplot(data=Age_List)
plt.show()




