DataFrames

#code update 02.2026

Date ranges

import pandas as pd

dates1 = pd.date_range('2021-04-01', periods=6, freq="D")
dates2 = pd.date_range('2025-09-11', periods=4,freq="ME")
dates3 = pd.date_range('2023-11-22', periods=3,freq="MS")
dates4 = pd.date_range('2020-12-31', periods=4,freq="YE")
dates5 = pd.date_range('2020-10-31', periods=6,freq="YS")
dates6 = pd.date_range('2020-10-31', periods=3,freq="QE")
dates7 = pd.date_range('2020-10-31', periods=3,freq="QS")

print(dates1)
print(dates2)
print(dates3)
print(dates4)
print(dates5)
print(dates6)
print(dates7)

DatetimeIndex(['2021-04-01', '2021-04-02', '2021-04-03', '2021-04-04',
               '2021-04-05', '2021-04-06'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2025-09-30', '2025-10-31', '2025-11-30', '2025-12-31'], dtype='datetime64[ns]', freq='ME')
DatetimeIndex(['2023-12-01', '2024-01-01', '2024-02-01'], dtype='datetime64[ns]', freq='MS')
DatetimeIndex(['2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31'], dtype='datetime64[ns]', freq='YE-DEC')
DatetimeIndex(['2021-01-01', '2022-01-01', '2023-01-01', '2024-01-01',
               '2025-01-01', '2026-01-01'],
              dtype='datetime64[ns]', freq='YS-JAN')
DatetimeIndex(['2020-12-31', '2021-03-31', '2021-06-30'], dtype='datetime64[ns]', freq='QE-DEC')
DatetimeIndex(['2021-01-01', '2021-04-01', '2021-07-01'], dtype='datetime64[ns]', freq='QS-JAN')

Creating a DataFrame

import numpy as np

# Build a DataFrame from a 6x4 numpy array of standard-normal samples,
# using the daily date range as the row index and Roman numerals as
# the column labels.
random_values = np.random.randn(6, 4)
column_labels = ['I', 'II', 'III', 'IV']
df = pd.DataFrame(random_values, index=dates1, columns=column_labels)
print(df)
df result

DataFrame functions

# Data parts: the first row and the last three rows of `df`.
# Each label is printed on its own line so the DataFrame's column header
# stays aligned with its values.
print('first row of dataframe:')
print(df.head(1))
print('part of dataframe:')
print(df.tail(3))

# describe() shows a quick statistical summary of the data
print('stats for dataframe:')
print(df.describe())

# Sorting & transposition
# axis=1 sorts by column label; ascending=False reverses the label order.
df2 = df.sort_index(axis=1, ascending=False)
# .T swaps rows and columns of the column-sorted frame.
print('transposition for sorted dataframe:')
print(df2.T)
# Re-order the rows by the values in column "II" (ascending by default).
print('sorted dataframe no.2:')
print(df2.sort_values(by="II"))

DataFrames selection methods

# Build a 4x3 DataFrame of standard-normal samples, indexed by four
# consecutive days starting 2021-01-01, with columns X, Y and Z.
dates = pd.date_range('20210101', periods=4)
df3 = pd.DataFrame(
    np.random.randn(4, 3),
    index=dates,
    columns=['X', 'Y', 'Z'],
)
print(df3)

# --- Selection methods ---

# A single column, returned as a Series.
print('values for X:')
print(df3['X'])

# Rows at positions 2 and 3.
print('part of:')
print(df3.iloc[2:4])

# One row looked up by its index label.
print('Selection by Label:')
last_label = dates[3]
print(df3.loc[last_label])

# Every row, restricted to two named columns.
print('get all the indexes and only two columns:')
print(df3.loc[:, ['X', 'Y']])

# A single scalar: row label + column label.
print('if you want particular value')
first_label = dates[0]
print(df3.loc[first_label, 'X'])

# Integer-position based selection.
print('Selection by Position:')
print(df3.iloc[3])                       # fourth row
print(df3.iloc[3:4, :2])                 # fourth row, first two columns
print(df3.iloc[[1, 2, 3], [0, 2]])       # chosen rows and columns

# Boolean filtering: keep rows whose Z value exceeds 0.05.
z_above_threshold = df3['Z'] > 0.05
print(df3[z_above_threshold])
df3 selection results

Anova, Correlation, Regression

Anova

import scipy.stats as scs
import pandas as pd
# ANOVA
# Load the data set; '.' entries in the CSV are read as missing values.
FileData = pd.read_csv('man_and_woman.csv', sep=',', na_values='.')

# The three columns compared by every test below.
samples = [FileData[column] for column in ('Age', 'IQ', 'PIQ')]

# NON-PARAMETRIC: Kruskal-Wallis H-test
KruskalWallis_results = scs.kruskal(*samples)
print(KruskalWallis_results)

# NON-PARAMETRIC: Friedman chi-square test
Friedmann_results = scs.friedmanchisquare(*samples)
print(Friedmann_results)

# One-way ANOVA (F-test) on the same three columns
ANOVA_results = scs.f_oneway(*samples)
print(ANOVA_results)

Results:
KruskalResult(statistic=67.47559460718044, pvalue=2.227721003370935e-15)
FriedmanchisquareResult(statistic=43.73584905660375, pvalue=3.1833254890177335e-10)
F_onewayResult(statistic=92.45047895919548, pvalue=8.256579887742929e-25)

Correlation & Regression

# CORRELATION
# Both measures below compare the same pair of columns.
iq_scores = FileData['IQ']
piq_scores = FileData['PIQ']

# Pearson correlation coefficient
CC_results = scs.pearsonr(iq_scores, piq_scores)
print("Cor.coefficient:", CC_results)

# Spearman rank correlation coefficient
Spearmann_results = scs.spearmanr(iq_scores, piq_scores)
print(Spearmann_results)

# REGRESSION
# Least-squares linear regression with IQ as x and PIQ as y
Regression_results = scs.linregress(iq_scores, piq_scores)
print(Regression_results)

Results:
Cor.coefficient: (0.778135113690377, 3.4381859970690258e-09)
SpearmanrResult(correlation=0.7149801806441931, pvalue=2.1806977447399098e-07)
LinregressResult(slope=0.7404062323284799, intercept=27.84035979789529, rvalue=0.7781351136903771, pvalue=3.438185997069003e-09, stderr=0.09695047023723001)