Before we go deeper into Python and machine learning, let's step back for a moment and recall what we used pandas and NumPy for. Let's have the AI refresh our memory.
Hello AI! Let's go through the pandas and NumPy libraries in Python today. Can you show me some useful examples?
Installation
pip install numpy pandas

Import into current File:
import pandas as pd
import numpy as np
NumPy
Basics
import numpy as np

# 1-D array created from a Python list
a = np.array([1, 2, 3, 4])
print(a)
# [1 2 3 4]

# 2-D array (matrix) created from a list of rows
b = np.array([[1, 2],
              [3, 4]])
print(b)
# [[1 2]
#  [3 4]]

# Arrays of a given shape (rows, columns) filled with 0.0 or 1.0
zeros = np.zeros(shape=(2, 2))
print(zeros)
# [[0. 0.]
#  [0. 0.]]
ones = np.ones(shape=(3, 2))
print(ones)
# [[1. 1.]
#  [1. 1.]
#  [1. 1.]]

# arange(start, stop, step): evenly stepped values; stop itself is excluded
for start, stop, step in [(0, 5, 1), (0, 10, 2), (1, 15, 3)]:
    arange = np.arange(start, stop, step)
    print(arange)
# 1: [0 1 2 3 4]
# 2: [0 2 4 6 8]
# 3: [ 1  4  7 10 13]

# linspace(start, stop, num): num evenly spaced values; both ends are included
for upper in (1, 10):
    linspace = np.linspace(0, upper, 6)
    print(linspace)
# [0.  0.2 0.4 0.6 0.8 1. ]
# [ 0.  2.  4.  6.  8. 10.]
# Ops on Arrays
# Arithmetic operators act element by element on arrays of equal shape
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
elementwise_sum = x + y
elementwise_product = x * y
print(elementwise_sum)
print(elementwise_product)
# [5 7 9]
# [ 4 10 18]

# np.dot on two 2-D arrays is the matrix product (rows of a times columns of b)
a = np.array([[1, 2], [3, 4]])
b = np.array([[11, 12], [13, 14]])
matrix_product = np.dot(a, b)
print(matrix_product)
# [[37 40]
#  [85 92]]
# = [[1*11+2*13, 1*12+2*14], [3*11+4*13, 3*12+4*14]]

# Mathematical functions are applied to every element
squares = [1, 4, 9, 16]
sqrt_array = np.sqrt(squares)
print(sqrt_array)
# [1. 2. 3. 4.]

angles = np.array([0, np.pi / 2, np.pi])
sin_array = np.sin(angles)
print(sin_array)
# [0.0000000e+00 1.0000000e+00 1.2246468e-16]
# (the last value is floating-point round-off; mathematically sin(pi) = 0)

log_inputs = [1, np.e, np.e ** 2]
log_array = np.log(log_inputs)
print(log_array)
# [0. 1. 2.]
# Indexing: positions start at 0, so a[1] is the second element
a = np.array([10, 20, 30, 40, 50])
print(a[1])  # 20

# Slicing: b[rows, columns]
b = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
submatrix = b[1:, :2]
print(submatrix)
# "1:" selects every row from the second one onward (not only the second row),
# ":2" selects the first two columns:
# [[4 5]
#  [7 8]]

# Stats
data = np.array([1, 2, 3, 4, 5])
print(np.mean(data))    # 3.0
print(np.median(data))  # 3.0
print(np.std(data))     # 1.4142135623730951 (population standard deviation)

# Reshape: the 12 values 1..12 arranged as 3 rows x 4 columns
a = np.arange(1, 13)
b = a.reshape((3, 4))
print(b)
# [[ 1  2  3  4]
#  [ 5  6  7  8]
#  [ 9 10 11 12]]

# Flatten: back to one dimension
flat = b.flatten()
print(flat)
# [ 1  2  3  4  5  6  7  8  9 10 11 12]
Pandas
# DataFrames
# Built from a dict: every key is a column name, every list holds that column's values.
data = dict(
    age=[5, 12, 4, 18, 10],
    color=["brown", "cappucino", "white", "tri-color", "black"],
    weight=[9, 112, 156, 6.5, 1.7],
)
# The index labels the rows (one label per value in each column)
animal_names = ["dog", "deer", "cow", "cat", "raven"]
animals = pd.DataFrame(data, index=animal_names)
print(animals)
#        age      color  weight
# dog      5      brown     9.0
# deer    12  cappucino   112.0
# cow      4      white   156.0
# cat     18  tri-color     6.5
# raven   10      black     1.7

# DataFrame from a list of dicts: every dict is one row, its keys are the columns.
# No index is given, so the rows are numbered 0, 1, 2.
data = [
    {'Name': 'Alice', 'Age': 25, 'City': 'New York'},
    {'Name': 'Bob', 'Age': 30, 'City': 'San Francisco'},
    {'Name': 'Charlie', 'Age': 35, 'City': 'Los Angeles'},
]
df = pd.DataFrame(data)
print(df)
#       Name  Age           City
# 0    Alice   25       New York
# 1      Bob   30  San Francisco
# 2  Charlie   35    Los Angeles

# Series: a single labelled column of values
s = pd.Series(data=[1, 2, 3, 4, 5], name="Numbers")
print(s)
# Load a CSV file into a DataFrame (the path is relative to the working directory)
df = pd.read_csv('MW.csv')
# First four rows
print(df[:4])

# info() prints its summary itself and returns None, so it is called without
# print(); print(df.info()) would add a stray "None" line after the summary.
df.info()
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 40 entries, 0 to 39
# Data columns (total 8 columns):
#  #   Column  Non-Null Count  Dtype
# ---  ------  --------------  -----
#  0   1       40 non-null     object
#  1   Gender  40 non-null     object
#  2   Age     40 non-null     int64
#  3   IQ      40 non-null     int64
#  4   PIQ     40 non-null     int64
#  5   Weight  40 non-null     object
#  6   Height  40 non-null     object
#  7   ID      40 non-null     int64
# dtypes: int64(4), object(4)
# memory usage: 2.6+ KB
# NOTE: Weight and Height are listed as "object", i.e. they were not parsed as
# numbers - presumably the file holds non-numeric entries there; check the data.

# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
print(df.describe())

# Select 3 columns, first 3 rows
selected_data = df[['Gender', 'Age', "ID"]].iloc[:3]
print(selected_data)
Modifying data
# Work on the small three-row frame again: the frame read from 'MW.csv' has
# 40 rows, and assigning a 3-item list to it fails with
# "ValueError: Length of values (3) does not match length of index (40)".
df = pd.DataFrame([
    {'Name': 'Alice', 'Age': 25, 'City': 'New York'},
    {'Name': 'Bob', 'Age': 30, 'City': 'San Francisco'},
    {'Name': 'Charlie', 'Age': 35, 'City': 'Los Angeles'},
])
# Adding a new column (the list must hold exactly one value per row):
df['Salary'] = [50000, 60000, 70000]
print(df)
# Modify an existing column (the addition is applied to every row):
df['Age'] = df['Age'] + 1
print(df)
# Deleting a column (drop returns a new frame, so the result is assigned back):
df = df.drop(columns=['Salary'])
print(df)
Data group and merge
# Grouping data and calculating aggregates:
# numeric_only=True averages only the numeric columns. Without it, pandas 2.x
# raises a TypeError as soon as the frame contains text (object) columns;
# older versions silently left those columns out.
grouped = df.groupby('Age').mean(numeric_only=True)
print(grouped)
# Merge DataFrames:
# An inner join on 'Name' keeps only the names found in both frames, so
# Charlie (no salary) and David (no age) do not appear in the result.
df1 = pd.DataFrame({'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [25, 30, 35]})
df2 = pd.DataFrame({'Name': ['Alice', 'Bob', 'David'], 'Salary': [50000, 60000, 70000]})
merged = pd.merge(left=df1, right=df2, how='inner', on='Name')
print(merged)
#     Name  Age  Salary
# 0  Alice   25   50000
# 1    Bob   30   60000
# Saving data to a CSV file (index=False keeps the row index out of the file):
csv_path = 'output.csv'
df.to_csv(csv_path, index=False)
# Saving data to an Excel file (pandas needs an Excel writer package such as
# openpyxl to be installed for .xlsx output):
excel_path = 'output.xlsx'
df.to_excel(excel_path, index=False)
Operations on missing data
# Checking for missing values (number of missing entries per column):
print(df.isnull().sum())
# Filling in missing values: missing ages are replaced by the mean age.
# This runs before the rows are dropped - once dropna() has removed every
# incomplete row there is nothing left for fillna() to fill.
df['Age'] = df['Age'].fillna(df['Age'].mean())
print(df)
# Deleting the rows that still have a missing value in any column:
df = df.dropna()
print(df)