# Applying basic statistics functions to a data set given as a list

# Python packages that offer the same functionality: Pandas, NumPy, SciPy & statsmodels


# For example:

import statistics as st
from collections import Counter

# Sample observations, listed in ascending order, shared by every example below.
DataSet = [13, 15, 16, 16, 19, 20, 20, 21, 22, 22, 25, 25, 25, 25, 30, 33, 33, 35, 35, 35, 36, 40, 45, 46, 52, 70]

# Sum -> total of all items, computed with the built-in sum().
print("Sum of all items of Data Set : ", sum(DataSet), sep="")

# Frequency table -> Counter is a dict subclass whose keys are the distinct
# items and whose values are how many times each item occurs.
print("Count of each items in Data Set : ", Counter(DataSet), sep="\n")

# --- Measures of central tendency from the statistics module ---

# Mean -> sum of all items divided by the number of items.
print("Mean of Data Set : ", st.mean(DataSet), sep="\n")

# Median -> middle value of the sorted data; with an even number of items
# (26 here) it is the average of the two middle items.
print("Median of Data Set : ", st.median(DataSet), sep="\n")

# Mode -> the item that appears most often.
print("Mode of Data Set : ", st.mode(DataSet), sep="\n")

# Mid-range -> average of the smallest and the largest item.
print("Mid Range Value Of Data Set : ", st.mean([min(DataSet), max(DataSet)]), sep="\n")

# --- Measures of position and spread ---

# Quartiles -> n=4 yields the three cut points that split the data into four
# groups; for this data set they are [20.0, 25.0, 35.25].
print("Quantiles Of Data Set : ", st.quantiles(DataSet, n=4), sep="\n")

# stdev() and variance() are the *sample* standard deviation and variance.
print("Std. Deviation Of Data Set : ", st.stdev(DataSet), sep="\n")
print("Variance Of Data Set : ", st.variance(DataSet), sep="\n")

Sum of all items of Data Set : 774
Count of each items in Data Set : 
Counter({25: 4, 35: 3, 16: 2, 20: 2, 22: 2, 33: 2, 13: 1, 15: 1, 19: 1, 21: 1, 30: 1, 36: 1, 40: 1, 45: 1, 46: 1, 52: 1, 70: 1})
Mean of Data Set : 
29.76923076923077
Median of Data Set : 
25.0
Mode of Data Set : 
25
Mid Range Value Of Data Set : 
41.5
Quantiles Of Data Set : 
[20.0, 25.0, 35.25]
Std. Deviation Of Data Set : 
13.158442741624686
Variance Of Data Set : 
173.14461538461538


import pandas as pd


# Load the height/weight sample. The column names used below
# ('Height(Inches)', 'Weight(Pounds)') must match the CSV header.
df = pd.read_csv('HeightWeight.csv')

# Preview the first rows. A bare `df.head()` only displays inside a notebook
# cell; print() makes the preview visible when this runs as a script too.
print("First rows of Height/Weight Data Set : ")
print(df.head())

# statistics.correlation() (new in Python 3.10) returns Pearson's correlation
# coefficient between the two columns.
print("Correlation of Height and Weight : ")
print(st.correlation(df['Height(Inches)'], df['Weight(Pounds)']))
# Value recorded in the original notebook run: 0.5028585206028441

# statistics.linear_regression() (also new in Python 3.10) fits
# Weight = slope * Height + intercept by ordinary least squares.
slope, intercept = st.linear_regression(df['Height(Inches)'], df['Weight(Pounds)'])

print("Slope and Intercept of Regression Line : ")
print(f"slope = {slope}, intercept = {intercept}")
# Values recorded in the original notebook run:
# (3.0834764454029657, -82.57574306454092)


import pandas as pd
import matplotlib.pyplot as plt

# Load the body-fat measurements from the CSV in the working directory.
df = pd.read_csv(r'bodyfat.csv')

# Draw on an explicit Axes: BodyFat along x, Age along y, with slightly
# transparent markers so overlapping points remain visible.
fig, ax = plt.subplots()
ax.scatter(df['BodyFat'], df['Age'], alpha=0.8)
ax.set_title("Scatter Plot using matplotlib")

# Render the figure.
plt.show()


import pandas as pd
import matplotlib.pyplot as plt

# Load the body-fat measurements from the CSV in the working directory.
df = pd.read_csv(r'bodyfat.csv')

# Keep only the two columns to be compared side by side.
data = df[['BodyFat', 'Age']]

# Draw on an explicit Axes; patch_artist=True draws the boxes as filled
# patches instead of plain outlines.
fig, ax = plt.subplots()
ax.boxplot(data, patch_artist=True)
ax.set_title("Box Plot using matplotlib")

# Render the figure.
plt.show()

	Index	Height(Inches)	Weight(Pounds)
0	1	65.78331	112.9925
1	2	71.51521	136.4873
2	3	69.39874	153.0269
3	4	68.21660	142.3354
4	5	67.78781	144.2971

survival8

Pages

Monday, May 29, 2023

Ch 2 - Descriptive Statistics

Descriptive Data Summarization

Practice Problem

Percentile

Practice Problem

Box Plot

Practice Problem

Box Plot for Outlier Analysis

Variance and Standard Deviation

Scatter Plot

In 2D

In 3D

Practice Problem

Correlation

Now In Code

Linear Regression

HOW ARE SCATTER PLOT AND CORRELATION CONCEPT RELATED?

No comments:

Post a Comment