1:
>>> import numpy as np
>>> l1 = [12.5, 11.5, 11.0, 24.0, 13.0]  # full sample, including the value 24.0
>>> mean1 = np.mean(l1)
>>> median1 = np.median(l1)
>>> # Same sample with 24.0 removed.
>>> l2 = [12.5, 11.5, 11.0, 13.0]
>>> mean2 = np.mean(l2)
>>> median2 = np.median(l2)
>>> # Change in each statistic caused by dropping 24.0.
>>> mean2 - mean1
-2.4000000000000004
>>> median2 - median1
-0.5
>>>
>>> print(mean1, median1, mean2, median2)
14.4 12.5 12.0 12.0
2:
import numpy as np

# First and third quartiles of the sample, computed in one call with
# NumPy's default percentile method.
l = [4, 5, 7, 7, 7, 8, 10, 11, 11, 13, 13, 14]
q1, q3 = np.percentile(l, [25, 75])
print("q1, q3:", q1, q3)  # 7.0, 11.5
import numpy as np

l = [4, 5, 7, 7, 7, 8, 10, 11, 11, 13, 13, 14]

# Third quartile under the non-interpolating percentile methods.
# NumPy >= 1.22.0 selects them with the 'method' parameter; on older
# releases the same option is spelled interpolation='nearest' (etc.).
q3_nearest = np.percentile(l, 75, method='nearest')  # Returns 11
q3_lower = np.percentile(l, 75, method='lower')  # Returns 11
q3_higher = np.percentile(l, 75, method='higher')  # Returns 13
print("q3_nearest, q3_lower, q3_higher:")
print(q3_nearest, q3_lower, q3_higher)

print("--- CALCULATION AS PER KHAN ACADEMY ---")
median = np.median(l)
print("Median:", median)  # 9.0

# Median-of-halves quartiles: split the *sorted* data by position, not by
# comparing values with the median. A value-based split puts every element
# equal to the median into both halves, which skews Q1/Q3 when the data has
# ties at the median or an odd number of points. With an odd count the
# middle element belongs to neither half.
ordered = sorted(l)
half = len(ordered) // 2
lower_half = ordered[:half]
upper_half = ordered[len(ordered) - half:]
q1 = np.median(lower_half)
q3 = np.median(upper_half)
print("Q1:", q1)  # 7.0
print("Q3:", q3)  # 12.0
Output
q1, q3: 7.0 11.5
q3_nearest, q3_lower, q3_higher:
11 11 13
--- CALCULATION AS PER KHAN ACADEMY ---
Median: 9.0
Q1: 7.0
Q3: 12.0
3:
4:
5:
import numpy as np

# Arithmetic mean of the six listed values.
l = [35, 39, 39, 43, 43, 44]
average = np.mean(l)
print(average)
6:
import numpy as np

l = [1, 2, 3, 3, 4, 4, 4, 6]

# Quartiles and interquartile range using NumPy's default percentile method.
q1 = np.percentile(l, 25)
q3 = np.percentile(l, 75)
print(q1, q3)
iqr = q3 - q1
print(iqr)

print("--- CALCULATION AS PER KHAN ACADEMY ---")
# Median-of-halves quartiles: split the *sorted* data by position, not by
# comparing values with the median. A value-based split puts every element
# equal to the median into both halves, which skews Q1/Q3 when the data has
# ties at the median or an odd number of points. With an odd count the
# middle element belongs to neither half.
ordered = sorted(l)
half = len(ordered) // 2
lower = ordered[:half]
upper = ordered[len(ordered) - half:]
print(lower, upper)
q1 = np.median(lower)
q3 = np.median(upper)
print(q1, q3)
# Rebinds q1, q3 and iqr: from here on they hold the median-of-halves values.
iqr = q3 - q1
print(iqr)
7:
8:
# Outlier check with the 1.5 * IQR rule, using quartiles given as constants.
q1 = 2
q3 = 5
iqr = q3 - q1
print(iqr)

# Expand (value, count) pairs into the flat list of observations.
value_counts = [(1, 1), (2, 7), (3, 5), (5, 3), (6, 2), (7, 1), (9, 1)]
l = [value for value, count in value_counts for _ in range(count)]
print(l)

# Fences sit 1.5 IQRs below Q1 and above Q3.
fence_width = 1.5 * iqr
lower_bound = q1 - fence_width
upper_bound = q3 + fence_width
print(lower_bound, upper_bound)

# Anything not inside [lower_bound, upper_bound] counts as an outlier.
outliers = [x for x in l if not (lower_bound <= x <= upper_bound)]
print(outliers)
print(len(outliers))
Tags: Data Analytics, Mathematical Foundations for Data Science
No comments:
Post a Comment