Tuesday, June 9, 2026

Quiz on "Summarizing quantitative data" (Jun 2026)

1:

>>> import numpy as np
>>> l1 = [12.5, 11.5, 11.0, 24.0, 13.0]
>>> mean1 = np.mean(l1)
>>> median1 = np.median(l1)
>>> 
>>> l2 = [12.5, 11.5, 11.0, 13.0]
>>> mean2 = np.mean(l2)
>>> median2 = np.median(l2)
>>> 
>>> mean2 - mean1
-2.4000000000000004
>>> median2 - median1
-0.5
>>> 
>>> print(mean1, median1, mean2, median2)
14.4 12.5 12.0 12.0

2:

import numpy as np


def split_halves(values):
    """Split data into lower and upper halves by position (Khan Academy method).

    The values are sorted first. With an even number of values the data is
    cut cleanly in two; with an odd number the middle value (the median)
    belongs to neither half.

    Splitting by position rather than by comparing each value with the
    median keeps the halves correct when the median value is itself present
    in the data, possibly several times.

    Returns:
        A ``(lower_half, upper_half)`` tuple of lists.
    """
    ordered = sorted(values)
    half = len(ordered) // 2
    # Counting `half` items back from the end skips the middle element
    # when the number of values is odd.
    return ordered[:half], ordered[len(ordered) - half:]


l = [4, 5, 7, 7, 7, 8, 10, 11, 11, 13, 13, 14]

# Default NumPy behaviour: linear interpolation between the two nearest ranks.
q1 = np.percentile(l, 25)
q3 = np.percentile(l, 75)

print("q1, q3:", q1, q3)  # 7.0 11.5

# NumPy >= 1.22.0: the 'method' parameter chooses how a percentile that falls
# between two data points is resolved. Older releases spelled this parameter
# 'interpolation', e.g. np.percentile(l, 75, interpolation='nearest').
q3_nearest = np.percentile(l, 75, method='nearest')  # Returns 11
q3_lower   = np.percentile(l, 75, method='lower')    # Returns 11
q3_higher  = np.percentile(l, 75, method='higher')   # Returns 13

print("q3_nearest, q3_lower, q3_higher:")
print(q3_nearest, q3_lower, q3_higher)

print("---  CALCULATION AS PER KHAN ACADEMY ---")

median = np.median(l)
print("Median:", median)  # 9.0

# Q1 and Q3 are the medians of the lower and upper halves of the sorted data.
lower_half, upper_half = split_halves(l)

q1 = np.median(lower_half)
q3 = np.median(upper_half)
print("Q1:", q1)  # 7.0
print("Q3:", q3)  # 12.0
Output
q1, q3: 7.0 11.5
q3_nearest, q3_lower, q3_higher:
11 11 13
---  CALCULATION AS PER KHAN ACADEMY ---
Median: 9.0
Q1: 7.0
Q3: 12.0

3:

4:

5:

import numpy as np

# Data set for question 5.
l = [35, 39, 39, 43, 43, 44]

# Arithmetic mean of the values above.
average = np.mean(l)
print(average)

6:

import numpy as np


def split_halves(values):
    """Split data into lower and upper halves by position (Khan Academy method).

    The values are sorted first. With an even number of values the data is
    cut cleanly in two; with an odd number the middle value (the median)
    belongs to neither half.

    Splitting by position rather than by comparing each value with the
    median keeps the halves correct when the median value is itself present
    in the data, possibly several times.

    Returns:
        A ``(lower_half, upper_half)`` tuple of lists.
    """
    ordered = sorted(values)
    half = len(ordered) // 2
    # Counting `half` items back from the end skips the middle element
    # when the number of values is odd.
    return ordered[:half], ordered[len(ordered) - half:]


l = [1, 2, 3, 3, 4, 4, 4, 6]

# NumPy's default percentile uses linear interpolation between ranks,
# so these quartiles can differ from the textbook (median-of-halves) ones.
q1 = np.percentile(l, 25)
q3 = np.percentile(l, 75)

print(q1, q3)

iqr = q3 - q1
print(iqr)


print("--- CALCULATION AS PER KHAN ACADEMY ---")

# The median is the point at which the sorted data is cut into two halves.
m = np.median(l)

lower, upper = split_halves(l)

print(lower, upper)
# Q1 and Q3 are the medians of the lower and upper halves respectively.
q1 = np.median(lower)
q3 = np.median(upper)
print(q1, q3)

iqr = q3 - q1
print(iqr)

7:

8:

# Quartiles for question 8; the interquartile range is their difference.
q1, q3 = 2, 5
iqr = q3 - q1
print(iqr)

# Each value paired with how many times it occurs in the data set.
frequencies = [(1, 1), (2, 7), (3, 5), (5, 3), (6, 2), (7, 1), (9, 1)]
l = [value for value, count in frequencies for _ in range(count)]

print(l)

# 1.5 * IQR rule: anything beyond these fences counts as an outlier.
fence = 1.5 * iqr
lower_bound = q1 - fence
upper_bound = q3 + fence

print(lower_bound, upper_bound)

outliers = [x for x in l if not (lower_bound <= x <= upper_bound)]
print(outliers)
print(len(outliers))
Tags: Data Analytics, Mathematical Foundations for Data Science

No comments:

Post a Comment