import numpy as np
import math
from scipy.stats import norm, t


# Three samples of four observations each.
s1 = [95, 90, 91, 92]
s2 = [97, 94, 89, 90]
s3 = [85, 84, 79, 80]


# Arithmetic mean of each sample, then report them one per line.
m1, m2, m3 = (np.mean(sample) for sample in (s1, s2, s3))
for _label, _mean in (("m1", m1), ("m2", m2), ("m3", m3)):
    print(_label, _mean)

m1 92.0
m2 92.5
m3 82.0


# Sample variance of each sample. Passing ddof=1 makes numpy divide by
# (n - 1) instead of n, i.e. it applies Bessel's correction.
v1, v2, v3 = (np.var(sample, ddof=1) for sample in (s1, s2, s3))
for _label, _variance in (("v1", v1), ("v2", v2), ("v3", v3)):
    print(_label, _variance)

v1 4.666666666666667
v2 13.666666666666666
v3 8.666666666666666


# t statistic for sample 1 vs sample 2:
#   t = |m1 - m2| / sqrt(v1 / n1 + v2 / n2)
s1_n, s2_n = len(s1), len(s2)
# numerator: absolute difference between the two sample means
signal = abs(m1 - m2)
# denominator: square root of the summed per-sample (variance / size) terms
noise = math.sqrt(v1 / s1_n + v2 / s2_n)
t_val = signal / noise
print("sample 1 vs 2 t value", t_val)

sample 1 vs 2 t value 0.2335496832484569


# One-tailed p-value from scipy's Student-t distribution.
# t.cdf gives the fraction of the distribution lying at or below
# t_val, so 1 - cdf is the upper-tail probability.
# Degrees of freedom: combined sample sizes minus two.
_dof = s1_n + s2_n - 2
print(1 - t.cdf(t_val, df=_dof))

0.41154942928551863