"""onewayanova.py

Created on August 22, 2024 (2.77 KB).

Perform a one-way ANOVA with three groups using Python on the NumWorks calculator.
To use this script, change the values in the groups to represent your dataset,
then execute the script.
"""
import math

# Observations for the three groups; replace these lists with your own data.
group1 = [85, 78, 92, 88, 76, 81, 79, 84, 90, 77, 83, 86, 89, 91, 80]
group2 = [90, 85, 88, 91, 87, 92, 89, 86, 93, 84, 88, 90, 87, 85, 91]
group3 = [75, 80, 78, 82, 77, 79, 81, 76, 83, 78, 80, 77, 82, 79, 81]

# Pooled sample holding every observation from every group
all_data = group1 + group2 + group3

# Per-group means and the grand mean of the pooled sample
mean1 = sum(group1) / len(group1)
mean2 = sum(group2) / len(group2)
mean3 = sum(group3) / len(group3)
overall_mean = sum(all_data) / len(all_data)

# SSB: group size times the squared distance of the group mean from the
# grand mean, accumulated group by group
ssb = 0
for size, center in ((len(group1), mean1), (len(group2), mean2), (len(group3), mean3)):
    ssb += size * (center - overall_mean) ** 2

# SSW: squared distance of each observation from its own group mean,
# accumulated group by group
ssw = 0
for values, center in ((group1, mean1), (group2, mean2), (group3, mean3)):
    ssw += sum((v - center) ** 2 for v in values)

# Degrees of freedom: (groups - 1) between, (observations - groups) within
group_count = 3
df_between = group_count - 1
df_within = len(all_data) - group_count

# Mean squares are the sums of squares scaled by their degrees of freedom
ms_between = ssb / df_between
ms_within = ssw / df_within

# F-statistic: ratio of the two mean squares, kept to four decimal places
f_statistic = round(ms_between / ms_within, 4)

# Function to calculate the cumulative distribution function (CDF) for the F-distribution
def f_cdf(f, df1, df2):
    """Return the F-distribution CDF, P(F <= f), for df1 and df2 degrees of freedom.

    The value is the regularized incomplete beta function evaluated at
    x = df1*f / (df1*f + df2) with shape parameters df1/2 and df2/2.

    f   -- point at which to evaluate the CDF
    df1 -- numerator (between-groups) degrees of freedom
    df2 -- denominator (within-groups) degrees of freedom
    """
    # The distribution has no probability mass at or below zero. Returning
    # early also avoids passing x == 0 (or a negative x) on to betainc,
    # where taking log(x) would fail, e.g. when all group means are equal.
    if f <= 0:
        return 0.0
    scaled = df1 * f
    x = scaled / (scaled + df2)
    return betainc(df1 / 2.0, df2 / 2.0, x)

# Function to calculate the incomplete beta function
def betainc(a, b, x):
    """Return the regularized incomplete beta function I_x(a, b).

    a, b -- shape parameters
    x    -- upper integration limit, must lie in [0, 1]

    Raises ValueError when x is outside [0, 1].
    """
    if x < 0.0 or x > 1.0:
        raise ValueError("x must be between 0 and 1")
    # The prefactor below needs log(x) and log(1 - x), which are undefined at
    # the endpoints; the function's values there are known exactly.
    if x == 0.0:
        return 0.0
    if x == 1.0:
        return 1.0

    # Prefactor x^a * (1 - x)^b / B(a, b), assembled in log space via lgamma
    bt = math.exp(
        math.lgamma(a + b) - math.lgamma(a) - math.lgamma(b)
        + a * math.log(x) + b * math.log(1.0 - x)
    )
    # Below this threshold the continued fraction is used directly; otherwise
    # it is applied to the mirrored problem (a and b swapped, x -> 1 - x) and
    # the result is subtracted from one.
    if x < (a + 1.0) / (a + b + 2.0):
        return bt * betacf(a, b, x) / a
    return 1.0 - bt * betacf(b, a, 1.0 - x) / b

# Function to calculate the continued fraction for the incomplete beta function
def betacf(a, b, x):
    """Evaluate the continued fraction used by the incomplete beta function.

    Each pass applies an even-numbered and an odd-numbered coefficient of the
    fraction. Iteration stops once the multiplicative update from the odd
    step is within EPS of one, or after MAXIT passes.

    a, b -- shape parameters
    x    -- point at which the fraction is evaluated
    """
    MAXIT = 100      # maximum number of passes
    EPS = 3.0e-7     # tolerance on the final multiplicative update
    FPMIN = 1.0e-30  # floor that keeps c and d away from zero before dividing

    ab_sum = a + b
    a_plus = a + 1.0
    a_minus = a - 1.0

    # Seed the recurrence with its first term
    c = 1.0
    d = 1.0 - ab_sum * x / a_plus
    if abs(d) < FPMIN:
        d = FPMIN
    d = 1.0 / d
    h = d

    for m in range(1, MAXIT + 1):
        two_m = 2 * m
        even_term = m * (b - m) * x / ((a_minus + two_m) * (a + two_m))
        odd_term = -(a + m) * (ab_sum + m) * x / ((a + two_m) * (a_plus + two_m))
        # Both coefficients go through the same update; even first, then odd
        for term in (even_term, odd_term):
            d = 1.0 + term * d
            if abs(d) < FPMIN:
                d = FPMIN
            c = 1.0 + term / c
            if abs(c) < FPMIN:
                c = FPMIN
            d = 1.0 / d
            factor = d * c
            h *= factor
        # Convergence is judged on the factor produced by the odd step only
        if abs(factor - 1.0) < EPS:
            break
    return h

# p-value: upper-tail probability of the F distribution beyond the observed
# statistic, kept to four decimal places
p_value = round(1 - f_cdf(f_statistic, df_between, df_within), 4)

# Report both results, one per line
for label, value in (("F-statistic:", f_statistic), ("p-value:", p_value)):
    print(label, value)
# My Python Scripts
#
# Library
#
# My scripts
#
# My calculator
#
# onewayanova.py