# I was greatly inspired by StatsWithJuliaBook
# https://statisticswithjulia.org/index.html
using Pkg
Pkg.activate("MAA204")
using StatsPlots, LaTeXStrings, Measures, LinearAlgebra, Distributions, Random, HypothesisTests, CSV, DataFrames;

  Activating environment at `C:\Users\david\Nextcloud\cours\MAA_204_Statistics\notebook_julia\MAA204\Project.toml`


x(t) = cos(t)
y(t) = sin(t)

plot(x, y, 0, π/2, leg=false, fill=(0,:orange), aspect_ratio = :equal)


N = 1000000
X = rand(N)
Y = rand(N)
pi_estimate = 4*count(X.^2 + Y.^2 .< 1)/N

3.142796


plot([n-> 4*count(rand(n).^2+rand(n).^2 .< 1)/n- π, n-> 1/sqrt(n)],1:1000, label = [L"\hat{\pi}_N-\pi" L"1/\sqrt{N}"])
xlabel!(L"N")


N = 10^5
X = rand(N)
Y = rand(N)
plot([n-> abs(4*count(rand(n).^2+rand(n).^2 .< 1)/n- π),n-> 1/sqrt(n)],1:10000,yscale=:log10,xscale=:log10, label = [L"|\hat{\pi}_N-\pi|" L"1/\sqrt{N}"])
xlabel!(L"N")


tau = 18 # Vertical line
mu0, mu1, sd  = 15, 20, 2
dist0, dist1 = Normal(mu0,sd), Normal(mu1,sd)
grid = 5:0.1:27
h0grid, h1grid = tau:0.1:25, 5:0.1:tau

println("Probability of Type I error: ", ccdf(dist0,tau))
println("Probability of Type II error: ", cdf(dist1,tau))

plot(grid, pdf.(dist0,grid), size=(800,600),
	c=:blue, label="μ = $(mu0)")
plot!(h0grid, pdf.(dist0, h0grid), 
	c=:blue, fa=0.2, fillrange=[0 1], label="")
plot!(grid, pdf.(dist1,grid), 
	c=:green, label="μ = $(mu1)")
plot!(h1grid, pdf.(dist1, h1grid), 
	c=:green, fa=0.2, fillrange=[0 1], label="")
plot!([tau, 27],[0,0],
	c=:red, lw=3, label="Rejection region", 
	xlims=(5, 27), ylims=(0,0.25) , legend=:topleft,
    xlabel="x", ylabel="Density")
vline!([tau], c=4, ls = :dash, label = :none )
annotate!([(tau-0.85, 0.01, text(L"\beta")),(tau+0.75, 0.01, text(L"\alpha")),
            (15, 0.21, text(L"H_0")),(mu1, 0.21, text(L"H_1"))])

Probability of Type I error: 0.06680720126885804
Probability of Type II error: 0.15865525393145702


plot([τ -> ccdf(dist0,τ), τ -> cdf(dist1,τ)],10,25, label = ["Type I error" "Type II error"])


p = [1/6, 1/6, 1/6, 1/6, 1/6, 1/6]
O = [8, 11, 12, 17, 2, 10]
M = length(O)
n = sum(O)
E = n*p

testStatistic = sum((O-E).^2 ./E)
pVal = 1-cdf(Chisq(M-1), testStatistic)

println("Manually calculated test statistic: ", testStatistic)
println("Manually calculated p-value: ", pVal,"\n")

println(ChisqTest(O,p))

Manually calculated test statistic: 12.200000000000001
Manually calculated p-value: 0.03214774253615793

Pearson's Chi-square Test
-------------------------
Population details:
    parameter of interest:   Multinomial Probabilities
    value under h_0:         [0.166667, 0.166667, 0.166667, 0.166667, 0.166667, 0.166667]
    point estimate:          [0.133333, 0.183333, 0.2, 0.283333, 0.0333333, 0.166667]
    95% confidence interval: [(0.01667, 0.2642), (0.06667, 0.3142), (0.08333, 0.3309), (0.1667, 0.4142), (0.0, 0.1642), (0.05, 0.2976)]

Test summary:
    outcome with 95% confidence: reject h_0
    one-sided p-value:           0.0321

Details:
    Sample size:        60
    statistic:          12.2
    degrees of freedom: 5
    residuals:          [-0.632456, 0.316228, 0.632456, 2.21359, -2.52982, 0.0]
    std. residuals:     [-0.69282, 0.34641, 0.69282, 2.42487, -2.77128, 0.0]

O

6-element Vector{Int64}:
  8
 11
 12
 17
  2
 10

E

6-element Vector{Float64}:
 10.0
 10.0
 10.0
 10.0
 10.0
 10.0

n

60


plot(Chisq(M-1), label="χ^2 (6-1)")
plot!(Chisq(M-1),testStatistic,25,fillrange=[0 1],label=:none,alpha=0.5)
vline!([testStatistic],label="test Statistic",alpha=0.5)

Monte Carlo¶

Monte Carlo example: Compute $\pi$¶

Hypothesis Testing¶

Chi square test¶