#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
Python による統計処理
http://aoki2.si.gunma-u.ac.jp/Python/PythonDir.tar.gz
Brunner-Munzel 検定
http://aoki2.si.gunma-u.ac.jp/Python/Brunner_Munzel_test.pdf
ファイル名: brunnermunzeltest.jl 関数名: brunnermunzeltest
翻訳するときに書いたメモ
閾値の決定とか,何を以て基準統計量とするかとか,色々面倒なところもあるようだな。
なお,正確検定については別解も提示する(予定)
==========#
using StatsBase, Statistics, Rmath, Combinatorics, Printf
function brunnermunzeltest(x, y; alternative = "two_sided", conflevel = 0.95, permutation = false)
res = bmt(x, y, alternative, conflevel)
output(res)
permutation || return
t0 = res[:tstat]
epsilon = eps(Float64)
t0absepsilon = abs(t0) - epsilon
r = length(x)
n = r + length(y)
nCr = binomial(n, r)
z = vcat(x, y)
count = 0
for i in combinations(1:n, r)
flag = falses(n)
flag[i] .= true
t = bmtsimple(z[flag], z[.!flag], alternative, conflevel)
if alternative == "two_sided" && abs(t) >= t0absepsilon
count += 1
elseif alternative == "less" && t <= t0 + epsilon
count += 1
elseif alternative == "greater" && t >= t0 - epsilon
count += 1
end
end
@printf("exact p-value = %.7g\n", count / nCr)
end
function bmt(x, y, alternative, conflevel)
n1 = length(x)
n2 = length(y)
r1 = tiedrank(x)
r2 = tiedrank(y)
r = tiedrank(vcat(x, y))
m1 = mean(r[1:n1])
m2 = mean(r[n1+1:n1+n2])
estimate = (m2 - (n2 + 1) / 2) / n1
v1 = sum((r[1:n1] .- r1 .- (m1 - (n1 + 1) / 2)) .^ 2) / (n1 - 1)
v2 = sum((r[n1+1:n1+n2] .- r2 .- (m2 - (n2 + 1) / 2)) .^ 2) / (n2 - 1)
n1v1 = n1 * v1
n2v2 = n2 * v2
if n1v1 + n2v2 == 0
tstat = df = m1 > m2 ? Inf : -Inf
else
tstat = n1 * n2 * (m1 - m2) / (n1 + n2) / sqrt(n1v1 + n2v2)
df = (n1v1 + n2v2)^2 / (n1v1^2 / (n1 - 1) + n2v2^2 / (n2 - 1))
end
se = sqrt(v1 / (n1 * n2^2) + v2 / (n2 * n1^2))
if alternative == "less"
string = "less than"
pvalue = pt(tstat, df)
confint = (max(0.0, estimate - qt(conflevel, df) * se), 1.0)
elseif alternative == "greater"
string = "greater than"
pvalue = pt(tstat, df, false)
confint = (0.0, min(1.0, estimate + qt(conflevel, df) * se))
else # alternative == "two_sided"
string = "not equal to"
pvalue = 2pt(abs(tstat), df, false)
temp = qt((1 - conflevel) / 2, df, false) * se
confint = (max(0.0, estimate - temp), min(1.0, estimate + temp))
end
return Dict(
:tstat => tstat,
:df => df,
:pvalue => pvalue,
:string => string,
:confint => confint,
:estimate => estimate,
:conflevel => conflevel)
end
function bmtsimple(x, y, alternative, conflevel)
n1 = length(x)
n2 = length(y)
r1 = tiedrank(x)
r2 = tiedrank(y)
r = tiedrank(vcat(x, y))
m1 = mean(r[1:n1])
m2 = mean(r[n1+1:n1+n2])
estimate = (m2 - (n2 + 1) / 2) / n1
v1 = sum((r[1:n1] .- r1 .- (m1 - (n1 + 1) / 2)) .^ 2) / (n1 - 1)
v2 = sum((r[n1+1:n1+n2] .- r2 .- (m2 - (n2 + 1) / 2)) .^ 2) / (n2 - 1)
n1v1 = n1 * v1
n2v2 = n2 * v2
if n1v1 + n2v2 == 0
tstat = m1 > m2 ? Inf : -Inf
else
tstat = n1 * n2 * (m1 - m2) / (n1 + n2) / sqrt(n1v1 + n2v2)
end
end
function output(res)
println("Brunner-Munzel Test")
@printf("t = %.7g, df = %.7g, p-value = %.7g\n",
res[:tstat], res[:df], res[:pvalue])
@printf("alternative hypothesis: true p is %s 0.5\n", res[:string])
println("(1) estimate(original: P(X<Y)+0.5*P(X=Y))")
@printf("%.5g percent confidence interval: [%.5g, %.5g]\n",
100res[:conflevel], res[:confint][1], res[:confint][2])
@printf("sample estimate: %.5g\n", res[:estimate])
println("(2) estimete(mean difference: P(X<Y)-P(X>Y))")
@printf("%.5g percent confidence interval: [%.5g, %.5g]\n",
100res[:conflevel], 2res[:confint][1] - 1, 2res[:confint][2] - 1)
@printf("sample estimate: %.5g\n", 2res[:estimate] - 1)
end
x = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1];
y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4];
brunnermunzeltest(x, y) # p-value = 0.005786209
brunnermunzeltest(x, y, alternative="less") # p-value = 0.002893104
brunnermunzeltest(x, y, alternative="greater") # p-value = 0.9971069
brunnermunzeltest(x, y, permutation=true) # exact p-value = 0.008037645
brunnermunzeltest(x, y, permutation=true, alternative="less") # exact p-value = 0.004362857
brunnermunzeltest(x, y, permutation=true, alternative="greater") # exact p-value = 0.9963721