#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
R の mantelhaen.test()
ファイル名: mantelhaentest.jl 関数名: mantelhaentest
翻訳するときに書いたメモ
R では,exact = TRUE の場合の信頼区間の計算において,uniroot の tol がデフォルトのままの .Machine$double.eps^0.25 になっているので解の精度が不十分である。
Exact conditional test of independence in 2 x 2 x k tables
data: x
S = 16, p-value = 0.03994490358127
alternative hypothesis: true common odds ratio is not equal to 1
95 percent confidence interval:
1.07740143018898 529.83739902335230
sample estimates:
common odds ratio
10.36102408137496
.Machine$double.eps^0.25
[1] 0.0001220703125
なので,小数点以下3桁程度の精度しかない。(信頼区間の推定値としては十分であろうが)
tol = Machine$double.eps にすれば,Julia での解と同じになる。
Exact conditional test of independence in 2 x 2 x k tables
data: x
S = 16, p-value = 0.03994490358127
alternative hypothesis: true common odds ratio is not equal to 1
95 percent confidence interval:
1.077387763518033 531.512782889939672
sample estimates:
common odds ratio
10.36102408137498
なお,mantelhaen.test() 中の d2x2xk() は C によるプログラムを呼ぶことになっているが,以下のような計算をするだけのものである。
d2x2xk = function(K, m, n, t, d) {
y = pmax(0, t - n)
z = pmin(m, t)
tmy = t - y
mpn = m + n
zmy = z - y
tbl = matrix(0, K + 1, sum(zmy) + 1)
pos = 0
tbl[1, 1] = 1
for (i in 1:K) {
for (j in 1:(zmy[i]+1)) {
u = dhyper(tmy[i] - j+1, t[i], mpn[i] - t[i], n[i])
tbl[i + 1, j:(j + pos)] = tbl[i + 1, j:(j + pos)] + tbl[i, 1:(pos + 1)] * u
}
pos = pos + zmy[i]
}
return(tbl[K, ] / sum(tbl[K, ]))
}
==========#
using Match, Rmath, LinearAlgebra, Roots, Printf
function mantelhaentest(x, y = [], z = []; alternative = "twosided", correct = true, exact = false, conflevel = 0.95)
if typeof(x) == Array{Int64, 3}
any(size(x) .< 2) && error("each dimension in table must be >= 2")
else
length(y) == 0 && error("if 'x' is not an array, 'y' must be given")
length(z) == 0 && error("if 'x' is not an array, 'z' must be given")
any(diff([length(x), length(y), length(z)]) .!= 0) && error("'x', 'y', and 'z' must have the same length")
levelsx, tblx = table(x)
levelsy, tbly = table(y)
(length(levelsx) < 2 || length(levelsy) < 2) && error("'x' and 'y' must have at least 2 levels")
x = table(x, y, z)
end
any(sum(x, dims=[1, 2]) .< 2) && error("sample size in each stratum must be > 1")
I, J, K = size(x)
if I == 2 && J == 2
words = (["not equal to", "less than", "greater than"][indexin([alternative], ["twosided", "less", "greater"])])[1]
if !exact
method = "Mantel-Haenszel chi-squared test with continuity correction"
sx = reshape(sum(x, dims=2), I, K)
sy = reshape(sum(x, dims=1), J, K)
n = vec(sum(x, dims=[1, 2]))
delta = sum(x[1, 1, :] .- sx[1, :] .* sy[1, :] ./ n)
yates = correct && abs(delta) >= 0.5 ? 0.5 : 0
denom = sum(vec(prod(vcat(sx, sy), dims=1)) ./ (n .^ 2 .* (n .- 1)))
statistic = (abs(delta) - yates) ^ 2 / denom
df = 1
if alternative == "twosided"
pvalue = pchisq(statistic, df, false)
else
z = sign(delta) * sqrt(statistic)
pvalue = pnorm(z, alternative == "less")
end
sdiag = sum(x[1, 1, :] .* x[2, 2, :] ./ n)
soffd = sum(x[1, 2, :] .* x[2, 1, :] ./ n)
estimate = sdiag / soffd
sd = sqrt(sum((x[1, 1, :] .+ x[2, 2, :]) .* x[1, 1, :] .* x[2, 2, :] ./ n .^ 2) /
(2 * sdiag ^ 2) +
sum(((x[1, 1, :] .+ x[2, 2, :]) .* x[1, 2, :] .* x[2, 1, :] +
(x[1, 2, :] .+ x[2, 1, :]) .* x[1, 1, :] .* x[2, 2, :]) ./ n .^ 2) /
(2 * sdiag * soffd) +
sum((x[1, 2, :] .+ x[2, 1, :]) .* x[1, 2, :] .* x[2, 1, :] ./ n .^ 2) /
(2 * soffd ^ 2))
cint = @match alternative begin
"less" => [0, estimate * exp(qnorm(conflevel) * sd)]
"greater" => [estimate * exp(qnorm(conflevel, false) * sd), Inf]
"twosided" => estimate .* exp.([1, -1] .* qnorm((1 - conflevel) / 2) * sd)
end
println(method)
@printf("Mantel-Haenszel X-squared = %.5f, df = %d, p value = %.5f\n", statistic, df, pvalue)
@printf("alternative hypothesis: true common odds ratio is %s 1\n", words)
@printf("%g %% confidence interval: [%.6f, %.6f]\n", 100conflevel, cint[1], cint[2])
@printf("sample estimates: common odds ratio %g", estimate)
Dict(:statistic => statistic, :df => df, :pvalue => pvalue,
:conflevel => conflevel, :cint => cint, :method => method)
else
function dn2x2xk(ncp)
(length(ncp) == 1 && ncp == 1) && return dc
d = logdc .+ log.(ncp) .* support
d = exp.(d .- maximum(d))
d ./ sum(d)
end
function mn2x2xk(ncp)
ncp == 0 && return lo
ncp == Inf && return hi
sum(support .* dn2x2xk(ncp))
end
function pn2x2xk(q, ncp = 1; uppertail = false)
if length(ncp) == 1 && ncp == 0
uppertail ? float(q <= lo) : float(q >= lo)
elseif length(ncp) == 1 && ncp == Inf
uppertail ? float(q <= hi) : float(q >= hi)
else
d = dn2x2xk(ncp)
if uppertail
select = support .>= q
else
select = support .<= q
end
sum(d[select])
end
end
function mle(x)
x == lo && return 0
x == hi && return Inf
mu = mn2x2xk(1)
if mu > x
find_zero(t -> mn2x2xk(t) - x, (0, 1))
elseif mu < x
1 / find_zero(t -> mn2x2xk(1 / t) - x, (eps(), 1))
else
1
end
end
function ncpU(x, alpha)
x == hi && return Inf
p = pn2x2xk(x, 1)
if p < alpha
find_zero(t -> pn2x2xk(x, t) - alpha, (0, 1))
elseif p > alpha
1 / find_zero(t -> pn2x2xk(x, 1 / t) - alpha, (eps(), 1))
else
1
end
end
function ncpL(x, alpha)
x == lo && return 0
p = pn2x2xk(x, 1; uppertail = true)
if p > alpha
find_zero(t -> pn2x2xk(x, t, uppertail = true) - alpha, (0, 1))
elseif p < alpha
1 / find_zero(t -> pn2x2xk(x, 1 / t, uppertail = true) - alpha, (eps(), 1))
else
1
end
end
method = "Exact conditional test of independence in 2 x 2 x k tables"
mn = reshape(sum(x, dims=1), I, :)
m = mn[1, :]
n = mn[2, :]
t = reshape(sum(x, dims= 2), J, :)[1, :]
s = sum(x[1, 1, :])
lo = sum(pmax(0, t - n))
hi = sum(pmin(m, t))
support = lo:hi
dc = d2x2xk(K, m, n, t, hi - lo + 1)
logdc = log.(dc)
relErr = 1.0000001
alpha2 = (1 - conflevel) / 2
pvalue, cint = @match alternative begin
"less" => (pn2x2xk(s, 1), [0, ncpU(s, 1 - conflevel)])
"greater" => (pn2x2xk(s, 1, uppertail = true), [ncpL(s, 1 - conflevel), Inf])
"twosided" => (sum(dc[dc .<= dc[s - lo + 1] * relErr]), [ncpL(s, alpha2), ncpU(s, alpha2)])
end
statistic = s
estimate = mle(s)
println(method)
@printf("S = %g, p value = %.5f\n", statistic, pvalue)
@printf("alternative hypothesis: true common odds ratio is %s 1\n", words)
@printf("%g %% confidence interval: [%.6f, %.6f]\n", 100conflevel, cint[1], cint[2])
@printf("sample estimates: common odds ratio %g", estimate)
Dict(:statistic => statistic, :pvalue => pvalue,
:conflevel => conflevel, :cint => cint, :method => method)
end
else
df = (I - 1) * (J - 1)
n = zeros(df)
m = zeros(df)
V = zeros(df, df)
for k = 1:K
f = x[:, :, k]
ntot = sum(f)
rowsums = sum(f, dims=2)[1:I-1]
colsums = sum(f, dims=1)[1:J-1]
n .+= vec(f[1:I-1, 1:J-1])
m .+= vec(rowsums .* colsums' ./ ntot)
V .+= kron(
diag(ntot .* colsums, J - 1) .- (colsums .* colsums'),
diag(ntot .* rowsums, I - 1) .- (rowsums .* rowsums')) /
(ntot ^ 2 * (ntot - 1))
end
n = n - m
statistic = n' * (V \ n)
df = df
pvalue = pchisq(statistic, df, false)
method = "Cochran-Mantel-Haenszel test"
println(method)
@printf("Cochran-Mantel-Haenszel M^2 = %.5f, df = %d, p value = %.6f\n", statistic, df, pvalue)
Dict(:statistic => statistic, :df => df, :pvalue => pvalue, :method => method)
end
end
function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end
function table(x, y, z) # 三次元
indicesx = sort(unique(x))
indicesy = sort(unique(y))
indicesz = sort(unique(z))
counts = zeros(Int, length(indicesx), length(indicesy), length(indicesz))
for (i, j, k) in zip(indexin(x, indicesx), indexin(y, indicesy), indexin(z, indicesz))
counts[i, j, k] += 1
end
return indicesx, indicesy, indicesz, counts
end
function diag(x, n)
a = zeros(n, n)
[a[i, i] = x[i] for i = 1:n]
a
end
function adjustlength(x, y)
if length(x) == 1
x = fill(x, length(y))
elseif length(y) == 1
y = fill(y, length(x))
end
length(x) != length(y) && error("length(x) != length(y)")
x, y
end
function pmin(x, y)
x, y = adjustlength(x, y)
[min(x0, y0) for (x0, y0) in zip(x, y)]
end
function pmax(x, y)
x, y = adjustlength(x, y)
[max(x0, y0) for (x0, y0) in zip(x, y)]
end
function d2x2xk(K, m, n, t, d)
y = pmax(0, t - n)
z = pmin(m, t)
tmy = t - y
mpn = m + n
zmy = z - y
tbl = zeros(K + 1, sum(zmy) + 1)
pos = 0
tbl[1, 1] = 1
for i = 1:K
for j = 1:zmy[i]+1
u = dhyper(tmy[i] - j+1, t[i], mpn[i] - t[i], n[i])
tbl[i + 1, j:j + pos] .+= tbl[i, 1:pos + 1] * u
end
pos += zmy[i]
end
tbl[K, :] / sum(tbl[K, :])
end
x = reshape([0, 0, 6, 5, 3, 0, 3, 6, 6, 2, 0, 4, 5, 6, 1, 0, 2,
5, 0, 0], 2, 2, :);
mantelhaentest(x);
#=====
Mantel-Haenszel chi-squared test with continuity correction
Mantel-Haenszel X-squared = 3.92857, df = 1, p value = 0.04747
alternative hypothesis: true common odds ratio is not equal to 1
95 % confidence interval: [1.026713, 47.725133]
sample estimates: common odds ratio 7
=====#
using RCall
R"mantelhaen.test($x)"
#=====
Mantel-Haenszel chi-squared test with continuity correction
data: `#JL`$x
Mantel-Haenszel X-squared = 3.9286, df = 1, p-value = 0.04747
alternative hypothesis: true common odds ratio is not equal to 1
95 percent confidence interval:
1.026713 47.725133
sample estimates:
common odds ratio
7
=====#
mantelhaentest(x, alternative="less");
#=====
Mantel-Haenszel chi-squared test with continuity correction
Mantel-Haenszel X-squared = 3.92857, df = 1, p value = 0.97626
alternative hypothesis: true common odds ratio is less than 1
95 % confidence interval: [0.000000, 35.052454]
sample estimates: common odds ratio 7
=====#
R"mantelhaen.test($x, alternative=\"less\")"
#=====
Mantel-Haenszel chi-squared test with continuity correction
data: `#JL`$x
Mantel-Haenszel X-squared = 3.9286, df = 1, p-value = 0.9763
alternative hypothesis: true common odds ratio is less than 1
95 percent confidence interval:
0.00000 35.05245
sample estimates:
common odds ratio
7
=====#
mantelhaentest(x, alternative="greater");
#=====
Mantel-Haenszel chi-squared test with continuity correction
Mantel-Haenszel X-squared = 3.92857, df = 1, p value = 0.02374
alternative hypothesis: true common odds ratio is greater than 1
95 % confidence interval: [1.397905, Inf]
sample estimates: common odds ratio 7
=====#
R"mantelhaen.test($x, alternative=\"greater\")"
#=====
Mantel-Haenszel chi-squared test with continuity correction
data: `#JL`$x
Mantel-Haenszel X-squared = 3.9286, df = 1, p-value = 0.02374
alternative hypothesis: true common odds ratio is greater than 1
95 percent confidence interval:
1.397905 Inf
sample estimates:
common odds ratio
7
=====#
mantelhaentest(x, exact=true);
#=====
Exact conditional test of independence in 2 x 2 x k tables
S = 16, p value = 0.03994
alternative hypothesis: true common odds ratio is not equal to 1
95 % confidence interval: [1.077388, 531.512783]
sample estimates: common odds ratio 10.361
=====#
R"mantelhaen.test($x, exact=TRUE)"
#=====
Exact conditional test of independence in 2 x 2 x k tables
data: `#JL`$x
S = 16, p-value = 0.03994
alternative hypothesis: true common odds ratio is not equal to 1
95 percent confidence interval:
1.077401 529.837399
sample estimates:
common odds ratio
10.36102
=====#
mantelhaentest(x, exact=true, alternative="less");
#=====
Exact conditional test of independence in 2 x 2 x k tables
S = 16, p value =0.99862
alternative hypothesis: true common odds ratio is less than 1
95 % confidence interval: [0.000000, 261.487879]
sample estimates: common odds ratio 10.361
=====#
R"mantelhaen.test($x, exact=TRUE, alternative=\"less\")"
#=====
Exact conditional test of independence in 2 x 2 x k tables
data: `#JL`$x
S = 16, p-value = 0.9986
alternative hypothesis: true common odds ratio is less than 1
95 percent confidence interval:
0.0000 262.2733
sample estimates:
common odds ratio
10.36102
=====#
mantelhaentest(x, exact=true, alternative="greater");
#=====
Exact conditional test of independence in 2 x 2 x k tables
S = 16, p value = 0.01997
alternative hypothesis: true common odds ratio is greater than 1
95 % confidence interval: [1.384222, Inf]
sample estimates: common odds ratio 10.361
=====#
R"mantelhaen.test($x, exact=TRUE, alternative=\"greater\")"
#=====
Exact conditional test of independence in 2 x 2 x k tables
data: `#JL`$x
S = 16, p-value = 0.01997
alternative hypothesis: true common odds ratio is greater than 1
95 percent confidence interval:
1.384239 Inf
sample estimates:
common odds ratio
10.36102
=====#
#####################################################################################
x = reshape([8, 3, 0, 3, 0, 2, 0, 3, 3, 5, 0, 3,
2, 1, 2, 8, 2, 0, 2, 2, 0, 0, 1, 1,
0, 1, 0, 8, 2, 1, 8, 8, 0, 5, 5, 8,
0, 1, 0, 5, 2, 5, 1, 1, 2, 3, 3, 0,
5, 0, 2, 0, 0, 2, 1, 2, 2, 1, 0, 1,
5, 8, 1, 1, 0, 5, 0, 8, 1, 0, 5, 8], 3, 4, :)
mantelhaentest(x);
#=====
Cochran-Mantel-Haenszel test
Cochran-Mantel-Haenszel M^2 = 33.32493, df = 6, p value = 0.000009
=====#
R"mantelhaen.test($x)"
#=====
Cochran-Mantel-Haenszel test
data: `#JL`$x
Cochran-Mantel-Haenszel M^2 = 33.325, df = 6, p-value = 9.079e-06
=====#