#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
Log rank 検定
http://aoki2.si.gunma-u.ac.jp/R/logrank.html
ファイル名: logrank.jl 関数名: logrank
翻訳するときに書いたメモ
NaN を除いて統計量を計算するには
1行関数で sumrmnan(x) = sum(x[.!isnan.(x)]) のように定義すると良い
==========#
using Rmath, Printf
function logrank(group, event, time; method = "SAS") # or method = "Tominaga"
sumrmnan(x) = sum(x[.!isnan.(x)])
len = length(group)
length(event) == length(time) == len || error("length differ")
indexx, indexy, tg = table(vcat(time, NaN, NaN, NaN, NaN),
vcat(group, 1, 1, 2, 2) .* 10 .+ vcat(event, 1, 0, 1, 0));
tg = tg[1:end-1, :]
k = size(tg, 1)
index, (nia, nib) = table(group)
na = vcat(nia, (repeat([nia], k) - cumsum(tg[:, 1] + tg[:, 2]))[1:k - 1]);
nb = vcat(nib, (repeat([nib], k) - cumsum(tg[:, 3] + tg[:, 4]))[1:k - 1]);
da = tg[:, 2];
db = tg[:, 4];
dt = da + db;
nt = na + nb;
d = dt ./ nt;
O = [sum(da), sum(db)];
ea = na .* d;
eb = nb .* d;
E = [sumrmnan(ea), sumrmnan(eb)]
@printf(" %4s %3s %3s %3s %3s %3s %3s %10s %10s %10s\n",
"time", "da", "db", "dt", "na", "nb", "nt", "d", "ea", "eb")
for i = 1:k
@printf("%5d %4d %4d %4d %4d %4d %4d %11.8f %11.8f %11.8f\n",
indexx[i], da[i], db[i], dt[i], na[i], nb[i], nt[i],
d[i], ea[i], eb[i])
end
if method == "Tominaga"
method = "ログランク検定(富永)"
chi = sum((O - E) .^ 2 ./ E)
else
method = "ログランク検定(一般的)"
v = sumrmnan( dt .* (nt - dt) ./ (nt .- 1) .* na ./ nt .* (1 .- na ./ nt))
chi = (sumrmnan(da) - sumrmnan(na .* d)) ^ 2 / v
end
P = pchisq(chi, 1, false)
println(method)
println("chisq. = $chi, df = 1, p value = $P")
Dict(:chi => chi, :df => df, :pvalue => P, :method => method)
end
function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end
function table(x, y) # 二次元
indicesx = sort(unique(x))
indicesy = sort(unique(y))
counts = zeros(Int, length(indicesx), length(indicesy))
for (i, j) in zip(indexin(x, indicesx), indexin(y, indicesy))
counts[i, j] += 1
end
return indicesx, indicesy, counts
end
group = [1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1];
event = [1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0];
time = [2, 20, 5, 1, 3, 17, 2, 3, 15, 14, 12, 13, 11, 11, 10, 8, 8, 3, 7, 3, 6, 2, 5, 4, 2, 3, 1, 3, 2, 1];
logrank(group, event, time)
# time da db dt na nb nt d ea eb
# 1 0 2 2 16 14 30 0.06666667 1.06666667 0.93333333
# 2 2 2 4 15 12 27 0.14814815 2.22222222 1.77777778
# 3 0 1 1 12 10 22 0.04545455 0.54545455 0.45454545
# 4 0 0 0 9 7 16 0.00000000 0.00000000 0.00000000
# 5 0 1 1 9 6 15 0.06666667 0.60000000 0.40000000
# 6 0 0 0 8 5 13 0.00000000 0.00000000 0.00000000
# 7 1 0 1 7 5 12 0.08333333 0.58333333 0.41666667
# 8 0 1 1 6 5 11 0.09090909 0.54545455 0.45454545
# 10 0 0 0 6 3 9 0.00000000 0.00000000 0.00000000
# 11 0 0 0 6 2 8 0.00000000 0.00000000 0.00000000
# 12 0 1 1 4 2 6 0.16666667 0.66666667 0.33333333
# 13 1 0 1 4 1 5 0.20000000 0.80000000 0.20000000
# 14 0 0 0 3 1 4 0.00000000 0.00000000 0.00000000
# 15 0 0 0 3 0 3 0.00000000 0.00000000 0.00000000
# 17 0 0 0 2 0 2 0.00000000 0.00000000 0.00000000
# 20 0 0 0 1 0 1 0.00000000 0.00000000 0.00000000
# ログランク検定(一般的)
# chisq. = 3.3805322873790673, df = 1, p value = 0.06597074594879886
logrank(group, event, time, method = "Tominaga")
# time da db dt na nb nt d ea eb
# 1 0 2 2 16 14 30 0.06666667 1.06666667 0.93333333
# 2 2 2 4 15 12 27 0.14814815 2.22222222 1.77777778
# 略
# 20 0 0 0 1 0 1 0.00000000 0.00000000 0.00000000
# ログランク検定(富永)
# chisq. = 3.1527657452320845, df = 1, p value = 0.07579838765953799