裏 RjpWiki

Julia ときどき R, Python によるコンピュータプログラム,コンピュータ・サイエンス,統計学

Julia に翻訳--177 Fleiss のκ統計量

2021年04月11日 | ブログラミング

#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

Python による統計処理 
http://aoki2.si.gunma-u.ac.jp/Python/PythonDir.tar.gz

Fleiss のκ統計量
http://aoki2.si.gunma-u.ac.jp/Python/kappa_m.pdf

ファイル名: kappam.jl  関数名: kappam

翻訳するときに書いたメモ

==========#

using Rmath, Statistics, Printf

function kappam(r; nm=true, Fleiss=true, each=true)
    pstr2(p) = p >= 0.0001 ? string(round(p, digits=4)) : "< 0.0001"
    n, m = size(r)
    if nm
        category = sort(unique(r))
        k = length(category)
        nk = zeros(Int, n, k)
        mk = zeros(Int, m, k)
        for i = 1:n
            for (j, j2) = enumerate(indexin(r[i,:], category))
                nk[i, j2] += 1
                mk[j, j2] += 1
            end
        end
    else
        nk = r
        k = m
        category = collect(1:k)
        m = sum(nk[1, :])
        Fleiss = true
    end
    colSums  = vec(sum(nk, dims=1))
    Po = sum((sum(nk.^2, dims=2) .- m) ./ (m * (m - 1)) / n)
    method = "Fleiss' Kappa: $m Raters, $n Subjects, $k Categories"
    if Fleiss
        Pe = sum(colSums.^2) / (n * m)^2
        kappa = (Po - Pe) / (1 - Pe)
        pj = colSums / (n * m)
        qj = 1 .- pj
        pq = sum(pj .* qj)^2
        var = (2 / (pq * (n * m * (m - 1)))) * (pq - sum(pj .* qj .* (qj - pj)))
        se = sqrt(var)
        Z = kappa / se
        pvalue = 2pnorm(abs(Z), false)
        # kappa for each category
        pjk = (vec(sum(nk.^2, dims=1)) .- n * m .* pj) ./ (n * m * (m - 1) .* pj)
        kappak = (pjk .- pj) ./ (1 .- pj)
        vark = 2 / (n * m * (m - 1))
        sek = sqrt(vark)
        Zk = kappak ./ sek
        pvaluek = 2pnorm.(abs.(Zk), false)
        pstrk = pstr2.(pvaluek)
        @printf("%5s  %10s  %10s %10s %10s\n", "", "Kappa", "SE", "Z", "p value")
        for i = 1:k
            @printf("%5s  %10.6f  %10.6f %10.6f  %10s\n",
                    category[i], kappak[i], sek, Zk[i], pstrk[i])
        end
        #results = DataFrame({"Kappa": kappak, "SE": sek, "Z": Zk, "p value": pstrk},
        #                       index=category)
        println(method)
        println("kappa = $kappa,  SE = $se,  Z = $Z,  pvalue = $pvalue")
        #Dict(n=n, m=m, method=method, kappa=kappa, se=se, Z=Z,
        #              pvalue=pvalue, results=results, nk=array(nk, dtype=int),
        #              mk=array(mk, dtype=int) if nm else NaN)
    else
        method += " (exact value)"
        Pe = sum(colSums^2) / (n * m)^2 - sum((mk / n).var(dims=0)) / (m - 1)
        kappa = (Po - Pe) / (1 - Pe)
        println(method)
        println("kappa = {0:.5g}".format(kappa))
        Dict(:n => n, :m => m, :method => method, :kappa => kappa)
    end
end

r = ["Neu" "Neu" "Neu" "Neu" "Neu" "Neu"
     "Per" "Per" "Per" "Oth" "Oth" "Oth"
     "Per" "Sch" "Sch" "Sch" "Sch" "Oth"
     "Oth" "Oth" "Oth" "Oth" "Oth" "Oth"
     "Per" "Per" "Per" "Neu" "Neu" "Neu"
     "Dep" "Dep" "Sch" "Sch" "Sch" "Sch"
     "Sch" "Sch" "Sch" "Sch" "Oth" "Oth"
     "Dep" "Dep" "Sch" "Sch" "Sch" "Neu"
     "Dep" "Dep" "Neu" "Neu" "Neu" "Neu"
     "Oth" "Oth" "Oth" "Oth" "Oth" "Oth"
     "Dep" "Neu" "Neu" "Neu" "Neu" "Neu"
     "Dep" "Per" "Neu" "Neu" "Neu" "Neu"
     "Per" "Per" "Per" "Sch" "Sch" "Sch"
     "Dep" "Neu" "Neu" "Neu" "Neu" "Neu"
     "Per" "Per" "Neu" "Neu" "Neu" "Oth"
     "Sch" "Sch" "Sch" "Sch" "Sch" "Oth"
     "Dep" "Dep" "Dep" "Neu" "Oth" "Oth"
     "Dep" "Dep" "Dep" "Dep" "Dep" "Per"
     "Per" "Per" "Neu" "Neu" "Neu" "Neu"
     "Dep" "Sch" "Sch" "Oth" "Oth" "Oth"
     "Oth" "Oth" "Oth" "Oth" "Oth" "Oth"
     "Per" "Neu" "Neu" "Neu" "Neu" "Neu"
     "Per" "Per" "Neu" "Oth" "Oth" "Oth"
     "Dep" "Dep" "Neu" "Neu" "Neu" "Neu"
     "Dep" "Neu" "Neu" "Neu" "Neu" "Oth"
     "Per" "Per" "Per" "Per" "Per" "Neu"
     "Dep" "Dep" "Dep" "Dep" "Oth" "Oth"
     "Per" "Per" "Neu" "Neu" "Neu" "Neu"
     "Dep" "Sch" "Sch" "Sch" "Sch" "Sch"
     "Oth" "Oth" "Oth" "Oth" "Oth" "Oth"];

kappam(r)
#           Kappa          SE          Z    p value
# Dep    0.244755    0.047140   5.192043    < 0.0001
# Neu    0.471127    0.047140   9.994119    < 0.0001
# Oth    0.566118    0.047140  12.009172    < 0.0001
# Per    0.244755    0.047140   5.192043    < 0.0001
# Sch    0.520000    0.047140  11.030866    < 0.0001
# Fleiss' Kappa: 6 Raters, 30 Subjects, 5 Categories
# kappa = 0.43024452006014074,  SE = 0.02437393209941115,  Z = 17.65183058299137,  pvalue = 9.851070940926153e-70

nk = [0 0 0 0 14
      0 2 6 4 2
      0 0 3 5 6
      0 3 9 2 0
      2 2 8 1 1
      7 7 0 0 0
      3 2 6 3 0
      2 5 3 2 2
      6 5 2 1 0
      0 2 2 3 7];
d = kappam(nk, nm=false)
#         Kappa          SE          Z    p value
# 1    0.201282    0.033150   6.071916    < 0.0001
# 2    0.079670    0.033150   2.403352      0.0162
# 3    0.171598    0.033150   5.176450    < 0.0001
# 4    0.030381    0.033150   0.916491      0.3594
# 5    0.507657    0.033150  15.314077    < 0.0001
# Fleiss' Kappa: 14 Raters, 10 Subjects, 5 Categories
# kappa = 0.20993070442195527,  SE = 0.016965069224393132,  Z = 12.374291059190467,  pvalue = 3.6005943234665503e-35

 

コメント
  • X
  • Facebookでシェアする
  • はてなブックマークに追加する
  • LINEでシェアする

Julia に翻訳--176 Fligner-Killeen 検定,三群以上の等分散性の検定

2021年04月11日 | ブログラミング

#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

Python による統計処理 
http://aoki2.si.gunma-u.ac.jp/Python/PythonDir.tar.gz

Fligner-Killeen 検定(三群以上の等分散性の検定)
http://aoki2.si.gunma-u.ac.jp/Python/Fligner_Killeen_test.pdf

ファイル名: flignerkilleentest.jl  関数名: flignerkilleentest

翻訳するときに書いたメモ

副関数の方が長い。

==========#

using Rmath, StatsBase, Statistics

function flignerkilleentest(x, group)
    d = split2(x, group)
    ni = sapply(d, length)
    k = length(ni)
    n = sum(ni)
    x = sapply(d, y -> y .- median(y))
    a = qnorm.((1 .+ tiedrank(abs.(vcat(x...))) ./ (n + 1)) / 2)
    b = split3(a, ni)
    chisq = sum(sapply(b, y -> sum(y)^2) ./ ni)
    chisq = (chisq .- n * mean(a)^2) / var(a)
    df = k - 1
    pvalue = pchisq(chisq, df, false)
    println("chisq. = $chisq,  df = $df,  p value = $pvalue")
    Dict(:chisq => chisq, :df => df, :pvalue => pvalue)
end

function split2(x, group)
    index = sort(unique(group))
    y = Array{Array{Float64,1},1}(undef, length(index))
    [y[i] = x[group .== i] for i in index]
end

function split3(x, ni)
    y = Array{Array{Float64,1},1}(undef, length(ni))
    bound = vcat(0, cumsum(ni))
    [y[i] = x[bound[i]+1:bound[i]+ni[i]] for i in 1:length(ni)]
end

sapply(x, FUNC) = map(FUNC, x)

function rep(x; each::Int64)
    vcat([repeat([x[i]], each) for i = 1:length(x)]...)
end


x = [3, 3, 4, 2, 5, 2, 3, 4, 8, 8, 5, 6];
group = rep(1:3, each = 4);
flignerkilleentest(x, group)
chisq. = 2.6082308115793977,  df = 2,  p value = 0.2714125188285773

コメント
  • X
  • Facebookでシェアする
  • はてなブックマークに追加する
  • LINEでシェアする

Julia に翻訳--175 Brunner-Munzel 検定,exact test

2021年04月11日 | ブログラミング

#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

Python による統計処理 
http://aoki2.si.gunma-u.ac.jp/Python/PythonDir.tar.gz

Brunner-Munzel 検定(exact test)
http://aoki2.si.gunma-u.ac.jp/Python/exact_bm_test.pdf

ファイル名: exactbm.jl  関数名: exactbm

翻訳するときに書いたメモ

t 統計量に基づく場合のプログラムである

==========#

using StatsBase, Rmath, Printf

function exactbm(x, y = NaN; exact = true)
    function found()
        hh = bmtest(um)
        if hh >= s1
            nprod = sum(perm_table[rt .+ 1]) - sum(perm_table[um .+ 1])
            nntrue += exp(nprod - nntrue2 * log_expmax)
            while nntrue >= EXPMAX
                nntrue /= EXPMAX
                nntrue2 += 1
            end
        end
        ntables += 1
    end

    function search(x, y)
        if y == 1
            found()
        elseif x == 1
            t = um[1, 1] - um[y, 1]
            if t >= 0
                um[1, 1] = t
                search(nc, y - 1)
                um[1, 1] += um[y, 1]
            end
        else
            search(x - 1, y)
            while um[y, 1] != 0 && um[1, x] != 0
                um[y, x] += 1
                um[y, 1] -= 1
                um[1, x] -= 1
                search(x - 1, y)
            end
            um[y, 1] += um[y, x]
            um[1, x] += um[y, x]
            um[y, x] = 0
        end
    end

    function exacttest()
        denom2 = 0
        denom = perm_table[n + 1] - sum(perm_table[ct .+ 1])
        while denom > log_expmax
            denom -= log_expmax
            denom2 += 1
        end
        denom = exp(denom)
        um[:, 1] = rt
        um[1, :] = ct
        search(nc, nr)
        @printf("正確な P 値 = % .10g\n", nntrue / denom * EXPMAX ^ (nntrue2 - denom2))
        @printf("査察した分割表の個数は % s 個\n", ntables)
    end

    function bmtest(t)
        x2 = rep(score, t[1, :])
        y2 = rep(score, t[2, :])
        r1 = tiedrank(x2)
        r2 = tiedrank(y2)
        r = tiedrank(vcat(x2, y2))
        m1 = mean(r[1:n1])
        m2 = mean(r[n1 + 1:n])
        v1 = sum((r[1:n1] .- r1 .- m1 .+ (n1 + 1) / 2) .^ 2) / (n1 - 1)
        v2 = sum((r[n1 + 1:n] .- r2 .- m2 .+ (n2 + 1) / 2) .^ 2) / (n2 - 1)
        n1 * n2 * abs(m2 - m1) / (n1 + n2) / sqrt(n1 * v1 + n2 * v2)
    end

    function asymptotic(x, y)
        a = bm0(x, y)
        @printf("t = % g,  df = % g,  pvalue = % g\n", a[:t], a[:df], a[:pvalue])
    end

    if ndims(x) == 2
        t = x
    else
        nx = length(x)
        ny = length(y)
        indexx, score, t = table(rep(1:2, [nx, ny]), vcat(x, y))
    end
    EPSILON = eps(Float64)
    EXPMAX = 1e100
    log_expmax = log(EXPMAX)
    nr, nc = size(t)
    nr == 2 || error("nrow(table) wasn't 2")
    um = zeros(Int, nr, nc)
    rt = sum(t, dims=2)
    ct = transpose(sum(t, dims=1))
    n1 = rt[1]
    n2 = rt[2]
    n = n1 + n2
    n2p1div2 = (n2 + 1) / 2
    s1 = bmtest(t) - EPSILON
    asymptotic(x, y)
    if exact
        perm_table = cumsum(vcat(0, log.(1:n .+ 1)))
        ntables = nntrue = nntrue2 = 0
        exacttest()
    end
end

function bm0(x, y)
    n1 = length(x)
    n2 = length(y)
    n = n1 + n2
    r1 = tiedrank(x)
    r2 = tiedrank(y)
    r = tiedrank(vcat(x, y))
    m1 = mean(r[1:n1])
    m2 = mean(r[n1 + 1:n])
    pst = (m2 - (n2 + 1) / 2) / n1
    v1 = sum((r[1:n1] .- r1 .- m1 .+ (n1 + 1) / 2) .^ 2) / (n1 - 1)
    v2 = sum((r[n1 + 1:n] .- r2 .- m2 .+ (n2 + 1) / 2) .^ 2) / (n2 - 1)
    t = n1 * n2 * abs(m2 - m1) / (n1 + n2) / sqrt(n1 * v1 + n2 * v2)
    df0 = ((n1 * v1 + n2 * v2) ^ 2) / (((n1 * v1) ^ 2) / (n1 - 1) + ((n2 * v2) ^ 2) / (n2 - 1))
    pvalue = 2 * pt(-abs(t), df0)
    Dict(:t => t, :df => df0, :pvalue => pvalue, :pst => pst)
end

function rep(x, n::Array{Int64,1})
    length(x) == length(n) || error("length(x) wasn't length(n)")
    vcat([repeat([x[i]], n[i]) for i = 1:length(x)]...)
end

function table(x, y) # 二次元
    indicesx = sort(unique(x))
    indicesy = sort(unique(y))
    counts = zeros(Int, length(indicesx), length(indicesy))
    for (i, j) in zip(indexin(x, indicesx), indexin(y, indicesy))
        counts[i, j] += 1
    end
    return indicesx, indicesy, counts
end

x = [36, 45, 51, 49, 57];
y = [48, 42, 49, 39, 45];
exactbm(x, y)
# t =  0.891133,  df =  4.72131,  pvalue =  0.415956
# 正確な P 値 =  0.3650793651
# 査察した分割表の個数は 132 個

x = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1];
y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4];
exactbm(x, y)
# t =  3.13747,  df =  17.6828,  pvalue =  0.00578621
# 正確な P 値 =  0.008037645264
# 査察した分割表の個数は 160 個

exactbm([78, 64, 75, 45, 82], [110, 70, 53, 51])
# t =  0.209274,  df =  6.02423,  pvalue =  0.841132
# 正確な P 値 =  0.8412698413
# 査察した分割表の個数は 126 個

コメント
  • X
  • Facebookでシェアする
  • はてなブックマークに追加する
  • LINEでシェアする

PVアクセスランキング にほんブログ村

PVアクセスランキング にほんブログ村