裏 RjpWiki

Julia ときどき R, Python によるコンピュータプログラム,コンピュータ・サイエンス,統計学

Julia に翻訳--177 Fleiss のκ統計量

2021年04月11日 | ブログラミング

#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

Python による統計処理 
http://aoki2.si.gunma-u.ac.jp/Python/PythonDir.tar.gz

Fleiss のκ統計量
http://aoki2.si.gunma-u.ac.jp/Python/kappa_m.pdf

ファイル名: kappam.jl  関数名: kappam

翻訳するときに書いたメモ

==========#

using Rmath, Statistics, Printf

function kappam(r; nm=true, Fleiss=true, each=true)
    pstr2(p) = p >= 0.0001 ? string(round(p, digits=4)) : "< 0.0001"
    n, m = size(r)
    if nm
        category = sort(unique(r))
        k = length(category)
        nk = zeros(Int, n, k)
        mk = zeros(Int, m, k)
        for i = 1:n
            for (j, j2) = enumerate(indexin(r[i,:], category))
                nk[i, j2] += 1
                mk[j, j2] += 1
            end
        end
    else
        nk = r
        k = m
        category = collect(1:k)
        m = sum(nk[1, :])
        Fleiss = true
    end
    colSums  = vec(sum(nk, dims=1))
    Po = sum((sum(nk.^2, dims=2) .- m) ./ (m * (m - 1)) / n)
    method = "Fleiss' Kappa: $m Raters, $n Subjects, $k Categories"
    if Fleiss
        Pe = sum(colSums.^2) / (n * m)^2
        kappa = (Po - Pe) / (1 - Pe)
        pj = colSums / (n * m)
        qj = 1 .- pj
        pq = sum(pj .* qj)^2
        var = (2 / (pq * (n * m * (m - 1)))) * (pq - sum(pj .* qj .* (qj - pj)))
        se = sqrt(var)
        Z = kappa / se
        pvalue = 2pnorm(abs(Z), false)
        # kappa for each category
        pjk = (vec(sum(nk.^2, dims=1)) .- n * m .* pj) ./ (n * m * (m - 1) .* pj)
        kappak = (pjk .- pj) ./ (1 .- pj)
        vark = 2 / (n * m * (m - 1))
        sek = sqrt(vark)
        Zk = kappak ./ sek
        pvaluek = 2pnorm.(abs.(Zk), false)
        pstrk = pstr2.(pvaluek)
        @printf("%5s  %10s  %10s %10s %10s\n", "", "Kappa", "SE", "Z", "p value")
        for i = 1:k
            @printf("%5s  %10.6f  %10.6f %10.6f  %10s\n",
                    category[i], kappak[i], sek, Zk[i], pstrk[i])
        end
        #results = DataFrame({"Kappa": kappak, "SE": sek, "Z": Zk, "p value": pstrk},
        #                       index=category)
        println(method)
        println("kappa = $kappa,  SE = $se,  Z = $Z,  pvalue = $pvalue")
        #Dict(n=n, m=m, method=method, kappa=kappa, se=se, Z=Z,
        #              pvalue=pvalue, results=results, nk=array(nk, dtype=int),
        #              mk=array(mk, dtype=int) if nm else NaN)
    else
        method += " (exact value)"
        Pe = sum(colSums^2) / (n * m)^2 - sum((mk / n).var(dims=0)) / (m - 1)
        kappa = (Po - Pe) / (1 - Pe)
        println(method)
        println("kappa = {0:.5g}".format(kappa))
        Dict(:n => n, :m => m, :method => method, :kappa => kappa)
    end
end

r = ["Neu" "Neu" "Neu" "Neu" "Neu" "Neu"
     "Per" "Per" "Per" "Oth" "Oth" "Oth"
     "Per" "Sch" "Sch" "Sch" "Sch" "Oth"
     "Oth" "Oth" "Oth" "Oth" "Oth" "Oth"
     "Per" "Per" "Per" "Neu" "Neu" "Neu"
     "Dep" "Dep" "Sch" "Sch" "Sch" "Sch"
     "Sch" "Sch" "Sch" "Sch" "Oth" "Oth"
     "Dep" "Dep" "Sch" "Sch" "Sch" "Neu"
     "Dep" "Dep" "Neu" "Neu" "Neu" "Neu"
     "Oth" "Oth" "Oth" "Oth" "Oth" "Oth"
     "Dep" "Neu" "Neu" "Neu" "Neu" "Neu"
     "Dep" "Per" "Neu" "Neu" "Neu" "Neu"
     "Per" "Per" "Per" "Sch" "Sch" "Sch"
     "Dep" "Neu" "Neu" "Neu" "Neu" "Neu"
     "Per" "Per" "Neu" "Neu" "Neu" "Oth"
     "Sch" "Sch" "Sch" "Sch" "Sch" "Oth"
     "Dep" "Dep" "Dep" "Neu" "Oth" "Oth"
     "Dep" "Dep" "Dep" "Dep" "Dep" "Per"
     "Per" "Per" "Neu" "Neu" "Neu" "Neu"
     "Dep" "Sch" "Sch" "Oth" "Oth" "Oth"
     "Oth" "Oth" "Oth" "Oth" "Oth" "Oth"
     "Per" "Neu" "Neu" "Neu" "Neu" "Neu"
     "Per" "Per" "Neu" "Oth" "Oth" "Oth"
     "Dep" "Dep" "Neu" "Neu" "Neu" "Neu"
     "Dep" "Neu" "Neu" "Neu" "Neu" "Oth"
     "Per" "Per" "Per" "Per" "Per" "Neu"
     "Dep" "Dep" "Dep" "Dep" "Oth" "Oth"
     "Per" "Per" "Neu" "Neu" "Neu" "Neu"
     "Dep" "Sch" "Sch" "Sch" "Sch" "Sch"
     "Oth" "Oth" "Oth" "Oth" "Oth" "Oth"];

kappam(r)
#           Kappa          SE          Z    p value
# Dep    0.244755    0.047140   5.192043    < 0.0001
# Neu    0.471127    0.047140   9.994119    < 0.0001
# Oth    0.566118    0.047140  12.009172    < 0.0001
# Per    0.244755    0.047140   5.192043    < 0.0001
# Sch    0.520000    0.047140  11.030866    < 0.0001
# Fleiss' Kappa: 6 Raters, 30 Subjects, 5 Categories
# kappa = 0.43024452006014074,  SE = 0.02437393209941115,  Z = 17.65183058299137,  pvalue = 9.851070940926153e-70

nk = [0 0 0 0 14
      0 2 6 4 2
      0 0 3 5 6
      0 3 9 2 0
      2 2 8 1 1
      7 7 0 0 0
      3 2 6 3 0
      2 5 3 2 2
      6 5 2 1 0
      0 2 2 3 7];
d = kappam(nk, nm=false)
#         Kappa          SE          Z    p value
# 1    0.201282    0.033150   6.071916    < 0.0001
# 2    0.079670    0.033150   2.403352      0.0162
# 3    0.171598    0.033150   5.176450    < 0.0001
# 4    0.030381    0.033150   0.916491      0.3594
# 5    0.507657    0.033150  15.314077    < 0.0001
# Fleiss' Kappa: 14 Raters, 10 Subjects, 5 Categories
# kappa = 0.20993070442195527,  SE = 0.016965069224393132,  Z = 12.374291059190467,  pvalue = 3.6005943234665503e-35

 

コメント
  • X
  • Facebookでシェアする
  • はてなブックマークに追加する
  • LINEでシェアする

Julia に翻訳--176 Fligner-Killeen 検定,三群以上の等分散性の検定

2021年04月11日 | ブログラミング

#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

Python による統計処理 
http://aoki2.si.gunma-u.ac.jp/Python/PythonDir.tar.gz

Fligner-Killeen 検定(三群以上の等分散性の検定)
http://aoki2.si.gunma-u.ac.jp/Python/Fligner_Killeen_test.pdf

ファイル名: flignerkilleentest.jl  関数名: flignerkilleentest

翻訳するときに書いたメモ

副関数の方が長い。

==========#

using Rmath, StatsBase, Statistics

function flignerkilleentest(x, group)
    d = split2(x, group)
    ni = sapply(d, length)
    k = length(ni)
    n = sum(ni)
    x = sapply(d, y -> y .- median(y))
    a = qnorm.((1 .+ tiedrank(abs.(vcat(x...))) ./ (n + 1)) / 2)
    b = split3(a, ni)
    chisq = sum(sapply(b, y -> sum(y)^2) ./ ni)
    chisq = (chisq .- n * mean(a)^2) / var(a)
    df = k - 1
    pvalue = pchisq(chisq, df, false)
    println("chisq. = $chisq,  df = $df,  p value = $pvalue")
    Dict(:chisq => chisq, :df => df, :pvalue => pvalue)
end

function split2(x, group)
    index = sort(unique(group))
    y = Array{Array{Float64,1},1}(undef, length(index))
    [y[i] = x[group .== i] for i in index]
end

function split3(x, ni)
    y = Array{Array{Float64,1},1}(undef, length(ni))
    bound = vcat(0, cumsum(ni))
    [y[i] = x[bound[i]+1:bound[i]+ni[i]] for i in 1:length(ni)]
end

sapply(x, FUNC) = map(FUNC, x)

function rep(x; each::Int64)
    vcat([repeat([x[i]], each) for i = 1:length(x)]...)
end


x = [3, 3, 4, 2, 5, 2, 3, 4, 8, 8, 5, 6];
group = rep(1:3, each = 4);
flignerkilleentest(x, group)
chisq. = 2.6082308115793977,  df = 2,  p value = 0.2714125188285773

コメント
  • X
  • Facebookでシェアする
  • はてなブックマークに追加する
  • LINEでシェアする

PVアクセスランキング にほんブログ村

PVアクセスランキング にほんブログ村