#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
クロス集計表・分布表の双対尺度法
http://aoki2.si.gunma-u.ac.jp/R/dual.html
ファイル名: dual.jl 関数名: dual, printdual, plotdual
翻訳するときに書いたメモ
==========#
using Rmath, LinearAlgebra, Printf, NamedArrays, Plots
function dual(tbl; rownames=[], colnames=[])
nr, nc = size(tbl)
length(rownames) > 0 || (rownames = ["Row-$i" for i = 1:nr])
length(colnames) > 0 || (colnames = ["Col-$i" for i = 1:nc])
ft = sum(tbl)
fr = sum(tbl, dims=2)
fc = vec(sum(tbl, dims=1))
temp = sqrt.(fc * fc')
w = transpose(tbl ./ fr) * tbl ./ temp .- temp ./ ft
values, vectors = eigen(w, sortby=x-> -x)
ne = sum(values .> 1e-5)
vectors = vectors[:, 1:ne]
values = values[1:ne]
colscore = vectors .* sqrt.(ft ./ fc)
rowscore = tbl * colscore ./ (fr .* sqrt.(values)')
colscore2 = transpose(sqrt.(values)) .* colscore
rowscore2 = transpose(sqrt.(values)) .* rowscore
cont = values ./ tr(w) * 100
chisq = -(ft - 1 - (nr + nc - 1) / 2) .* log.(1 .- values)
df = (nr + nc - 1) .- 2 .* (1:ne)
P = pchisq.(chisq, df, false)
axisnames = ["Axis-$i" for i = 1:ne]
Dict(:values => values, :correlation => sqrt.(values), :cont => cont,
:cumulativecontribution => cumsum(cont), :chisq => chisq, :df => df,
:P => P, :rowscore => rowscore, :colscore => colscore,
:rowscore2 => rowscore2, :colscore2 => colscore2,
:rownames => rownames, :colnames => colnames, :axisnames => axisnames)
end
function printout(name, vector; str=false)
@printf("%-25s", name)
for i = 1:length(vector)
str ? @printf("%9s", vector[i]) : @printf("%9.3f", vector[i])
end
println()
end
function printdual(x; weighted = true)
printout("", x[:axisnames], str=true)
printout("eta square", x[:values])
printout("correlation", sqrt.(x[:values]))
printout("contribution", x[:cont])
printout("cumulative contribution", cumsum(x[:cont]))
printout("Chi square value", x[:chisq])
printout("degrees of freedom", x[:df], str=true)
printout("P value", x[:P])
if weighted
println("\nweighted row score\n",
NamedArray(x[:rowscore2], (x[:rownames], x[:axisnames])))
println("\nweighted col score\n",
NamedArray(x[:colscore2], (x[:colnames], x[:axisnames])))
else
println("\nrow score\n",
NamedArray(x[:rowscore], (x[:rownames], x[:axisnames])))
println("\ncol score\n",
NamedArray(x[:colscore], (x[:colnames], x[:axisnames])))
end
end
function plotdual(results; weighted=true, ax1=1, ax2=2)
pyplot()
if weighted
row, col = results[:rowscore], results[:colscore]
else
row, col = results[:rowscore2], results[:colscore2]
end
plt = scatter(row[:, ax1], row[:, ax2], grid=false, tick_direction=:out,
color=:red, xlabel="Axis-$ax1", ylabel="Axis-$ax2",
aspect_ratio=1, size=(600, 600), label="")
annotate!.(row[:, ax1], row[:, ax2],
text.(" " .* results[:rownames], 8, :red, :left));
scatter!(col[:, ax1], col[:, ax2], grid=false, tick_direction=:out,
color=:blue, label="")
annotate!.(col[:, ax1], col[:, ax2],
text.(" " .* results[:colnames], 8, :blue, :left))
display(plt)
end
tbl = [
2 3 5 6
5 1 7 5
5 3 4 3
]
res = dual(tbl)
printdual(res)
#=====
Axis-1 Axis-2
eta square 0.049 0.038
correlation 0.221 0.194
contribution 56.572 43.428
cumulative contribution 56.572 100.000
Chi square value 2.263 1.727
degrees of freedom 4 2
P value 0.687 0.422
weighted row score
3×2 Named Matrix{Float64}
A ╲ B │ Axis-1 Axis-2
──────┼─────────────────────────
Row-1 │ -0.317955 -0.00775345
Row-2 │ 0.147306 0.219535
Row-3 │ 0.162385 -0.255172
weighted col score
4×2 Named Matrix{Float64}
A ╲ B │ Axis-1 Axis-2
──────┼───────────────────────
Col-1 │ 0.343349 -0.0831775
Col-2 │ -0.206018 -0.419061
Col-3 │ 0.0256527 0.153724
Col-4 │ -0.220608 0.10514
=====#
plotdual(res) # savefig("fig1.png")
plotdual(res, weighted=false)