#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
重回帰分析
http://aoki2.si.gunma-u.ac.jp/R/mreg.html
ファイル名: mreg.jl 関数名: mreg
翻訳するときに書いたメモ
出力にこだわらなければ簡単なプログラムなんだけど。
==========#
using Statistics, LinearAlgebra, Rmath, Printf
function mreg(dat)
n, nc = size(dat)
nv = nc - 1
r = cor(dat)
mall = vec(mean(dat, dims=1))
m = mall[1:nc-1]
betas = r[1:nc-1, 1:nc-1] \ r[1:nc-1, nc]
variance = cov(dat) .* (n - 1)
prop = diag(variance)
prop = (prop / prop[nc])[1:nc-1]
b = betas ./ sqrt.(prop)
Sr = transpose(variance[:, nc][1:nc-1]) * b
St = variance[nc, nc]
Se = St .- Sr
SS = [Sr, Se, St]
dfr = nv
dfe = n - nv - 1
dft = n - 1
df = [dfr, dfe, dft]
Ms = SS ./ df
f = Ms[1] / Ms[2]
fvalue = [f, NaN, NaN]
p = [pf(f, dfr, dfe, false),NaN, NaN]
b0 = mall[nc] - sum(b .* m)
b = vcat(b, b0)
inverse = inv((n - 1) .* cov(dat)[1:nc-1, 1:nc-1])
SEb = vcat([sqrt(inverse[i, i] * Ms[2]) for i =1:nv],
sqrt((1 / n + transpose(m) * inverse * m) * Ms[2]))
tval = b ./ SEb
pval = pt.(abs.(tval), n - nv - 1, false) * 2
tolerance = 1 ./ diag(inv(cor(dat)[1:nc-1, 1:nc-1]))
R2 = 1 - Se / St
R = sqrt(R2)
R2s = 1 - Ms[2] / Ms[3]
loglik = - 0.5 * n * (log(2 * pi) + 1 - log(n) + log(Se))
AIC = 2 * nc + 2 - 2 * loglik
printresult(b, SEb, tval, pval, betas, tolerance)
printanovatable(SS, df, Ms, f, p)
printstatistics(R2, R, R2s, loglik, AIC)
Dict(:result => hcat(b, SEb, tval, pval, vcat(betas, NaN), vcat(tolerance, NaN)),
:anova => hcat(SS, df, Ms, fvalue, p),
:Rs => [R2, R, R2s, loglik, AIC])
end
diag(a) = [a[i, i] for i in 1:size(a, 1)]
function printresult(b, SEb, tval, pval, betas, tolerance)
@printf("%6s%12s%12s%12s%12s%18s%12s\n",
"", "偏回帰係数", "標準誤差", "t 値", "P 値", "標準化偏回帰係数", "トレランス")
for i = 1:length(betas)
@printf("%6s%12.5g%12.5g%12.5g%12.5g%18.5g%12.5g\n",
"Var" * string(i), b[i], SEb[i], tval[i], pval[i], betas[i], tolerance[i])
end
@printf("%6s%12.5g%12.5g%12.5g%12.5g\n",
"定数項", b[end], SEb[end], tval[end], pval[end])
end
function printanovatable(SS, df, Ms, f, p)
println("回帰の分散分析表")
@printf("%4s %10s %6s %12s %12s %12s\n",
"", "平方和", "自由度", "平均平方", "F 値", "P 値")
@printf("回帰 %10.6g %6d %12.6g %12.6g %12.6g\n",
SS[1], df[1], Ms[1], f[1], p[1])
@printf("残差 %10.6g %6d %12.6g\n", SS[2], df[2], Ms[2])
@printf("全体 %10.6g %6d %12.6g\n", SS[3], df[3], Ms[3])
end
function printstatistics(R2, R, R2s, loglik, AIC)
println("重相関係数 = $R")
println("重相関係数の二乗 = $R2")
println("自由度調整済重相関係数の二乗 = $R2s")
println("対数尤度 = $loglik")
println("AIC = $AIC")
end
dat = [
1.2 1.9 0.9
1.6 2.7 1.3
3.5 3.7 2.0
4.0 3.1 1.8
5.6 3.5 2.2
5.7 7.5 3.5
6.7 1.2 1.9
7.5 3.7 2.7
8.5 0.6 2.1
9.7 5.1 3.6]
mreg(dat)
# 偏回帰係数 標準誤差 t 値 P 値 標準化偏回帰係数 トレランス
# Var1 0.20462 0.0075643 27.051 2.4192e-08 0.67067 0.98358
# Var2 0.28663 0.010802 26.536 2.7638e-08 0.65793 0.98358
# 定数項 0.14918 0.054506 2.7368 0.02905
# 回帰の分散分析表
# 平方和 自由度 平均平方 F 値 P 値
# 回帰 6.67164 2 3.33582 823.477 4.93187e-09
# 残差 0.0283563 7 0.0040509
# 全体 6.7 9 0.744444
# 重相関係数 = 0.9978816139144453
# 重相関係数の二乗 = 0.9957677153884982
# 自由度調整済重相関係数の二乗 = 0.9945584912137834
# 対数尤度 = 15.13806917315012
# AIC = -22.27613834630024