#========== Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。 以下のプログラムを Julia に翻訳してみる。 因子分析 http://aoki2.si.gunma-u.ac.jp/R/pfa.html ファイル名: pfa.jl 関数名: pfa, printpfa, plotpfa 翻訳するときに書いたメモ ==========# using CSV, DataFrames, Statistics, StatsBase, LinearAlgebra, Printf, Plots function pfa(data::DataFrame; method="Varimax", # "biQuartimax", "Quartimax", "Equimax", "None"), eps1=1e-5, eps2=1e-5, max1=999, max2=999, factors=0) pfa(Array{Float64,2}(data); name=names(data), method, eps1, eps2, max1, max2, factors) end function pfa(data::Array{Int64,2}; name=[], method="Varimax", # "biQuartimax", "Quartimax", "Equimax", "None"), eps1=1e-5, eps2=1e-5, max1=999, max2=999, factors=0) pfa(Array{Float64,2}(data); name, method, eps1, eps2, max1, max2, factors) end function pfa(data::Array{Float64,2}; name=[], method="Varimax", # "biQuartimax", "Quartimax", "Equimax", "None"), eps1=1e-5, eps2=1e-5, max1=999, max2=999, factors=0) nr, nc = size(data) if length(name) == 0 name = "X" .* string.(1:nc) end wt=getwt(method, nc) r0 = cor(data) r = copy(r0) communality0 = 1 .- 1 ./ diag(inv(r)) communality = similar(communality0) [r[i, i] = communality0[i] for i = 1:nc] eigenvalue, eigenvector = eigen(r, sortby = x -> -x) if factors == 0 factors = sum(eigenvalue .>= 1) end converged = false for i = 1:max1 eigenvalue = eigenvalue[1:factors] eigenvector = eigenvector[:, 1:factors] eigenvector = transpose(sqrt.(eigenvalue) .* transpose(eigenvector)) r = copy(r0) communality = sum(eigenvector .^ 2, dims = 2) if all(abs.(communality .- communality0) .< eps1) converged = true break end communality0 = copy(communality) [r[i, i] = communality[i] for i = 1:nc] eigenvalue, eigenvector = eigen(r, sortby = x -> -x) end if converged == false println("Not converged.") elseif any(communality .>= 1) println("Communality >= 1.") end temp = fit(ZScoreTransform, data, dims = 1) data = StatsBase.transform(temp, data) if factors == 1 || method == "None" w = inv(r0) * eigenvector scores = (data .* sqrt(nr / (nr - 1))) * w return Dict(:method => method, :correlationmatrix => r0, :communality => communality, :beforerotationfl => eigenvector, :beforerotationeval => eigenvalue, :afterrotationfl => NaN, :afterrotationeval => NaN, :scores => scores, :name => name) else fl = eigenvector ./ sqrt.(communality) eig = zeros(factors) ov = 0 fnp = nc for loop = 1:max2 for k = 1:factors-1 for k1 = k+1:factors x = fl[:, k] y = fl[:, k1] xy = x .^ 2 .- y .^ 2 a = sum(xy) b = 2sum(x .* y) c = sum(xy .^ 2 .- 4x .^ 2 .* y .^ 2) d = 4sum(x .* y .* xy) dd = d - 2 * wt * a * b / fnp theta = atan(dd / (c - wt * (a^2 - b^2) / fnp)) if sin(theta) * dd < 0 if theta > 0 theta -= pi else theta += pi end end theta /= 4.0 si, cs = sincos(theta) fljk = fl[:, k] fljk1 = fl[:, k1] fl[:, k] = fljk .* cs .+ fljk1 .* si fl[:, k1] = fljk1 .* cs .- fljk .* si end end v = sum((fl .^ 2 .- sum(fl .^ 2, dims = 1) .* (wt / fnp)) .^ 2) if abs(v - ov) < eps2 break end ov = v end fl = fl .* sqrt.(communality) w = inv(r0) * fl scores = (data .* sqrt(nr / (nr - 1))) * w eigenvalue2 = vec(sum(fl .^ 2, dims = 1)) Dict(:method => method, :correlationmatrix => r0, :communality => communality, :beforerotationfl => eigenvector, :beforerotationeval => eigenvalue, :afterrotationfl => fl, :afterrotationeval => eigenvalue2, :scores => scores, :name => name) end end function getwt(method, nc) method0 = ["Varimax", "biQuartimax", "Quartimax", "Equimax", "None"] for (i, m) in enumerate(method0) if method == m return [1, 0.5, 0, nc / 2, 999][i] end end error("'method' must be one of $method0") end function printpfa(result; before=false) communality = result[:communality] name = [s[1:min(length(s), 7)] for s in result[:name]] if before fl = result[:beforerotationfl] eigenvalue = result[:beforerotationeval] label = "E-value" if result[:method] == "None" println("\nResult without rotation\n") else println("\nBefore $(result[:method]) rotation\n") end else fl = result[:afterrotationfl] eigenvalue = result[:afterrotationeval] label = "SqSum" println("\nAfter $(result[:method]) rotation\n") end signchange!(fl) nv, npca = size(fl) @printf("%7s", "") for j = 1:npca @printf(" Fac%02d", j) end println(" Contribution") for i = 1:nv @printf("%-7s", name[i]) for j = 1:npca @printf("%10.5f", fl[i, j]) end @printf("%10.5f\n", communality[i]) end lineout(label, eigenvalue) lineout("Cont", eigenvalue ./ nv .* 100) lineout("Cum", cumsum(eigenvalue) ./ nv .* 100) end # 列ごとに見て,負の数が過半数のときは全体の符号を付け替え,常に正の数が過半数になるようにする # 正負の数が同数の場合には,第1要素が負であれば全体の符号を付け替える function signchange!(x) nr, nc = size(x) half = fld(nr, 2) for i = 1:nc minus = sum(x[:, i] .< 0) if minus > half || (minus == half && x[1, i] < 0) x[:, i] = -x[:, i] end end end function lineout(label, value) @printf("%-7s", label) for val in value @printf("%10.5f", val) end println() end function plotpfa(result; before=false, facno=1:2, scores=false, xlabel="", ylabel="", axis=true, labelcex=0.7) ax1, ax2 = facno xlabel != "" || (xlabel = "Fac-" * string(ax1)) ylabel != "" || (ylabel = "Fac-" * string(ax2)) if scores x = result[:scores][:, ax1] y = result[:scores][:, ax2] labels = string.(1:length(x)) title = "Factor scores" else fl = before ? result[:beforerotationfl] : result[:afterrotationfl] x = fl[:, ax1] y = fl[:, ax2] labels = result[:name] title = "Factor loadings" end plt = scatter(x, y, grid=false, tick_direction=:out, markerstrokecolor=:blue, xlabel = xlabel, ylabel = ylabel, title=title, label = "") annotate!(x, y, text.(" " .* labels, 6, :left, :top, :green), label="") display(plt) end data = [ 935 955 926 585 1010 925 1028 807 769 767 817 905 901 632 1004 950 957 844 781 738 768 825 859 662 893 900 981 759 868 732 869 915 856 448 867 874 884 802 804 857 787 878 880 592 871 874 884 781 782 807 738 848 850 569 814 950 957 700 870 764 763 862 839 658 887 900 1005 604 709 753 795 890 841 670 853 874 859 701 680 772 903 877 919 460 818 849 884 700 718 716 761 765 881 485 846 900 981 728 781 714 747 792 800 564 796 849 932 682 746 767 771 802 840 609 824 874 859 668 704 710 785 878 805 527 911 680 884 728 709 747 657 773 820 612 810 849 909 698 746 771 696 785 791 578 774 725 932 765 706 795 724 785 870 509 746 849 807 763 724 760 712 829 838 516 875 725 807 754 762 585 756 863 815 474 873 725 957 624 655 620 622 759 786 619 820 769 807 673 698 695 668 753 751 551 834 849 807 601 655 642 ]; result = pfa(data, factors = 3, method = "Varimax") printpfa(result) After Varimax rotation Fac01 Fac02 Fac03 Contribution X1 0.81752 0.38108 -0.25443 0.87829 X2 0.86716 0.22562 -0.05438 0.80583 X3 0.58096 0.61851 -0.13273 0.73769 X4 -0.04083 0.06992 0.72983 0.53921 X5 0.83445 0.02648 0.31515 0.79632 X6 0.18699 0.63099 0.37141 0.57106 X7 0.41170 0.33928 0.31716 0.38520 X8 0.34889 0.56003 -0.14871 0.45748 X9 0.11754 0.73398 0.12509 0.56819 X10 0.08607 0.56161 0.05523 0.32586 SqSum 2.80320 2.26529 0.99662 Cont 28.03196 22.65289 9.96624 Cum 28.03196 50.68484 60.65108 printpfa(result, before=true) Before Varimax rotation Fac01 Fac02 Fac03 Contribution X1 0.85127 -0.38093 0.09227 0.87829 X2 0.80339 -0.36668 -0.16103 0.80583 X3 0.83137 -0.04526 0.21088 0.73769 X4 0.06449 0.52620 -0.50809 0.53921 X5 0.67639 -0.22528 -0.53672 0.79632 X6 0.57352 0.49151 -0.02347 0.57106 X7 0.55426 0.17470 -0.21789 0.38520 X8 0.61545 0.03447 0.27840 0.45748 X9 0.56939 0.42882 0.24515 0.56819 X10 0.42989 0.30435 0.22006 0.32586 E-value 4.04685 1.15906 0.85920 Cont 40.46847 11.59059 8.59202 Cum 40.46847 52.05907 60.65108 ===# plotpfa(result) plotpfa(result, scores=true)