「正規序数値の期待値」とは聞き慣れない訳語かもしれないが,Expected values of normal order statistics のこと。
H. Leon Harter(1961)Expected Values of Normal Order Statistics, Biometrika, 48, 151-165.
https://www.jstor.org/stable/2333139
大本に近いのは以下
Algorithm AS 177: Expected Normal Order Statistics (Exact and Approximate)
http://imaging.mrc-cbu.cam.ac.uk/statswiki/FAQ/Corder?action=AttachFile&do=get&target=royston.pdf
FORTRAN 90 に書き換えたのが
as177.f90
https://jblevins.org/mirror/amiller/as177.f90
であるが,correc 関数中の定数 C3(4) の 2.157E6 が 2.1576E6 と誤転写されている模様(C に書き換えられたものもあったが 2.157E6 だった)。
なにはともあれ,Julia で最適化して(?)以下のコードを得る。
nscor1 が正確なもの。nscore2 は近似解。
using SpecialFunctions
using Distributions
function nscor1(n)
"""
Exact calculation of Normal Scores
"""
NSTEP = 721
work = zeros(4, NSTEP)
init!(work)
n2 = div(n, 2)
s = zeros(n2)
h = 0.025
c1 = logfactorial(n)
d = c1 - log(n)
for i in 1:n2
i1 = i-1
ni = n-i
c = c1 - d
scor = 0
for j in 1:NSTEP
scor += exp(work[2, j] + i1 * work[3, j] + ni * work[4, j] + c) * work[1, j]
end
s[i] = scor * h
d += log(i / ni)
end
return s
end
function init!(work)
xstart = -9
h = 0.025
pi2 = -0.918938533
xx = xstart
for i in 1:size(work, 2)
work[1, i] = xx
work[2, i] = pi2 - 0.5 * xx^2
work[3, i] = log(ccdf(Normal(), xx))
work[4, i] = log(cdf(Normal(), xx))
xx = xstart + i * h
end
end
近似解 nscor2 は以下
function nscor2(n)
"""
Calculates approximate expected values of normal order statistics.
claimed accuracy is 0.0001, though usually accurate to 5-6 dec.
"""
eps = [0.419885, 0.450536, 0.456936, 0.468488]
dl1 = [0.112063, 0.121770, 0.239299, 0.215159]
dl2 = [0.080122, 0.111348, -0.211867, -0.115049]
gam = [0.474798, 0.469051, 0.208597, 0.259784]
lam = [0.282765, 0.304856, 0.407708, 0.414093]
bb = -0.283833
d = -0.106136
b1 = 0.5641896
n2 = div(n, 2)
s = zeros(n2)
s[1] = b1
n == 2 && return
k = 3
n2 < k && (k = n2)
for i in 1:k
e1 = (i - eps[i]) / (n + gam[i])
e2 = e1^lam[i]
s[i] = e1 + e2 * (dl1[i] + e2 * dl2[i]) / n - correct(i, n)
end
if n2 != k
for i in 4:n2
l1 = lam[4] * bb / (i + d)
e1 = (i - eps[4]) / (n + gam[4])
e2 = e1^l1
s[i] = e1 + e2 * (dl1[4] + e2 * dl2[4]) / n - correct(i, n)
end
end
for i in 1:n2
s[i] = -quantile(Normal(), s[i]) # -ppnd(s[i])
end
return s
end
function correct(i, n)
"""
Calculates correction for tail area of the i-th largest of n order statistics.
"""
c1 = [9.5, 28.7, 1.9, 0.0, -7.0, -6.2, -1.6]
c2 = [-6195.0, -9569.0, -6728.0, -17614.0, -8278.0, -3570.0, 1075.0]
c3 = [9.338e4, 1.7516e5, 4.1040e5, 2.157e6, 2.376e6, 2.065e6, 2.065e6]
mic = 1e-6
c14 = 1.9e-5
i * n == 4 && return c14
(i < 1 || i > 7) && return 0
(i != 4 && n > 20) && return 0
(i == 4 && n > 40) && return 0
an = 1 / n^2
return (c1[i] + an * (c2[i] + an * c3[i])) * mic
end
呼び出し方はいずれも同じ n だけを引数にして呼ぶ。
nscore1(30)
15-element Vector{Float64}:
2.0427608445896133
1.6155999007846518
1.3648120475550987
1.1785538961988162
1.0260849654763988
0.8943875579926721
0.7766582371097811
0.6688521087544627
0.5683389891912317
0.47328800401844007
0.38235131764319513
0.29448658895879626
0.2088497011984587
0.12472539951340163
0.04147914900072655
nscor2(30)
15-element Vector{Float64}:
2.042747144755947
1.6155640427483784
1.3648321622142314
1.1749378349295097
1.023401473812151
0.892300080600137
0.7750026028133499
0.667520049660839
0.5672570676344856
0.47240515390672405
0.38163143261223
0.29390400347499945
0.20838637645235597
0.12436915904576364
0.04122259874887312