久保さんの2008/03/11の集計プログラム
以下のようにしてみた(簡単明瞭?)
> set.seed(12345)
> d <- data.frame(n=sample(10, 500, replace=TRUE),
+ year1= sample(LETTERS[1:5], 500, replace=TRUE),
+ year2= sample(LETTERS[1:5], 500, replace=TRUE),
+ year3= sample(LETTERS[1:5], 500, replace=TRUE))
>
> calc <- function()
+ {
+ calc0 <- function(m)
+ {
+ x <- matrix(0, 5, 5) # year.i から year.j への推移行列
+ colnames(x) <- rownames(x) <- LETTERS[1:5]
+ x[rownames(m), colnames(m)]<- m[rownames(m), colnames(m)] # これがすごい
+ return(as.vector(x)) # ベクトルにして返す
+ }
+ ans <- xtabs(n~year1+year2+year3, d) # 3重クロス集計する
+ a1 <- apply(ans, 1:2, sum) # year1 -> year2 の推移行列
+ a2 <- apply(ans, 2:3, sum) # year2 -> year3 の推移行列
+ year1 <- calc0(a1) # 結果のyear1列
+ year2 <- calc0(a2) # 結果のyear2列
+ type.T1 <- rep(LETTERS[1:5], 5) # その他の列の定義
+ type.T2 <- rep(LETTERS[1:5], each=5)
+ type.T1T2 <- paste(type.T1, type.T2, sep="->")
+ result <- data.frame(type.T1=type.T1, type.T2=type.T2,
+ type.T1T2=type.T1T2,
+ year1=year1, year2=calc0(a2)) # データフレームを構成
+ return(result) # 結果を返す
+ }
> calc()
type.T1 type.T2 type.T1T2 year1 year2
1 A A A->A 101 89
2 B A B->A 146 81
3 C A C->A 52 110
4 D A D->A 169 71
5 E A E->A 104 92
6 A B A->B 93 142
7 B B B->B 57 139
8 C B C->B 98 130
9 D B D->B 81 71
10 E B E->B 138 108
11 A C A->C 147 76
12 B C B->C 122 43
13 C C C->C 123 176
14 D C D->C 130 134
15 E C E->C 99 225
16 A D A->D 84 126
17 B D B->D 115 134
18 C D C->D 102 89
19 D D D->D 113 92
20 E D E->D 126 111
21 A E A->E 106 139
22 B E B->E 127 70
23 C E C->E 189 116
24 D E D->E 187 172
25 E E E->E 88 161
ちなみに,久保さんのプログラムと集計結果
> na.to.zero <- function(v) sapply(v, function(x) ifelse(is.na(x), 0, x))
>
> count.type <- function(data, col.count, col.years)
+ {
+ n <- length(col.years)
+ # prepare df.count
+ v.type <- sort(unique(unlist(data[, col.years]))) # 1, 2, 3, ...
+ df.count <- data.frame(type = v.type)
+ matrix.zero <- matrix(0, nrow(df.count), n)
+ df.count <- cbind(df.count, matrix.zero)
+ # count
+ colnames(df.count)[2:(1 + n)] <- col.years
+ for (y in col.years) {
+ v.count <- tapply(
+ data[, col.count],
+ factor(data[, y], levels = as.character(v.type)),
+ sum
+ )
+ df.count[, y] <- na.to.zero(v.count[df.count$type])
+ }
+ return(df.count)
+ }
>
> count.change <- function(data, col.count, col.years, sep = "->")
+ {
+ n <- length(col.years)
+ # prepare df.count
+ v.type <- sort(unique(unlist(data[, col.years]))) # 1, 2, 3, ...
+ df.count <- expand.grid(v.type, v.type) # all possilbe combinations
+ colnames(df.count) <- c("type.T1", "type.T2")
+ df.count$type.T1T2 <- apply(
+ df.count, 1, function(x) paste(x[1], x[2], sep = sep)
+ )
+ matrix.zero <- matrix(0, nrow(df.count), n - 1)
+ df.count <- cbind(df.count, matrix.zero)
+ colnames(df.count)[4:(2 + n)] <- col.years[1:(n - 1)]
+ # count
+ for (y in 1:(n - 1)) {
+ y1 <- col.years[y]
+ y2 <- col.years[y + 1]
+ v.change <- tapply(
+ data[, col.count],
+ factor(
+ apply(data, 1, function(x) paste(x[y1], x[y2], sep = sep)),
+ levels = as.character(df.count$type.T1T2)
+ ),
+ sum
+ )
+ df.count[, y1] <- na.to.zero(v.change[df.count$type.T1T2])
+ }
+ return(df.count)
+ }
> count.change(d, "n", c("year1", "year2", "year3"))
type.T1 type.T2 type.T1T2 year1 year2
1 A A A->A 101 89
2 B A B->A 146 81
3 C A C->A 52 110
4 D A D->A 169 71
5 E A E->A 104 92
6 A B A->B 93 142
7 B B B->B 57 139
8 C B C->B 98 130
9 D B D->B 81 71
10 E B E->B 138 108
11 A C A->C 147 76
12 B C B->C 122 43
13 C C C->C 123 176
14 D C D->C 130 134
15 E C E->C 99 225
16 A D A->D 84 126
17 B D B->D 115 134
18 C D C->D 102 89
19 D D D->D 113 92
20 E D E->D 126 111
21 A E A->E 106 139
22 B E B->E 127 70
23 C E C->E 189 116
24 D E D->E 187 172
25 E E E->E 88 161
同じになっているはず