dak ブログ

python、rubyなどのプログラミング、MySQL、サーバーの設定などの備忘録。レゴの写真も。

BigQuery で重み付きでベクトルを加算

2023-11-06 23:43:15 | BigQuery
BigQuery で重み付きでベクトルを加算する方法のメモ。

item に重み付きで属性が付与されていて、属性がベクトルで表現されている場合に、
item のベクトルを重み付きで属性のベクトルを加算して計算する。

■item の重み付き属性
create table dataset.item_weight_attrs (
  id           string,
  attr_infos   array<struct<attr as string, weight as integer>>
);

insert into
  dataset.item_weight_attrs
  (id, attr_infos)
values
  ('id1', [struct('attr1' as attr, 1 as weight), struct('attr2' as attr, 2 as weight)])
  , ('id2', [struct('attr2' as attr, 2 as weight), struct('attr3' as attr, 1 as weight)])
  , ('id3', [struct('attr1' as attr, 3 as weight), struct('attr2' as attr, 2 as weight), struct('attr3' as attr, 1 as weight)]);

■属性のベクトル
create table dataset.attr_vectors (
  attr         string,
  vector       array<integer>
);

insert into
  dataset.attr_vectors
  (attr, vector)
values
  ("attr1", ["0", "1", "2", "3", "4"])
  , ("attr2", ["1", "2", "3", "4", "5"])
  , ("attr3", ["2", "3", "4", "5", "6"]);

■item のベクトル生成
with wvs as (
  select
    iwa.id as id
    , array_agg(
        struct(
          attr_info.weight as weight
          , attr_info.vector as vector
        )
      ) as weight_vector
  from
    dataset.item_weight_attrs as iwa
    , unnest(iwa.attr_infos) as attr_info
    , dataset.attr_vector as av
  where
    av.attr = attr_info.attr
  group by
    iwa.id
)

select
  wvs.id
  , array(
      select
        sum(e * wv.weight)
      from
        wvs as wvs2
        , unnest(wvs2.weight_vector) as wv
        , unnest(wv.vector) as e with offset idx
      where
        wvs2.id = wvs.id
      group by
        idx
    ) as sum_vector
from
  wvs
;

■item のベクトル
[{
  "id": "id1",
  "sum_vector": ["2", "5", "8", "11", "14"]
}, {
  "id": "id2",
  "sum_vector": ["4", "7", "10", "13", "16"]
}, {
  "id": "id3",
  "sum_vector": ["4", "10", "16", "22", "28"]
}]