dak ブログ

python、rubyなどのプログラミング、MySQL、サーバーの設定などの備忘録。レゴの写真も。

BigQuery でレコードの有効期限を設定

2023-11-25 23:09:48 | BigQuery
BigQuery でレコードの有効期限を設定する方法のメモ。
■テーブル定義・データ登録
create table dataset.test_partition1 (
  jst_date date
  , log_text string
)
partition by jst_date
options (
  partition_expiration_days = 3
  , require_partition_filter = true
);

insert into dataset.test_partition1 (jst_date, log_text) values ('2023-11-20', 'log_20231120');
insert into dataset.test_partition1 (jst_date, log_text) values ('2023-11-21', 'log_20231121');
insert into dataset.test_partition1 (jst_date, log_text) values ('2023-11-22', 'log_20231122');
insert into dataset.test_partition1 (jst_date, log_text) values ('2023-11-23', 'log_20231123');
insert into dataset.test_partition1 (jst_date, log_text) values ('2023-11-24', 'log_20231124');

■データ確認
select current_date();

[{
  "f0_": "2023-11-24"
}]

select
  *
from
  dataset.test_partition1
where
  jst_date >= '2023-11-01'
order by
  jst_date asc
;

[{"jst_date": "2023-11-21", "log_text": "log_20231121"},
 {"jst_date": "2023-11-22", "log_text": "log_20231122"},
 {"jst_date": "2023-11-23", "log_text": "log_20231123"},
 {"jst_date": "2023-11-24", "log_text": "log_20231124"}]

レコードの有効期限を変更
alter table dataset.test_partition1 set options (
  partition_expiration_days = 2
  , require_partition_filter = true)
;

select
  *
from
  dataset.test_partition1
where
  jst_date >= '2023-11-01'
order by
  jst_date asc
;

[{"jst_date": "2023-11-22", "log_text": "log_20231122"},
 {"jst_date": "2023-11-23", "log_text": "log_20231123"},
 {"jst_date": "2023-11-24", "log_text": "log_20231124"}]


BigQuery のユーザ定義関数でベクトルの重み付き和を計算

2023-11-09 23:57:52 | BigQuery
BigQuery のユーザ定義関数でベクトルの重み付き和を計算する方法のメモ。
■ユーザ定義関数
create or replace function dataset.vector_wsum_int(wvs array<struct<w integer, v array<integer>>>)
returns array<integer> as (
  array(
    select
      sum(e * w)
    from
      unnest(wvs) as wv
      , unnest(wv.v) as e with offset idx
    group by
      idx
  )
);

■クエリ
select
  dataset.vector_wsum_int([
    struct(2 as w, [1, 2, 3, 4] as v),
    struct(3 as w, [2, 3, 4, 5] as v),
    struct(4 as w, [3, 4, 5, 6] as v)
  ])
;

■実行結果
[{
  "f0_": ["20", "29", "38", "47"]
}]


BigQuery のユーザ定義関数でベクトルの各要素に重み付け

2023-11-09 00:11:20 | BigQuery
BigQuery のユーザ定義関数でベクトルの各要素に重み付けをする方法のメモ。

■ユーザ定義関数
create function dataset.vector_weight_int(v array<integer>, m integer)
returns array<integer> as (
  array(
    select
      e * m
    from
      unnest(v) as e
  )
);

■テスト
select
  dataset.vector_weight_int([1, 2, 3, 4, 5], 3)
;

■実行結果
[{
  "f0_": ["3", "6", "9", "12", "15"]
}]


BigQuery で重み付きでベクトルを加算

2023-11-06 23:43:15 | BigQuery
BigQuery で重み付きでベクトルを加算する方法のメモ。

item に重み付きで属性が付与されていて、属性がベクトルで表現されている場合に、
item のベクトルを重み付きで属性のベクトルを加算して計算する。

■item の重み付き属性
create table dataset.item_weight_attrs (
  id           string,
  attr_infos   array<struct<attr as string, weight as integer>>
);

insert into
  dataset.item_weight_attrs
  (id, attr_infos)
values
  ('id1', [struct('attr1' as attr, 1 as weight), struct('attr2' as attr, 2 as weight)])
  , ('id2', [struct('attr2' as attr, 2 as weight), struct('attr3' as attr, 1 as weight)])
  , ('id3', [struct('attr1' as attr, 3 as weight), struct('attr2' as attr, 2 as weight), struct('attr3' as attr, 1 as weight)]);

■属性のベクトル
create table dataset.attr_vectors (
  attr         string,
  vector       array<integer>
);

insert into
  dataset.attr_vectors
  (attr, vector)
values
  ("attr1", ["0", "1", "2", "3", "4"])
  , ("attr2", ["1", "2", "3", "4", "5"])
  , ("attr3", ["2", "3", "4", "5", "6"]);

■item のベクトル生成
with wvs as (
  select
    iwa.id as id
    , array_agg(
        struct(
          attr_info.weight as weight
          , attr_info.vector as vector
        )
      ) as weight_vector
  from
    dataset.item_weight_attrs as iwa
    , unnest(iwa.attr_infos) as attr_info
    , dataset.attr_vector as av
  where
    av.attr = attr_info.attr
  group by
    iwa.id
)

select
  wvs.id
  , array(
      select
        sum(e * wv.weight)
      from
        wvs as wvs2
        , unnest(wvs2.weight_vector) as wv
        , unnest(wv.vector) as e with offset idx
      where
        wvs2.id = wvs.id
      group by
        idx
    ) as sum_vector
from
  wvs
;

■item のベクトル
[{
  "id": "id1",
  "sum_vector": ["2", "5", "8", "11", "14"]
}, {
  "id": "id2",
  "sum_vector": ["4", "7", "10", "13", "16"]
}, {
  "id": "id3",
  "sum_vector": ["4", "10", "16", "22", "28"]
}]


BigQuery でテーブル内の全配列の加算

2023-11-02 23:49:37 | BigQuery
BigQuery でテーブル内の全レコードの配列を加算する方法のメモ。
■テーブル定義
create table dataset.vector_test (
  id      string,
  vector  array<integer>
);

■データ登録
insert into
  dataset.vector_test
  (id, vector)
values
  ('id1', [0, 1, 2, 3, 4]),
  ('id2', [1, 2, 3, 4, 5]),
  ('id3', [2, 3, 4, 5, 6]);

■加算クエリ
select
array(
  select 
    sum(e)
  from
    dataset.vector_test
    , unnest(vector) as e with offset idx
  group by
    idx
) as sum_vector
;

■結果
[{
  "sum_vector": ["3", "6", "9", "12", "15"]
}]