PostgreSQL pgbench tpcb 海量数据库测试 - 分区表测试优化
背景
pgbench是PG的一款测试工具,内置的测试CASE为tpcb测试。同时支持用户自己写测试CASE。
大量自定义CASE参考
https://github.com/digoal/blog/blob/master/201711/readme.md
当我们使用tpcb测试CASE时,如果生成的数据量过于庞大,例如我最近在生成1万亿的CASE,可以考虑使用分区表,但是目前PG内置分区表的性能在分区非常多时,使用PREPARED STATEMENT会导致性能下降。
《PostgreSQL 查询涉及分区表过多导致的性能问题 - 性能诊断与优化(大量BIND, spin lock, SLEEP进程)》
建议使用pg_pathman分区插件,消除这个性能问题。
或者可以把pgbench的SQL改一下,使用udf,动态SQL来实现,性能依旧杠杠的。
pgbench tpcb 分区表 UDF动态SQL实现
1、原生QUERY如下
读写case
\set aid random(1, 100000 * :scale)
\set bid random(1, 1 * :scale)
\set tid random(1, 10 * :scale)
\set delta random(-5000, 5000)
BEGIN;
UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
END;
只读case
\set aid random(1, 100000 * :scale)
SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
tpcb pgbench_accounts 分区表重构
1、定义结构
例如1万亿
pgbench -i -I dt --tablespace=tbs1 -s 10000000
2、使用分区
create table p (like pgbench_accounts) partition by RANGE ( aid ) tablespace tbs1;
3、创建4097个分区,每个分区244140625条记录
do language plpgsql $$
declare
i_rows_perpartition int8 := 244140625;
begin
for i in 0..4096 loop
execute format ('create table pgbench_accounts%s partition of p for values from (%s) to (%s) tablespace tbs1', i, i*i_rows_perpartition, (i+1)*i_rows_perpartition);
end loop;
end;
$$;
4、重命名表
drop table pgbench_accounts;
alter table p rename to pgbench_accounts;
-- alter table pgbench_accounts add constraint pk_pgbench_accounts_aid primary key (aid) using index tablespace tbs2;
5、新建任务表,并行加载数据
drop table task;
create table task(id int primary key);
insert into task select i from generate_series(0,4095) t(i);
6、新建基础数据表,写入244140625条数据。
create table init_accounts(aid int8);
insert into init_accounts select generate_series(0,244140624);
7、创建UDF,调用一次,往单个分区中写入244140625条数据.
create or replace function tpcb_init_accounts() returns void as $$
declare
v_id int;
begin
with tmp as (select * from task limit 1 for update skip locked),
tmp1 as (delete from task using tmp where task.id=tmp.id)
select id into v_id from tmp;
if found then
execute format ('insert into pgbench_accounts%s select aid+%s*244140625::int8, ((aid+%s*244140625::int8)-1)/100000 + 1, 0 from init_accounts on conflict do nothing', v_id, v_id, v_id);
end if;
end;
$$ language plpgsql strict;
8、使用pgbench,开启64个并发,生成1万亿条tpcb pgbench_accounts测试数据。
vi test.sql
select tpcb_init_accounts();
nohup pgbench -M prepared -n -r -f ./test.sql -c 64 -j 64 -t 100 >./init.log 2>&1 &
9、创建任务表,用于生成pgbench_accounts分区表PK
drop table task;
create table task(id int primary key);
insert into task select i from generate_series(0,4095) t(i);
10、新建UDF,用于生成pgbench_accounts分区表PK
create or replace function tpcb_init_accounts_pkey() returns void as $$
declare
v_id int;
begin
with tmp as (select * from task limit 1 for update skip locked),
tmp1 as (delete from task using tmp where task.id=tmp.id)
select id into v_id from tmp;
if found then
execute format ('analyze pgbench_accounts%s', v_id);
execute format ('alter table pgbench_accounts%s add constraint pk_pgbench_accounts%s_aid primary key (aid) using index tablespace tbs2', v_id, v_id);
end if;
end;
$$ language plpgsql strict;
11、生成pgbench_accounts分区表PK
vi test.sql
select tpcb_init_accounts_pkey();
nohup pgbench -M prepared -n -r -f ./test.sql -c 64 -j 64 -t 100 >./init.log 2>&1 &
如果使用的是pg_pathman插件,不用这么麻烦。
1、使用pgbench生成结构
pgbench -i -I dt --tablespace=tbs1 -s 10000000
2、转换为分区表
https://github.com/postgrespro/pg_pathman
《PostgreSQL 9.5+ 高效分区表实现 - pg_pathman》
3、使用pgbench生成数据
pgbench -i -I g -n --tablespace=tbs1 -s 10000000
4、新增PK
表级parallel
analyze 表
pgbench -i -I p -n -s 10000000 --index-tablespace=tbs2
tpcb UDF
1、读写
create or replace function tpcb_rw(
i_aid int8,
i_bid int4,
i_tid int4,
i_delta int4,
i_rows_perpartition int8
) returns int as $$
declare
i_suffix int := i_aid/i_rows_perpartition;
res int;
begin
execute format('execute p1_%s(%s,%s)', i_suffix, i_delta, i_aid);
execute format('execute p2_%s(%s)', i_suffix, i_aid) into res;
UPDATE pgbench_tellers SET tbalance = tbalance + i_delta WHERE tid = i_tid;
UPDATE pgbench_branches SET bbalance = bbalance + i_delta WHERE bid = i_bid;
INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (i_tid, i_bid, i_aid, i_delta, CURRENT_TIMESTAMP);
return res;
exception when others then
execute format('prepare p1_%s(int,int8) as UPDATE pgbench_accounts%s SET abalance = abalance + $1 WHERE aid = $2', i_suffix, i_suffix);
execute format('prepare p2_%s(int8) as SELECT abalance FROM pgbench_accounts%s WHERE aid = $1', i_suffix, i_suffix);
execute format('execute p1_%s(%s,%s)', i_suffix, i_delta, i_aid);
execute format('execute p2_%s(%s)', i_suffix, i_aid) into res;
return res;
end;
$$ language plpgsql strict;
2、只读
create or replace function tpcb_ro(
i_aid int8,
i_rows_perpartition int8
) returns int as $$
declare
i_suffix int := i_aid/i_rows_perpartition;
res int;
begin
execute format('execute p2_%s(%s)', i_suffix, i_aid) into res;
return res;
exception when others then
execute format('prepare p2_%s(int8) as SELECT abalance FROM pgbench_accounts%s WHERE aid = $1', i_suffix, i_suffix);
execute format('execute p2_%s(%s)', i_suffix, i_aid) into res;
return res;
end;
$$ language plpgsql strict;
3、改成自定义脚本
读写CASE
vi rw.sql
\set aid random(1, 100000 * :scale)
\set bid random(1, 1 * :scale)
\set tid random(1, 10 * :scale)
\set delta random(-5000, 5000)
SELECT tpcb_rw(:aid,:bid,:tid,:delta,244140625);
只读case
vi ro.sql
\set aid random(1, 100000 * :scale)
SELECT tpcb_ro(:aid,244140625);
参考
https://github.com/digoal/blog/blob/master/201711/readme.md
https://github.com/postgrespro/pg_pathman
《PostgreSQL 9.5+ 高效分区表实现 - pg_pathman》
《PostgreSQL Oracle 兼容性之 - DBMS_SQL(存储过程动态SQL中使用绑定变量-DB端prepare statement)》
https://www.postgresql.org/docs/devel/static/pgbench.html
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=60e612b602999e670f2d57a01e52799eaa903ca9