在PostgreSQL中实现update delete limit - CTID扫描实践 (高效阅后即焚)

4 minute read

背景

使用MySQL 的用户可能会比较熟悉这样的用法,更新或删除时可以指定限制更新或删除多少条记录。

达到限定的记录数后不再继续进行下去,而是返回。

delete from tbl where xxx limit 100;  
  
update tbl set xxx=xxx where xxx limit 100;  

目前PostgreSQL没有类似的语法,但是可以通过其他手段来达到同样的效果。

一、with语法实现

创建测试表

postgres=# create table t(id int primary key, info text);  
CREATE TABLE  
postgres=# insert into t select generate_series(1,1000000);  
INSERT 0 1000000  
update delete limit 用法如下
postgres=# with t1 as (select id from t where id between 1 and 1000 limit 10
)   
                  delete from t where id in (select * from t1);  
DELETE 10  
  
postgres=# with t1 as (select id from t where id between 1 and 1000 limit 10
)   
                   update t set info='new' where id in (select * from t1);  
UPDATE 10  
  
postgres=# explain with t1 as (select id from t where id between 1 and 1000 
limit 10)   
postgres-#                    update t set info='new' where id in (select * 
from t1);  
                                      QUERY 
PLAN                                         
-----------------------------------------------------------------------------
----------  
 Update on t  (cost=4.89..12.93 rows=1 width=38)  
   CTE t1  
     ->  Limit  (cost=0.42..4.44 rows=1 width=4)  
           ->  Index Only Scan using t_pkey on t t_1  (cost=0.42..4.44 rows=1 
width=4)  
                 Index Cond: ((id >= 1) AND (id <= 1000))  
   ->  Nested Loop  (cost=0.45..8.48 rows=1 width=38)  
         ->  HashAggregate  (cost=0.02..0.03 rows=1 width=32)  
               Group Key: t1.id  
               ->  CTE Scan on t1  (cost=0.00..0.02 rows=1 width=32)  
         ->  Index Scan using t_pkey on t  (cost=0.42..8.44 rows=1 width=10)  
               Index Cond: (id = t1.id)  
(11 rows)  

注意,不能用in子句和CTID结合,没走tid scan方法。性能会很差

postgres=#  with t1 as (select ctid from t where id between 1 and 100000 
limit 10) delete from t where ctid in (select ctid from t1);  
DELETE 10  
  
postgres=#  with t1 as (select ctid from t where id between 1 and 100000 
limit 10) update t set info='new' where ctid in (select ctid from t1);  
UPDATE 10  
  
postgres=# explain with t1 as (select ctid from t where id between 1 and 
100000 limit 10) update t set info='new' where ctid in (select ctid from t1);  
                                       QUERY 
PLAN                                          
-----------------------------------------------------------------------------
------------  
 Update on t  (cost=129834.64..134784.79 rows=10 width=40)  
   CTE t1  
     ->  Limit  (cost=0.42..0.78 rows=10 width=6)  
           ->  Index Scan using t_pkey on t t_1  (cost=0.42..3178.93 rows=
89225 width=6)  
                 Index Cond: ((id >= 1) AND (id <= 100000))  
   ->  Merge Semi Join  (cost=129833.86..134784.01 rows=10 width=40)  
         Merge Cond: (t.ctid = t1.ctid)  
         ->  Sort  (cost=129833.49..132308.49 rows=990000 width=10)  
               Sort Key: t.ctid  
               ->  Seq Scan on t  (cost=0.00..14325.00 rows=990000 width=10)  
-- 这个NODE非常慢  
         ->  Sort  (cost=0.37..0.39 rows=10 width=36)  
               Sort Key: t1.ctid  
               ->  CTE Scan on t1  (cost=0.00..0.20 rows=10 width=36)  
(13 rows)  

正规的写法

使用如下的写法即可:

postgres=# with t1 as (select ctid from test limit 10) delete from test where 
ctid = any (array(select ctid from t1));
DELETE 10
postgres=# with t1 as (select ctid from test limit 10) delete from test where 
ctid = any (array(select ctid from t1)) returning *;
 id 
----
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
(10 rows)

DELETE 10
postgres=# with t1 as (select ctid from test limit 10) delete from test where 
ctid = any (array(select ctid from t1)) returning *;
 id 
----
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
(10 rows)

DELETE 10
  
postgres=# explain with t1 as (select ctid from test limit 10) delete from 
test where ctid = any (array(select ctid from t1)) returning *;
                                     QUERY 
PLAN                                     
-----------------------------------------------------------------------------
-------
 Delete on test  (cost=0.36..13.46 rows=10 width=6)
   CTE t1
     ->  Limit  (cost=0.00..0.14 rows=10 width=6)
           ->  Seq Scan on test test_1  (cost=0.00..144247.77 rows=9999977 
width=6)
   InitPlan 2 (returns $1)
     ->  CTE Scan on t1  (cost=0.00..0.20 rows=10 width=6)
   ->  Tid Scan on test  (cost=0.01..13.11 rows=10 width=6)
         TID Cond: (ctid = ANY ($1))
(8 rows)

例如,按时序或序列顺序处理数据,每次处理若干条。

create table test(id int, crt_time timestamp);
insert into test select generate_series(1,1000000), clock_timestamp();
create index idx_test_1 on test(crt_time);

postgres=# with t1 as (select ctid from test order by crt_time limit 1000) 
delete from test where ctid = any (array(select ctid from t1)) ;
DELETE 1000
Time: 1.905 ms
postgres=# with t1 as (select ctid from test order by crt_time limit 1000) 
delete from test where ctid = any (array(select ctid from t1)) ;
DELETE 1000
Time: 1.470 ms
postgres=# with t1 as (select ctid from test order by crt_time limit 1000) 
delete from test where ctid = any (array(select ctid from t1)) ;
DELETE 1000
Time: 1.396 ms

是不是很爽呢?

二、函数+游标实现

do language plpgsql $$  
declare   
  rowvar record;  
 cur cursor for select * from t where id between 1 and 1000000 limit 10;  
begin  
  open cur;   
  loop   
    fetch cur into rowvar;  
    if found then   
      update t set info='new' where current of cur returning * into rowvar; 
      raise notice '%', rowvar;  
    else  
      return;  
    end if;  
  end loop;   
  close cur;  
end;  
$$;  
  
NOTICE:  00000: (10011,new)  
LOCATION:  exec_stmt_raise, pl_exec.c:3216  
NOTICE:  00000: (10012,new)  
LOCATION:  exec_stmt_raise, pl_exec.c:3216  
NOTICE:  00000: (10013,new)  
LOCATION:  exec_stmt_raise, pl_exec.c:3216  
NOTICE:  00000: (10014,new)  
LOCATION:  exec_stmt_raise, pl_exec.c:3216  
NOTICE:  00000: (10015,new)  
LOCATION:  exec_stmt_raise, pl_exec.c:3216  
NOTICE:  00000: (10016,new)  
LOCATION:  exec_stmt_raise, pl_exec.c:3216  
NOTICE:  00000: (10017,new)  
LOCATION:  exec_stmt_raise, pl_exec.c:3216  
NOTICE:  00000: (10018,new)  
LOCATION:  exec_stmt_raise, pl_exec.c:3216  
NOTICE:  00000: (10019,new)  
LOCATION:  exec_stmt_raise, pl_exec.c:3216  
NOTICE:  00000: (10020,new)  
LOCATION:  exec_stmt_raise, pl_exec.c:3216  
DO  

最简单、性能最好的delete, update LIMIT SQL写法(推荐)

delete from tbl where ctid = any(array(select ctid from tbl where xxx limit xx));

update tbl set xx=xx where ctid = any(array(select ctid from tbl where xxx limit xx));

可并发,不堵塞的批量更新,删除方法

with a as (delete from t_result where ctid= any(array(     
  select ctid from t_result order by id limit 10 for update skip locked  -- 可以并发消费,不会相互堵塞,消费顺与写入顺序一致    
)) returning *)    
select * from a;    

参考

《PostgreSQL 流式处理应用实践 - 二手商品实时归类(异步消息notify/listen、阅后即焚)》

Flag Counter

digoal’s 大量PostgreSQL文章入口