PostgreSQL 9.3 add LATERAL support - LATERAL的语法和用法介绍

9 minute read

背景

LATERAL是SQL:2011的标准(T491 : LATERAL derived table), 在FROM 或者JOIN子句的子查询里面可以关联查询LATERAL前面的FROM子句(或ITEM)或者JOIN子句(或ITEM).

例如手册上提到的 :

SELECT * FROM foo, LATERAL (SELECT * FROM bar WHERE bar.id = foo.bar_id) ss;    
  
在 LATERAL (这里可以关联(引用)lateral左边的表或子句)  
  
所以允许:   
  
LATERAL (SELECT * FROM bar WHERE bar.id = foo.bar_id)  

测试

首先从github上下载PostgreSQL 9.3 源码,

wget https://github.com/postgres/postgres/downloads    

安装简单步骤 :

useradd pgdev    
    
su - pgdev    
    
vi .bash_profile    
# add by digoal    
export PS1="$USER@`/bin/hostname -s`-> "    
export PGPORT=9300    
export PGUSER=postgres    
export PGDATA=/data04/pgdev/pg_root    
export LANG=en_US.utf8    
export PGHOME=/home/pgdev/pgsql9.3    
export PGHOST=$PGDATA    
    
export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib    
export DATE=`date +"%Y%m%d%H%M"`    
export PATH=$PGHOME/bin:$PATH:.    
export MANPATH=$PGHOME/share/man:$MANPATH    
alias rm='rm -i'    
alias ll='ls -lh'    
    
su - root    
    
mkdir -p /data04/pgdev/pg_root    
    
chown -R pgdev:pgdev /data04/pgdev    
    
. /home/pgdev/.bash_profile    
    
cd /home/pgdev    
    
unzip pg9.3.zip    
    
cd postgres-postgres-95d035e    
    
./configure --prefix=/home/pgdev/pgsql9.3 --with-pgport=9300 --with-perl --with-python --with-tcl --with-openssl --with-pam--without-ldap --with-libxml --with-libxslt --enable-thread-safety --with-wal-blocksize=16 && gmake    
    
gmake install    
    
su - pgdev    
    
initdb -D $PGDATA -E UTF8 --locale=C -U postgres -W    

启动数据库

pg_ctl start    

测试表 :

CREATE TABLE test (    
    username TEXT,    
    some_ts timestamptz,    
    random_value INT4    
);    

测试数据 :

INSERT INTO test (username, some_ts, random_value)    
SELECT    
    'user #' || cast(floor(random() * 10) as int4),    
    now() - '1 year'::INTERVAL * random(),    
    cast(random() * 100000000 as INT4)    
FROM    
    generate_series(1,2000000);    

索引 :

CREATE INDEX i on test (username, some_ts);    

分析表 :

analyze test;    

使用递归查询模拟index skip scan, 提高查询速度.

WITH RECURSIVE skip AS (    
    ( SELECT t.username FROM test as t ORDER BY t.username limit 1 )    
    union all    
    ( SELECT (SELECT min( t2.username ) FROM test t2 WHERE t2.username > s.username ) FROM skip s WHERE s.username IS NOT NULL )    
)    
SELECT    
    x.*    
FROM    
    skip s,    
    lateral (    
        SELECT t.*    
        FROM test t    
        WHERE t.username = s.username    
        ORDER BY t.some_ts desc LIMIT 5    
    ) as x;    

执行计划

 Nested Loop  (cost=92.44..588.61 rows=505 width=20) (actual time=0.080..0.730 rows=50 loops=1)    
   CTE skip    
     ->  Recursive Union  (cost=0.00..92.44 rows=101 width=32) (actual time=0.037..0.327 rows=11 loops=1)    
           ->  Limit  (cost=0.00..0.83 rows=1 width=8) (actual time=0.034..0.034 rows=1 loops=1)    
                 ->  Index Only Scan using i1 on test t_1  (cost=0.00..1667358.25 rows=2000000 width=8) (actual time=0.033..0.033 ro    
ws=1 loops=1)    
                       Heap Fetches: 1    
           ->  WorkTable Scan on skip s_1  (cost=0.00..8.96 rows=10 width=32) (actual time=0.025..0.025 rows=1 loops=11)    
                 Filter: (username IS NOT NULL)    
                 Rows Removed by Filter: 0    
                 SubPlan 2    
                   ->  Result  (cost=0.87..0.88 rows=1 width=0) (actual time=0.026..0.026 rows=1 loops=10)    
                         InitPlan 1 (returns $2)    
                           ->  Limit  (cost=0.00..0.87 rows=1 width=8) (actual time=0.025..0.025 rows=1 loops=10)    
                                 ->  Index Only Scan using i1 on test t2  (cost=0.00..577241.50 rows=666667 width=8) (actual time=0.    
024..0.024 rows=1 loops=10)    
                                       Index Cond: ((username IS NOT NULL) AND (username > s_1.username))    
                                       Heap Fetches: 9    
   ->  CTE Scan on skip s  (cost=0.00..2.02 rows=101 width=32) (actual time=0.040..0.337 rows=11 loops=1)    
   ->  Limit  (cost=0.00..4.79 rows=5 width=20) (actual time=0.024..0.033 rows=5 loops=11)    
         ->  Index Scan Backward using i1 on test t  (cost=0.00..191701.23 rows=200000 width=20) (actual time=0.024..0.031 rows=5 lo    
ops=11)    
               Index Cond: (username = s.username)    
 Total runtime: 0.814 ms    

执行结果

 username |            some_ts            | random_value     
----------+-------------------------------+--------------    
 user #0  | 2012-10-08 15:48:45.553316+08 |     88033114    
 user #0  | 2012-10-08 15:47:47.751716+08 |      1162776    
 user #0  | 2012-10-08 15:46:32.842916+08 |      5801749    
 user #0  | 2012-10-08 15:44:55.642916+08 |     94951593    
 user #0  | 2012-10-08 15:42:52.522916+08 |     24722119    
 user #1  | 2012-10-08 15:44:48.471716+08 |     60946458    
 user #1  | 2012-10-08 15:35:37.153316+08 |     12236441    
 user #1  | 2012-10-08 15:33:01.460516+08 |     34209317    
 user #1  | 2012-10-08 15:32:36.231716+08 |     91423982    
 user #1  | 2012-10-08 15:31:57.351716+08 |     46008329    
 user #2  | 2012-10-08 15:48:21.188516+08 |     18521498    
 user #2  | 2012-10-08 15:48:07.710116+08 |     30330399    
 user #2  | 2012-10-08 15:44:47.175716+08 |     20635202    
 user #2  | 2012-10-08 15:42:47.166116+08 |     24079275    
 user #2  | 2012-10-08 15:41:37.095716+08 |      4902864    
 user #3  | 2012-10-08 15:42:26.084516+08 |     52659326    
 user #3  | 2012-10-08 15:41:35.194916+08 |     69423103    
 user #3  | 2012-10-08 15:38:00.145316+08 |     84811251    
 user #3  | 2012-10-08 15:35:06.308516+08 |     75104712    
 user #3  | 2012-10-08 15:34:55.594916+08 |     73931958    
 user #4  | 2012-10-08 15:49:11.732516+08 |     78025638    
 user #4  | 2012-10-08 15:47:19.412516+08 |      9725755    
 user #4  | 2012-10-08 15:45:07.134116+08 |     41955915    
 user #4  | 2012-10-08 15:43:32.612516+08 |      8423193    
 user #4  | 2012-10-08 15:42:14.161316+08 |     76958806    
 user #5  | 2012-10-08 15:46:15.649316+08 |     11810519    
 user #5  | 2012-10-08 15:43:08.334116+08 |     58534489    
 user #5  | 2012-10-08 15:39:19.892516+08 |      8442981    
 user #5  | 2012-10-08 15:37:09.860516+08 |     70260530    
 user #5  | 2012-10-08 15:36:11.626916+08 |     31693867    
 user #6  | 2012-10-08 15:35:00.865316+08 |     73969641    
 user #6  | 2012-10-08 15:32:59.818916+08 |     93242875    
 user #6  | 2012-10-08 15:31:04.474916+08 |     19057377    
 user #6  | 2012-10-08 15:28:27.572516+08 |     32746489    
 user #6  | 2012-10-08 15:28:15.562916+08 |     53467567    
 user #7  | 2012-10-08 15:45:52.753316+08 |     87485440    
 user #7  | 2012-10-08 15:44:07.172516+08 |     79248597    
 user #7  | 2012-10-08 15:37:00.961316+08 |     42857958    
 user #7  | 2012-10-08 15:31:24.519716+08 |     95801165    
 user #7  | 2012-10-08 15:24:51.572516+08 |     68538811    
 user #8  | 2012-10-08 15:47:58.810916+08 |     72727414    
 user #8  | 2012-10-08 15:46:36.903716+08 |     31336421    
 user #8  | 2012-10-08 15:46:19.364516+08 |     41223414    
 user #8  | 2012-10-08 15:41:28.974116+08 |     65943069    
 user #8  | 2012-10-08 15:41:17.223716+08 |     50431652    
 user #9  | 2012-10-08 15:49:09.658916+08 |     32979344    
 user #9  | 2012-10-08 15:47:22.350116+08 |     78366412    
 user #9  | 2012-10-08 15:45:29.511716+08 |     12629410    
 user #9  | 2012-10-08 15:43:49.201316+08 |     97705953    
 user #9  | 2012-10-08 15:40:07.930916+08 |     30183886    

未使用递归查询的话, 不管是seqscan还是indexonlyscan耗时都非常庞大.

select x.* from ( select t.username    
          from test t    
         group by t.username order by username ) as t1,    
  LATERAL(    
  select t.* from test t where    
  t.username=t1.username order by t.some_ts desc limit 5    
  ) as x;    

执行计划

 Nested Loop  (cost=0.00..1672407.28 rows=50 width=20) (actual time=0.078..2446.767 rows=50 loops=1)    
   ->  Group  (cost=0.00..1672358.25 rows=10 width=8) (actual time=0.037..2446.275 rows=10 loops=1)    
         ->  Index Only Scan using i1 on test t  (cost=0.00..1667358.25 rows=2000000 width=8) (actual time=0.033..2080.098 rows=2000    
000 loops=1)    
               Heap Fetches: 2000000    
   ->  Limit  (cost=0.00..4.79 rows=5 width=20) (actual time=0.035..0.041 rows=5 loops=10)    
         ->  Index Scan Backward using i1 on test t_1  (cost=0.00..191701.23 rows=200000 width=20) (actual time=0.030..0.035 rows=5     
loops=10)    
               Index Cond: (username = t.username)    
 Total runtime: 2446.833 ms    

执行结果

 username |            some_ts            | random_value     
----------+-------------------------------+--------------    
 user #0  | 2012-10-08 15:48:45.553316+08 |     88033114    
 user #0  | 2012-10-08 15:47:47.751716+08 |      1162776    
 user #0  | 2012-10-08 15:46:32.842916+08 |      5801749    
 user #0  | 2012-10-08 15:44:55.642916+08 |     94951593    
 user #0  | 2012-10-08 15:42:52.522916+08 |     24722119    
 user #1  | 2012-10-08 15:44:48.471716+08 |     60946458    
 user #1  | 2012-10-08 15:35:37.153316+08 |     12236441    
 user #1  | 2012-10-08 15:33:01.460516+08 |     34209317    
 user #1  | 2012-10-08 15:32:36.231716+08 |     91423982    
 user #1  | 2012-10-08 15:31:57.351716+08 |     46008329    
 user #2  | 2012-10-08 15:48:21.188516+08 |     18521498    
 user #2  | 2012-10-08 15:48:07.710116+08 |     30330399    
 user #2  | 2012-10-08 15:44:47.175716+08 |     20635202    
 user #2  | 2012-10-08 15:42:47.166116+08 |     24079275    
 user #2  | 2012-10-08 15:41:37.095716+08 |      4902864    
 user #3  | 2012-10-08 15:42:26.084516+08 |     52659326    
 user #3  | 2012-10-08 15:41:35.194916+08 |     69423103    
 user #3  | 2012-10-08 15:38:00.145316+08 |     84811251    
 user #3  | 2012-10-08 15:35:06.308516+08 |     75104712    
 user #3  | 2012-10-08 15:34:55.594916+08 |     73931958    
 user #4  | 2012-10-08 15:49:11.732516+08 |     78025638    
 user #4  | 2012-10-08 15:47:19.412516+08 |      9725755    
 user #4  | 2012-10-08 15:45:07.134116+08 |     41955915    
 user #4  | 2012-10-08 15:43:32.612516+08 |      8423193    
 user #4  | 2012-10-08 15:42:14.161316+08 |     76958806    
 user #5  | 2012-10-08 15:46:15.649316+08 |     11810519    
 user #5  | 2012-10-08 15:43:08.334116+08 |     58534489    
 user #5  | 2012-10-08 15:39:19.892516+08 |      8442981    
 user #5  | 2012-10-08 15:37:09.860516+08 |     70260530    
 user #5  | 2012-10-08 15:36:11.626916+08 |     31693867    
 user #6  | 2012-10-08 15:35:00.865316+08 |     73969641    
 user #6  | 2012-10-08 15:32:59.818916+08 |     93242875    
 user #6  | 2012-10-08 15:31:04.474916+08 |     19057377    
 user #6  | 2012-10-08 15:28:27.572516+08 |     32746489    
 user #6  | 2012-10-08 15:28:15.562916+08 |     53467567    
 user #7  | 2012-10-08 15:45:52.753316+08 |     87485440    
 user #7  | 2012-10-08 15:44:07.172516+08 |     79248597    
 user #7  | 2012-10-08 15:37:00.961316+08 |     42857958    
 user #7  | 2012-10-08 15:31:24.519716+08 |     95801165    
 user #7  | 2012-10-08 15:24:51.572516+08 |     68538811    
 user #8  | 2012-10-08 15:47:58.810916+08 |     72727414    
 user #8  | 2012-10-08 15:46:36.903716+08 |     31336421    
 user #8  | 2012-10-08 15:46:19.364516+08 |     41223414    
 user #8  | 2012-10-08 15:41:28.974116+08 |     65943069    
 user #8  | 2012-10-08 15:41:17.223716+08 |     50431652    
 user #9  | 2012-10-08 15:49:09.658916+08 |     32979344    
 user #9  | 2012-10-08 15:47:22.350116+08 |     78366412    
 user #9  | 2012-10-08 15:45:29.511716+08 |     12629410    
 user #9  | 2012-10-08 15:43:49.201316+08 |     97705953    
 user #9  | 2012-10-08 15:40:07.930916+08 |     30183886    

lateral语法

https://www.postgresql.org/docs/devel/static/sql-select.html

lateral可以在FROM ITEM中,也可以在JOIN ITEM中。LATERAL后面可以是子查询、函数名。LATERAL中的子查询可以直接引用LATERAL左边的ITEM进行JOIN(类似exists, not exists子句中的引用用法)。

A LATERAL item can appear at top level in the FROM list, or within a JOIN tree.

In the latter case it can also refer to any items that are on the left-hand side of a JOIN that it is on the right-hand side of.

如果后面是函数名,那么可以不写lateral,因为JOIN中的函数会隐式使用lateral cross join.

Table functions appearing in FROM can also be preceded by the key word LATERAL, but for functions the key word is optional;

the function’s arguments can contain references to columns provided by preceding FROM items in any case.

    [ LATERAL ] ( select ) [ AS ] alias [ ( column_alias [, ...] ) ]  
    with_query_name [ [ AS ] alias [ ( column_alias [, ...] ) ] ]  
    [ LATERAL ] function_name ( [ argument [, ...] ] )  
                [ WITH ORDINALITY ] [ [ AS ] alias [ ( column_alias [, ...] ) ] ]  
    [ LATERAL ] function_name ( [ argument [, ...] ] ) [ AS ] alias ( column_definition [, ...] )  
    [ LATERAL ] function_name ( [ argument [, ...] ] ) AS ( column_definition [, ...] )  
    [ LATERAL ] ROWS FROM( function_name ( [ argument [, ...] ] ) [ AS ( column_definition [, ...] ) ] [, ...] )  
                [ WITH ORDINALITY ] [ [ AS ] alias [ ( column_alias [, ...] ) ] ]  

带有LATERAL的SQL的计算步骤

1、逐行提取被lateral子句关联(引用)的FROM或JOIN中的ITEM(也叫source table)的记录(s)中的column(s)

for each row of the FROM item providing the cross-referenced column(s),
or set of rows of multiple FROM items providing the columns,

2、使用以上提取的column(s), 关联计算lateral子句中的ITEM

the LATERAL item is evaluated using that row or row set’s values of the columns.

3、lateral的计算结果row(s),与所有from, join ITEM(s)正常的进行JOIN计算

The resulting row(s) are joined as usual with the rows they were computed from.

4、从1到3开始循环,直到所有source table的行都取尽。

This is repeated for each row or set of rows from the column source table(s).

LATERAL在OUTER JOIN中的使用限制(或定义限制)

由于lateral的计算步骤是从source table逐条展开的,所以在OUTER JOIN时只能使用source table作为whole端,LATERAL内的ITEM不能作为WHOLE端。

因此lateral只能在left join的右边。或者right join的左边。因为它不能是WHOLE端。

The column source table(s) must be INNER or LEFT joined to the LATERAL item,   
  
else there would not be a well-defined set of rows from which to compute each set of rows for the LATERAL item.   
  
Thus, although a construct such as X RIGHT JOIN LATERAL Y is syntactically valid,   
  
it is not actually allowed for Y to reference X.  

lateral 举例

https://www.postgresql.org/docs/devel/static/queries-table-expressions.html#QUERIES-LATERAL

1、

LATERAL is primarily useful when the cross-referenced column is necessary for computing the row(s) to be joined. A common application is providing an argument value for a set-returning function.

For example, supposing that vertices(polygon) returns the set of vertices of a polygon, we could identify close-together vertices of polygons stored in a table with:

SELECT p1.id, p2.id, v1, v2  
FROM polygons p1, polygons p2,  
     LATERAL vertices(p1.poly) v1,  
     LATERAL vertices(p2.poly) v2  
WHERE (v1 <-> v2) < 10 AND p1.id != p2.id;  

函数调用,支持应用函数左边的ITEM(s),所以可以消除LATERAL,语义上是一样的。

(As already mentioned, the LATERAL key word is unnecessary in this example, but we use it for clarity.)

SELECT p1.id, p2.id, v1, v2  
FROM polygons p1, polygons p2,  
     vertices(p1.poly) v1,  
     vertices(p2.poly) v2  
WHERE (v1 <-> v2) < 10 AND p1.id != p2.id;  

2、

It is often particularly handy to LEFT JOIN to a LATERAL subquery, so that source rows will appear in the result even if the LATERAL subquery produces no rows for them.

For example, if get_product_names() returns the names of products made by a manufacturer, but some manufacturers in our table currently produce no products, we could find out which ones those are like this:

SELECT m.name  
FROM manufacturers m LEFT JOIN LATERAL get_product_names(m.id) pname ON true  
WHERE pname IS NULL;  

参考

1. performance tuning case :use cursor or trigger replace group by and order by

http://blog.163.com/digoal@126/blog/static/16387704020128142829610/

2. table or row variable :

http://blog.163.com/digoal@126/blog/static/1638770402012989219190/

3. http://www.depesz.com/2012/10/05/getting-top-n-rows-per-group/

4. http://www.postgresql.org/docs/devel/static/queries-table-expressions.html

5. https://github.com/postgres/postgres/downloads

6. http://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=5ebaaa49445eb1ba7b299bbea3a477d4e4c0430b

7. SRC :

doc/src/sgml/keywords.sgml    
doc/src/sgml/queries.sgml    
doc/src/sgml/ref/select.sgml    
doc/src/sgml/xfunc.sgml    
src/backend/catalog/information_schema.sql    
src/backend/catalog/sql_features.txt    
src/backend/nodes/outfuncs.c    
src/backend/optimizer/path/allpaths.c    
src/backend/optimizer/path/costsize.c    
src/backend/optimizer/path/indxpath.c    
src/backend/optimizer/path/joinpath.c    
src/backend/optimizer/path/joinrels.c    
src/backend/optimizer/path/tidpath.c    
src/backend/optimizer/plan/analyzejoins.c    
src/backend/optimizer/plan/createplan.c    
src/backend/optimizer/plan/initsplan.c    
src/backend/optimizer/plan/planagg.c    
src/backend/optimizer/plan/planner.c    
src/backend/optimizer/prep/prepjointree.c    
src/backend/optimizer/README    
src/backend/optimizer/util/placeholder.c    
src/backend/optimizer/util/var.c    
src/backend/parser/analyze.c    
src/backend/parser/gram.c    
src/backend/parser/gram.h    
src/backend/parser/gram.y    
src/backend/parser/parse_agg.c    
src/backend/parser/parse_clause.c    
src/backend/parser/parse_relation.c    
src/backend/utils/adt/ruleutils.c    
src/bin/psql/sql_help.c    
src/include/nodes/parsenodes.h    
src/include/nodes/relation.h    
src/include/parser/gram.h    
src/include/parser/kwlist.h    
src/include/parser/parse_node.h    
src/interfaces/ecpg/preproc/ecpg.addons    
src/interfaces/ecpg/preproc/preproc.c    
src/interfaces/ecpg/preproc/preproc.h    
src/interfaces/ecpg/preproc/preproc.y    
src/test/regress/expected/join.out    
src/test/regress/sql/join.sql    

Flag Counter

digoal’s 大量PostgreSQL文章入口