Greenplum 最佳实践 - 如何支持反转索引(reverse, orafunc)

2 minute read

背景

GP的反转索引可以通过函数reverse来实现,但是这个函数在GP的版本中没有,所以需要port过来。

可以在9.5的代码中找到

src/backend/utils/adt/varlena.c

$ vi reverse.c  
  
#include <string.h>  
#include "postgres.h"  
#include "fmgr.h"  
  
PG_MODULE_MAGIC;  
  
PG_FUNCTION_INFO_V1(text_reverse);  
  
  
/*  
 * Return reversed string  
 */  
Datum  
text_reverse(PG_FUNCTION_ARGS)  
{  
        text       *str = PG_GETARG_TEXT_PP(0);  
        const char *p = VARDATA_ANY(str);  
        int                     len = VARSIZE_ANY_EXHDR(str);  
        const char *endp = p + len;  
        text       *result;  
        char       *dst;  
  
        result = palloc(len + VARHDRSZ);  
        dst = (char *) VARDATA(result) + len;  
        SET_VARSIZE(result, len + VARHDRSZ);  
  
        if (pg_database_encoding_max_length() > 1)  
        {  
                /* multibyte version */  
                while (p < endp)  
                {  
                        int                     sz;  
  
                        sz = pg_mblen(p);  
                        dst -= sz;  
                        memcpy(dst, p, sz);  
                        p += sz;  
                }  
        }  
        else  
        {  
                /* single byte version */  
                while (p < endp)  
                        *(--dst) = *p++;  
        }  
  
        PG_RETURN_TEXT_P(result);  
}  

编译

gcc -O3 -Wall -Wextra -Werror -I /home/digoal/gpsrc/src/include -g -fPIC -c ./reverse.c -o reverse.o  
gcc -O3 -Wall -Wextra -Werror -I /home/digoal/gpsrc/src/include -g -shared reverse.o -o libreverse.so  
  
cp libreverse.so /home/digoal/gphome/lib  

拷贝到所有节点

gpscp -f ./host /home/digoal/gphome/lib/libreverse.so =:/home/digoal/gphome/lib/  

创建函数并测试可用性

postgres=# create or replace function reverse(text) returns text as '/home/digoal/gphome/lib/libreverse.so', 'text_reverse' language C STRICT immutable;  
CREATE FUNCTION  
postgres=# select reverse('abc');  
 reverse   
---------  
 cba  
(1 row)  
  
postgres=# select reverse('a f d12');  
 reverse   
---------  
 21d f a  
(1 row)  
  
postgres=# select reverse(null);  
 reverse   
---------  
  
(1 row)  

创建反转索引测试

postgres=# create table t(id int, info text);  
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table.  
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.  
CREATE TABLE  
postgres=# create index idx on t(reverse(info));  
CREATE INDEX  
postgres=# insert into t select id,md5(random()::text) from generate_series(1,1000000) t(id);  
INSERT 0 1000000  
  
postgres=# select id,info,reverse(info) from t limit 10;  
  id  |               info               |             reverse                
------+----------------------------------+----------------------------------  
  197 | fefb23cb1705e6faa74601e6cd0dc8d7 | 7d8cd0dc6e10647aaf6e5071bc32bfef  
  314 | 64c3d79458fc0ba2413e5493582830dd | dd0382853945e3142ab0cf85497d3c46  
  426 | e0486de86c2c6bd72912fbc33d059de8 | 8ed950d33cbf21927db6c2c68ed6840e  
  715 | c5087148a8086e2201a63b158268adbb | bbda862851b36a1022e6808a8417805c  
  768 | 50ebdba7ff260d5adc11495313817221 | 12271831359411cda5d062ff7abdbe05  
  944 | 8da13db138858ec1f78193f5c16cc310 | 013cc61c5f39187f1ce858831bd31ad8  
 1057 | cf029096c2c66714861d0adba9ab49d8 | 8d94ba9abda0d16841766c2c690920fc  
 1233 | ae6eb1bdf32b15c73c1a04adf54b9881 | 1889b45fda40a1c37c51b23fdb1be6ea  
 1286 | 9943f89159055e0450765ab0968a1ad8 | 8da1a8690ba5670540e55095198f3499  
 1575 | b8f0337315d238070984b9883a965c57 | 75c569a3889b489070832d5137330f8b  
(10 rows)  
  
postgres=# select * from t where reverse(info)>='7d8cd0dc6e10647aaf6e507' and reverse(info)<'7d8cd0dc6e10647aaf6e508';  
 id  |               info                 
-----+----------------------------------  
 197 | fefb23cb1705e6faa74601e6cd0dc8d7  
(1 row)  
  
postgres=# explain analyze select * from t where reverse(info)>='7d8cd0dc6e10647aaf6e507' and reverse(info)<'7d8cd0dc6e10647aaf6e508';  
                                                           QUERY PLAN                                                             
--------------------------------------------------------------------------------------------------------------------------------  
 Gather Motion 240:1  (slice1; segments: 240)  (cost=11012.90..13972.90 rows=40001 width=37)  
   Rows out:  1 rows at destination with 9.113 ms to first row, 43 ms to end, start offset by 1.317 ms.  
   ->  Bitmap Heap Scan on t  (cost=11012.90..13972.90 rows=167 width=37)  
         Recheck Cond: reverse(info) >= '7d8cd0dc6e10647aaf6e507'::text AND reverse(info) < '7d8cd0dc6e10647aaf6e508'::text  
         Rows out:  1 rows (seg46) with 0.156 ms to first row, 0.178 ms to end, start offset by 9.850 ms.  
         ->  Bitmap Index Scan on idx  (cost=0.00..11002.90 rows=167 width=0)  
               Index Cond: reverse(info) >= '7d8cd0dc6e10647aaf6e507'::text AND reverse(info) < '7d8cd0dc6e10647aaf6e508'::text  
               Bitmaps out:  Avg 1.0 x 240 workers.  Max 1 (seg0) with 0.021 ms to end, start offset by 8.845 ms.  
               Work_mem used:  9K bytes.  
 Slice statistics:  
   (slice0)    Executor memory: 475K bytes.  
   (slice1)    Executor memory: 321K bytes avg x 240 workers, 329K bytes max (seg46).  Work_mem: 9K bytes max.  
 Statement statistics:  
   Memory used: 128000K bytes  
 Total runtime: 71.958 ms  
(15 rows)  

适用场景:

1. 带后缀的检索。

select * from table where reverse(info) like 'abcd%';  
  
等同于  
  
select * from table where info like '%dcba';  

其他

有原生支持的reverse(),跑一下/usr/local/greenplum-db/share/postgresql/contrib/orafun.sql 即可,无需自定义函数

Flag Counter

digoal’s 大量PostgreSQL文章入口