PostgreSQL 字符串 collate 与排序、memcpy优化 - 源码分析
背景
事情的起因是这样的,某个用户问我为什么在GP里面查询到的’A’ > ‘a’和PostgreSQL中查询到的结果不一样,一个是false, 一个true.
但是这个原因其实和Greenplum还是PostgreSQL是没关系的。
原因的根源还是collate。
例如,在使用比较操作符时,可以指定需要比较的字符串的collate来查看这里的区别:
postgres=# select 'a' > 'A' collate "en_US";
?column?
----------
f
(1 row)
postgres=# select 'a' > 'A' collate "C";
?column?
----------
t
(1 row)
postgres=# select 'a' > 'A' collate "zh_CN";
?column?
----------
f
(1 row)
操作符>的源代码 :
使用C作为collate时,实际上是memcmp直接对被比较的字符串的比较,而使用非C的collate则需要COPY字符串的内存,然后使用strcoll_l或strcoll进行比较。
所以建议数据库初始化时使用C,比如 --lc-collate=C --lc-ctype=en_US.utf8
。
Datum
text_gt(PG_FUNCTION_ARGS)
{
text *arg1 = PG_GETARG_TEXT_PP(0);
text *arg2 = PG_GETARG_TEXT_PP(1);
bool result;
result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
PG_RETURN_BOOL(result);
}
/* text_cmp()
* Internal comparison function for text strings.
* Returns -1, 0 or 1
*/
static int
text_cmp(text *arg1, text *arg2, Oid collid)
{
char *a1p,
*a2p;
int len1,
len2;
a1p = VARDATA_ANY(arg1);
a2p = VARDATA_ANY(arg2);
len1 = VARSIZE_ANY_EXHDR(arg1);
len2 = VARSIZE_ANY_EXHDR(arg2);
return varstr_cmp(a1p, len1, a2p, len2, collid);
}
/* varstr_cmp()
* Comparison function for text strings with given lengths.
* Includes locale support, but must copy strings to temporary memory
* to allow null-termination for inputs to strcoll().
* Returns an integer less than, equal to, or greater than zero, indicating
* whether arg1 is less than, equal to, or greater than arg2.
*/
int
varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
{
int result;
/*
* Unfortunately, there is no strncoll(), so in the non-C locale case we
* have to do some memory copying. This turns out to be significantly
* slower, so we optimize the case where LC_COLLATE is C. We also try to
* optimize relatively-short strings by avoiding palloc/pfree overhead.
*/
if (lc_collate_is_c(collid))
{
result = memcmp(arg1, arg2, Min(len1, len2));
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
}
else
{
#define STACKBUFLEN 1024
char a1buf[STACKBUFLEN];
char a2buf[STACKBUFLEN];
char *a1p,
*a2p;
#ifdef HAVE_LOCALE_T
pg_locale_t mylocale = 0;
#endif
if (collid != DEFAULT_COLLATION_OID)
{
if (!OidIsValid(collid))
{
/*
* This typically means that the parser could not resolve a
* conflict of implicit collations, so report it that way.
*/
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_COLLATION),
errmsg("could not determine which collation to use for string comparison"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
#ifdef HAVE_LOCALE_T
mylocale = pg_newlocale_from_collation(collid);
#endif
}
#ifdef WIN32
/* Win32 does not have UTF-8, so we need to map to UTF-16 */
if (GetDatabaseEncoding() == PG_UTF8)
{
int a1len;
int a2len;
int r;
if (len1 >= STACKBUFLEN / 2)
{
a1len = len1 * 2 + 2;
a1p = palloc(a1len);
}
else
{
a1len = STACKBUFLEN;
a1p = a1buf;
}
if (len2 >= STACKBUFLEN / 2)
{
a2len = len2 * 2 + 2;
a2p = palloc(a2len);
}
else
{
a2len = STACKBUFLEN;
a2p = a2buf;
}
/* stupid Microsloth API does not work for zero-length input */
if (len1 == 0)
r = 0;
else
{
r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
(LPWSTR) a1p, a1len / 2);
if (!r)
ereport(ERROR,
(errmsg("could not convert string to UTF-16: error code %lu",
GetLastError())));
}
((LPWSTR) a1p)[r] = 0;
if (len2 == 0)
r = 0;
else
{
r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
(LPWSTR) a2p, a2len / 2);
if (!r)
ereport(ERROR,
(errmsg("could not convert string to UTF-16: error code %lu",
GetLastError())));
}
((LPWSTR) a2p)[r] = 0;
errno = 0;
#ifdef HAVE_LOCALE_T
if (mylocale)
result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale);
else
#endif
result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
* headers */
ereport(ERROR,
(errmsg("could not compare Unicode strings: %m")));
/*
* In some locales wcscoll() can claim that nonidentical strings
* are equal. Believing that would be bad news for a number of
* reasons, so we follow Perl's lead and sort "equal" strings
* according to strcmp (on the UTF-8 representation).
*/
if (result == 0)
{
result = memcmp(arg1, arg2, Min(len1, len2));
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
}
if (a1p != a1buf)
pfree(a1p);
if (a2p != a2buf)
pfree(a2p);
return result;
}
#endif /* WIN32 */
if (len1 >= STACKBUFLEN)
a1p = (char *) palloc(len1 + 1);
else
a1p = a1buf;
if (len2 >= STACKBUFLEN)
a2p = (char *) palloc(len2 + 1);
else
a2p = a2buf;
memcpy(a1p, arg1, len1);
a1p[len1] = '\0';
memcpy(a2p, arg2, len2);
a2p[len2] = '\0';
#ifdef HAVE_LOCALE_T
if (mylocale)
result = strcoll_l(a1p, a2p, mylocale);
else
#endif
result = strcoll(a1p, a2p);
/*
* In some locales strcoll() can claim that nonidentical strings are
* equal. Believing that would be bad news for a number of reasons,
* so we follow Perl's lead and sort "equal" strings according to
* strcmp().
*/
if (result == 0)
result = strcmp(a1p, a2p);
if (a1p != a1buf)
pfree(a1p);
if (a2p != a2buf)
pfree(a2p);
}
return result;
}
在创建索引时也需要注意这一点,collate一定要和实际SQL中的collate匹配。
postgres=# create table test(id int , info text collate "zh_CN");
CREATE TABLE
postgres=# insert into test select generate_series(1,10000),md5(random()::text);
INSERT 0 10000
postgres=# \l+
List of databases
Name | Owner | Encoding | Collate | Ctype | Access privileges | Size | Tablespace | Description
-----------+----------+----------+---------+-------+-----------------------+---------+------------+--------------------------------------------
postgres | postgres | UTF8 | C | C | | 7960 kB | pg_default | default administrative connection database
template0 | postgres | UTF8 | C | C | =c/postgres +| 7129 kB | pg_default | unmodifiable empty database
| | | | | postgres=CTc/postgres | | |
template1 | postgres | UTF8 | C | C | =c/postgres +| 7129 kB | pg_default | default template for new databases
| | | | | postgres=CTc/postgres | | |
(3 rows)
postgres=# create index idx_test on test(info); -- 建索引不指定collate则使用的是表结构中的collate
CREATE INDEX
postgres=# explain select * from test where info='abc'; -- 查询是不使用collate则使用的是表结构中的collate
QUERY PLAN
----------------------------------------------------------------------
Index Scan using idx_test on test (cost=0.29..8.30 rows=1 width=37)
Index Cond: (info = 'abc'::text)
(2 rows)
postgres=# explain select * from test where info='abc' collate "C"; -- 与索引不一样的collate, 不能走索引
QUERY PLAN
-------------------------------------------------------
Seq Scan on test (cost=0.00..209.00 rows=1 width=37)
Filter: (info = 'abc'::text COLLATE "C")
(2 rows)
postgres=# explain select * from test where info='abc' collate "zh_CN"; -- 与索引一样的collate, 能走索引
QUERY PLAN
----------------------------------------------------------------------
Index Scan using idx_test on test (cost=0.29..8.30 rows=1 width=37)
Index Cond: (info = 'abc'::text COLLATE "zh_CN")
(2 rows)
postgres=# drop index idx_test;
DROP INDEX
postgres=# create index idx_test on test(info collate "C");
CREATE INDEX
postgres=# explain select * from test where info='abc' collate "zh_CN"; -- 与索引不一样的collate, 不能走索引
QUERY PLAN
-------------------------------------------------------
Seq Scan on test (cost=0.00..209.00 rows=1 width=37)
Filter: (info = 'abc'::text COLLATE "zh_CN")
(2 rows)
postgres=# explain select * from test where info='abc' collate "C"; -- 与索引一样的collate, 能走索引
QUERY PLAN
----------------------------------------------------------------------
Index Scan using idx_test on test (cost=0.29..8.30 rows=1 width=37)
Index Cond: (info = 'abc'::text COLLATE "C")
(2 rows)
性能也有一定的差异:
用collate C显然要好一点。
-> pgbench -M prepared -n -r -P 1 -f ./test.sql -c 24 -j 24 -T 10
progress: 1.0 s, 341151.9 tps, lat 0.068 ms stddev 0.028
progress: 2.0 s, 343049.3 tps, lat 0.068 ms stddev 0.032
progress: 3.0 s, 343784.8 tps, lat 0.068 ms stddev 0.025
progress: 4.0 s, 342717.9 tps, lat 0.068 ms stddev 0.040
progress: 5.0 s, 343661.1 tps, lat 0.068 ms stddev 0.027
progress: 6.0 s, 343550.0 tps, lat 0.068 ms stddev 0.026
progress: 7.0 s, 343515.6 tps, lat 0.068 ms stddev 0.024
progress: 8.0 s, 343560.9 tps, lat 0.068 ms stddev 0.029
progress: 9.0 s, 342749.7 tps, lat 0.068 ms stddev 0.044
progress: 10.0 s, 343844.8 tps, lat 0.068 ms stddev 0.024
transaction type: Custom query
scaling factor: 1
query mode: prepared
number of clients: 24
number of threads: 24
duration: 10 s
number of transactions actually processed: 3431607
latency average: 0.068 ms
latency stddev: 0.031 ms
tps = 343145.601594 (including connections establishing)
tps = 343323.296483 (excluding connections establishing)
statement latencies in milliseconds:
0.067984 select 'A'>'a' collate "C";
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 24 -j 24 -T 10
progress: 1.0 s, 330980.7 tps, lat 0.070 ms stddev 0.027
progress: 2.0 s, 331546.4 tps, lat 0.071 ms stddev 0.024
progress: 3.0 s, 333500.7 tps, lat 0.070 ms stddev 0.021
progress: 4.0 s, 333055.7 tps, lat 0.070 ms stddev 0.027
progress: 5.0 s, 332833.6 tps, lat 0.070 ms stddev 0.041
progress: 6.0 s, 329527.4 tps, lat 0.071 ms stddev 0.068
progress: 7.0 s, 330480.3 tps, lat 0.071 ms stddev 0.078
progress: 8.0 s, 333344.7 tps, lat 0.070 ms stddev 0.023
progress: 9.0 s, 333602.3 tps, lat 0.070 ms stddev 0.022
progress: 10.0 s, 332386.7 tps, lat 0.071 ms stddev 0.039
transaction type: Custom query
scaling factor: 1
query mode: prepared
number of clients: 24
number of threads: 24
duration: 10 s
number of transactions actually processed: 3321288
latency average: 0.071 ms
latency stddev: 0.042 ms
tps = 332067.898747 (including connections establishing)
tps = 332233.047809 (excluding connections establishing)
statement latencies in milliseconds:
0.070443 select 'A'>'a' collate "zh_CN";
参考
http://www.postgresql.org/docs/9.5/static/sql-createtable.html
http://www.postgresql.org/docs/9.5/static/sql-altertable.html
http://www.postgresql.org/docs/9.5/static/sql-expressions.html#SQL-SYNTAX-COLLATE-EXPRS
http://www.postgresql.org/docs/9.5/static/sql-createindex.html