don’t mistake the float/double’s display & real value
背景
使用单精或双精类型时, 我们查询出来的值可能与真实存储的值有一定差别.
这里体现了眼见不为实的特征.
以下是float4, float8的输出函数.
src/backend/utils/adt/float.c
/*
* float4out - converts a float4 number to a string
* using a standard output format
*/
Datum
float4out(PG_FUNCTION_ARGS)
{
float4 num = PG_GETARG_FLOAT4(0);
char *ascii = (char *) palloc(MAXFLOATWIDTH + 1);
if (isnan(num))
PG_RETURN_CSTRING(strcpy(ascii, "NaN"));
switch (is_infinite(num))
{
case 1:
strcpy(ascii, "Infinity");
break;
case -1:
strcpy(ascii, "-Infinity");
break;
default:
{
int ndig = FLT_DIG + extra_float_digits;
if (ndig < 1)
ndig = 1;
snprintf(ascii, MAXFLOATWIDTH + 1, "%.*g", ndig, num);
}
}
PG_RETURN_CSTRING(ascii);
}
/*
* float8out - converts float8 number to a string
* using a standard output format
*/
Datum
float8out(PG_FUNCTION_ARGS)
{
float8 num = PG_GETARG_FLOAT8(0);
char *ascii = (char *) palloc(MAXDOUBLEWIDTH + 1);
if (isnan(num))
PG_RETURN_CSTRING(strcpy(ascii, "NaN"));
switch (is_infinite(num))
{
case 1:
strcpy(ascii, "Infinity");
break;
case -1:
strcpy(ascii, "-Infinity");
break;
default:
{
int ndig = DBL_DIG + extra_float_digits;
if (ndig < 1)
ndig = 1;
snprintf(ascii, MAXDOUBLEWIDTH + 1, "%.*g", ndig, num);
}
}
PG_RETURN_CSTRING(ascii);
}
以下是numeric的输出函数
src/backend/utils/adt/numeric.c
/*
* numeric_out() -
*
* Output function for numeric data type
*/
Datum
numeric_out(PG_FUNCTION_ARGS)
{
Numeric num = PG_GETARG_NUMERIC(0);
NumericVar x;
char *str;
/*
* Handle NaN
*/
if (NUMERIC_IS_NAN(num))
PG_RETURN_CSTRING(pstrdup("NaN"));
/*
* Get the number in the variable format.
*/
init_var_from_num(num, &x);
str = get_str_from_var(&x);
PG_RETURN_CSTRING(str);
}
下面来做一个简单的测试 :
postgres=# create table t3(c1 float, c2 numeric);
CREATE TABLE
postgres=# \d t3
Table "public.t3"
Column | Type | Modifiers
--------+------------------+-----------
c1 | double precision |
c2 | numeric |
postgres=# insert into t3 values (1.55555555555555555555555555555555555, 1.55555555555555555555555555555555555);
INSERT 0 1
postgres=# select * from t3;
c1 | c2
------------------+---------------------------------------
1.55555555555556 | 1.55555555555555555555555555555555555
(1 row)
从以上结果看, 我们很容易被误导, 以为c1存储的是1.55555555555556, 其实c1存储的值并不是1.55555555555556, 而是通过snprintf 打印的失真后的字符串.
所以这个查询是没有结果的 :
postgres=# select * from t3 where c1>=1.55555555555556;
c1 | c2
----+----
(0 rows)
怎样让他有结果呢?
必须把输出的字符在转成numeric, 就有结果了 :
转成numeric后, 就是真的1.55555555555556了.
postgres=# select * from t3 where c1::numeric>=1.55555555555556;
c1 | c2
------------------+---------------------------------------
1.55555555555556 | 1.55555555555555555555555555555555555
(1 row)
其实explain 的输出也采用了float8out, 看以下SQL, Filter 里面用到了转换.
postgres=# explain select * from t3 where c1>=1.5555555555555555555555555555555555555555555555555555555555555555555555555555;
QUERY PLAN
-------------------------------------------------------
Seq Scan on t3 (cost=0.00..68.38 rows=1557 width=40)
Filter: (c1 >= 1.55555555555556::double precision)
(2 rows)
对于精度要求比较高的场景建议使用numeric来存储, 以免出现以上问题.
逻辑复制也需要注意这个问题,如果我们使用字符串来传输需要复制的SQL,就有这样的问题,如果是取字段的二进制表述则不会有这样的问题。
postgres=# SELECT * FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
slot_name | xlog_position
-----------------+---------------
regression_slot | 0/4C4A260
(1 row)
postgres=# SELECT * FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL);
location | xid | data
----------+-----+------
(0 rows)
postgres=# create table test(c1 float);
CREATE TABLE
postgres=# SELECT * FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL);
location | xid | data
-----------+------+-------------
0/4C4A2F0 | 9723 | BEGIN 9723
0/4C5A6D0 | 9723 | COMMIT 9723
(2 rows)
postgres=# insert into test values (1.5555555555555555555555);
INSERT 0 1
postgres=# SELECT * FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL);
location | xid | data
-----------+------+------------------------------------------------------------------
0/4C5A6D0 | 9724 | BEGIN 9724
0/4C5A6D0 | 9724 | table public.test: INSERT: c1[double precision]:1.55555555555556
0/4C5A758 | 9724 | COMMIT 9724
(3 rows)
这个问题与pg_dump一样,都可以用提升精度的方法解决:
postgres=# set extra_float_digits =3;
SET
postgres=# insert into test values (1.5555555555555555555555);
INSERT 0 1
postgres=# SELECT * FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL);
location | xid | data
-----------+------+---------------------------------------------------------------------
0/4C5A978 | 9725 | BEGIN 9725
0/4C5A978 | 9725 | table public.test: INSERT: c1[double precision]:1.55555555555555558
0/4C5AA78 | 9725 | COMMIT 9725
(3 rows)
提升精度后,两者的二进制表述一致。
postgres=# select float8send(float '1.5555555555555555555555');
float8send
--------------------
\x3ff8e38e38e38e39
(1 row)
postgres=# select float8send(float '1.55555555555555558');
float8send
--------------------
\x3ff8e38e38e38e39
(1 row)
例如:
postgres=# select float8send(1.555555555555555555555);
float8send
--------------------
\x3ff8e38e38e38e39
(1 row)
postgres=# select float4send(1.555555555555555555555);
float4send
------------
\x3fc71c72
(1 row)
postgres=# select numeric_send(1.555555555555555555555);
numeric_send
------------------------------------------------
\x0007000000000015000115b315b315b315b315b31388
(1 row)
postgres=# select numeric_send(1.5555555555555555555555);
numeric_send
------------------------------------------------
\x0007000000000016000115b315b315b315b315b3157c
(1 row)
postgres=# select numeric_send(1.55555555555555555555555555555555555555555555555555555555555);
numeric_send
------------------------------------------------------------------------------------
\x001000000000003b000115b315b315b315b315b315b315b315b315b315b315b315b315b315b315ae
(1 row)
使用send函数也可以很方便的了解占用的字节数。
src/backup/util/adt/float.c
/*
* float4recv - converts external binary format to float4
*/
Datum
float4recv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
PG_RETURN_FLOAT4(pq_getmsgfloat4(buf));
}
/*
* float4send - converts float4 to binary format
*/
Datum
float4send(PG_FUNCTION_ARGS)
{
float4 num = PG_GETARG_FLOAT4(0);
StringInfoData buf;
pq_begintypsend(&buf);
pq_sendfloat4(&buf, num);
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
src/backend/libpq/pqformat.c
/* --------------------------------
* pq_sendfloat4 - append a float4 to a StringInfo buffer
*
* The point of this routine is to localize knowledge of the external binary
* representation of float4, which is a component of several datatypes.
*
* We currently assume that float4 should be byte-swapped in the same way
* as int4. This rule is not perfect but it gives us portability across
* most IEEE-float-using architectures.
* --------------------------------
*/
void
pq_sendfloat4(StringInfo buf, float4 f)
{
union
{
float4 f;
uint32 i;
} swap;
swap.f = f;
swap.i = htonl(swap.i);
appendBinaryStringInfo(buf, (char *) &swap.i, 4);
}
对应逻辑备份,pg_dump做了一个处理,提高精度。
/*
* If supported, set extra_float_digits so that we can dump float data
* exactly (given correctly implemented float I/O code, anyway)
*/
if (AH->remoteVersion >= 90000)
ExecuteSqlStatement(AH, "SET extra_float_digits TO 3");
else if (AH->remoteVersion >= 70400)
ExecuteSqlStatement(AH, "SET extra_float_digits TO 2");
如果要完美解决这个问题,使用pg_dump输出结果,转换为bytea和原始值是一致的。