don’t mistake the float/double’s display & real value

3 minute read

背景

使用单精或双精类型时, 我们查询出来的值可能与真实存储的值有一定差别.

这里体现了眼见不为实的特征.

以下是float4, float8的输出函数.

src/backend/utils/adt/float.c

/*  
 *              float4out               - converts a float4 number to a string  
 *                                                using a standard output format  
 */  
Datum  
float4out(PG_FUNCTION_ARGS)  
{  
        float4          num = PG_GETARG_FLOAT4(0);  
        char       *ascii = (char *) palloc(MAXFLOATWIDTH + 1);  
  
        if (isnan(num))  
                PG_RETURN_CSTRING(strcpy(ascii, "NaN"));  
  
        switch (is_infinite(num))  
        {  
                case 1:  
                        strcpy(ascii, "Infinity");  
                        break;  
                case -1:  
                        strcpy(ascii, "-Infinity");  
                        break;  
                default:  
                        {  
                                int                     ndig = FLT_DIG + extra_float_digits;  
  
                                if (ndig < 1)  
                                        ndig = 1;  
  
                                snprintf(ascii, MAXFLOATWIDTH + 1, "%.*g", ndig, num);  
                        }  
        }  
  
        PG_RETURN_CSTRING(ascii);  
}  
  
/*  
 *              float8out               - converts float8 number to a string  
 *                                                using a standard output format  
 */  
Datum  
float8out(PG_FUNCTION_ARGS)  
{  
        float8          num = PG_GETARG_FLOAT8(0);  
        char       *ascii = (char *) palloc(MAXDOUBLEWIDTH + 1);  
  
        if (isnan(num))  
                PG_RETURN_CSTRING(strcpy(ascii, "NaN"));  
  
        switch (is_infinite(num))  
        {  
                case 1:  
                        strcpy(ascii, "Infinity");  
                        break;  
                case -1:  
                        strcpy(ascii, "-Infinity");  
                        break;  
                default:  
                        {  
                                int                     ndig = DBL_DIG + extra_float_digits;  
  
                                if (ndig < 1)  
                                        ndig = 1;  
  
                                snprintf(ascii, MAXDOUBLEWIDTH + 1, "%.*g", ndig, num);  
                        }  
        }  
  
        PG_RETURN_CSTRING(ascii);  
}  

以下是numeric的输出函数

src/backend/utils/adt/numeric.c

/*  
 * numeric_out() -  
 *  
 *      Output function for numeric data type  
 */  
Datum  
numeric_out(PG_FUNCTION_ARGS)  
{  
        Numeric         num = PG_GETARG_NUMERIC(0);  
        NumericVar      x;  
        char       *str;  
  
        /*  
         * Handle NaN  
         */  
        if (NUMERIC_IS_NAN(num))  
                PG_RETURN_CSTRING(pstrdup("NaN"));  
  
        /*  
         * Get the number in the variable format.  
         */  
        init_var_from_num(num, &x);  
  
        str = get_str_from_var(&x);  
  
        PG_RETURN_CSTRING(str);  
}  

下面来做一个简单的测试 :

postgres=# create table t3(c1 float, c2 numeric);  
CREATE TABLE  
postgres=# \d t3  
           Table "public.t3"  
 Column |       Type       | Modifiers   
--------+------------------+-----------  
 c1     | double precision |   
 c2     | numeric          |   
  
postgres=# insert into t3 values (1.55555555555555555555555555555555555, 1.55555555555555555555555555555555555);  
INSERT 0 1  
postgres=# select * from t3;  
        c1        |                  c2                     
------------------+---------------------------------------  
 1.55555555555556 | 1.55555555555555555555555555555555555  
(1 row)  

从以上结果看, 我们很容易被误导, 以为c1存储的是1.55555555555556, 其实c1存储的值并不是1.55555555555556, 而是通过snprintf 打印的失真后的字符串.

所以这个查询是没有结果的 :

postgres=# select * from t3 where c1>=1.55555555555556;  
 c1 | c2   
----+----  
(0 rows)  

怎样让他有结果呢?

必须把输出的字符在转成numeric, 就有结果了 :

转成numeric后, 就是真的1.55555555555556了.

postgres=# select * from t3 where c1::numeric>=1.55555555555556;  
        c1        |                  c2                     
------------------+---------------------------------------  
 1.55555555555556 | 1.55555555555555555555555555555555555  
(1 row)  

其实explain 的输出也采用了float8out, 看以下SQL, Filter 里面用到了转换.

postgres=# explain select * from t3 where c1>=1.5555555555555555555555555555555555555555555555555555555555555555555555555555;  
                      QUERY PLAN                         
-------------------------------------------------------  
 Seq Scan on t3  (cost=0.00..68.38 rows=1557 width=40)  
   Filter: (c1 >= 1.55555555555556::double precision)  
(2 rows)  

对于精度要求比较高的场景建议使用numeric来存储, 以免出现以上问题.

逻辑复制也需要注意这个问题,如果我们使用字符串来传输需要复制的SQL,就有这样的问题,如果是取字段的二进制表述则不会有这样的问题。

postgres=# SELECT * FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');  
    slot_name    | xlog_position   
-----------------+---------------  
 regression_slot | 0/4C4A260  
(1 row)  
postgres=# SELECT * FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL);  
 location | xid | data   
----------+-----+------  
(0 rows)  
postgres=# create table test(c1 float);  
CREATE TABLE  
postgres=# SELECT * FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL);  
 location  | xid  |    data       
-----------+------+-------------  
 0/4C4A2F0 | 9723 | BEGIN 9723  
 0/4C5A6D0 | 9723 | COMMIT 9723  
(2 rows)  
  
postgres=# insert into test values (1.5555555555555555555555);  
INSERT 0 1  
postgres=# SELECT * FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL);  
 location  | xid  |                               data                                 
-----------+------+------------------------------------------------------------------  
 0/4C5A6D0 | 9724 | BEGIN 9724  
 0/4C5A6D0 | 9724 | table public.test: INSERT: c1[double precision]:1.55555555555556  
 0/4C5A758 | 9724 | COMMIT 9724  
(3 rows)  

这个问题与pg_dump一样,都可以用提升精度的方法解决:

postgres=# set extra_float_digits =3;  
SET  
postgres=# insert into test values (1.5555555555555555555555);  
INSERT 0 1  
postgres=# SELECT * FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL);  
 location  | xid  |                                data                                   
-----------+------+---------------------------------------------------------------------  
 0/4C5A978 | 9725 | BEGIN 9725  
 0/4C5A978 | 9725 | table public.test: INSERT: c1[double precision]:1.55555555555555558  
 0/4C5AA78 | 9725 | COMMIT 9725  
(3 rows)  

提升精度后,两者的二进制表述一致。

postgres=# select float8send(float '1.5555555555555555555555');  
     float8send       
--------------------  
 \x3ff8e38e38e38e39  
(1 row)  
  
postgres=# select float8send(float '1.55555555555555558');  
     float8send       
--------------------  
 \x3ff8e38e38e38e39  
(1 row)  

例如:

postgres=# select float8send(1.555555555555555555555);  
     float8send       
--------------------  
 \x3ff8e38e38e38e39  
(1 row)  
postgres=# select float4send(1.555555555555555555555);  
 float4send   
------------  
 \x3fc71c72  
(1 row)  
postgres=# select numeric_send(1.555555555555555555555);  
                  numeric_send                    
------------------------------------------------  
 \x0007000000000015000115b315b315b315b315b31388  
(1 row)  
  
postgres=# select numeric_send(1.5555555555555555555555);  
                  numeric_send                    
------------------------------------------------  
 \x0007000000000016000115b315b315b315b315b3157c  
(1 row)  
  
postgres=# select numeric_send(1.55555555555555555555555555555555555555555555555555555555555);  
                                    numeric_send                                      
------------------------------------------------------------------------------------  
 \x001000000000003b000115b315b315b315b315b315b315b315b315b315b315b315b315b315b315ae  
(1 row)  

使用send函数也可以很方便的了解占用的字节数。

src/backup/util/adt/float.c

/*  
 *              float4recv                      - converts external binary format to float4  
 */  
Datum  
float4recv(PG_FUNCTION_ARGS)  
{  
        StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);  
  
        PG_RETURN_FLOAT4(pq_getmsgfloat4(buf));  
}  
  
/*  
 *              float4send                      - converts float4 to binary format  
 */  
Datum  
float4send(PG_FUNCTION_ARGS)  
{  
        float4          num = PG_GETARG_FLOAT4(0);  
        StringInfoData buf;  
  
        pq_begintypsend(&buf);  
        pq_sendfloat4(&buf, num);  
        PG_RETURN_BYTEA_P(pq_endtypsend(&buf));  
}  

src/backend/libpq/pqformat.c

/* --------------------------------  
 *              pq_sendfloat4   - append a float4 to a StringInfo buffer  
 *  
 * The point of this routine is to localize knowledge of the external binary  
 * representation of float4, which is a component of several datatypes.  
 *  
 * We currently assume that float4 should be byte-swapped in the same way  
 * as int4.  This rule is not perfect but it gives us portability across  
 * most IEEE-float-using architectures.  
 * --------------------------------  
 */  
void  
pq_sendfloat4(StringInfo buf, float4 f)  
{  
        union  
        {  
                float4          f;  
                uint32          i;  
        }                       swap;  
  
        swap.f = f;  
        swap.i = htonl(swap.i);  
  
        appendBinaryStringInfo(buf, (char *) &swap.i, 4);  
}  

对应逻辑备份,pg_dump做了一个处理,提高精度。

	/*  
         * If supported, set extra_float_digits so that we can dump float data  
         * exactly (given correctly implemented float I/O code, anyway)  
         */  
        if (AH->remoteVersion >= 90000)  
                ExecuteSqlStatement(AH, "SET extra_float_digits TO 3");  
        else if (AH->remoteVersion >= 70400)  
                ExecuteSqlStatement(AH, "SET extra_float_digits TO 2");  

如果要完美解决这个问题,使用pg_dump输出结果,转换为bytea和原始值是一致的。

Flag Counter

digoal’s 大量PostgreSQL文章入口