Greenplum,HAWQ interval parser带来的问题 - TPCH 测试注意啦

3 minute read

背景

interval是用来表达时间间隔的数据类型,比如1年,或者1分钟,或者1天零多少小时分钟等。

postgres=# select interval '100 year 2 month 1 day 1:00:01.11'; 
              interval              
------------------------------------
 100 years 2 mons 1 day 01:00:01.11
(1 row)

interval可以与时间,日期类型加减。

postgres=# select now()+interval '100 year 2 month 1 day 1:00:01.11'; 
           ?column?            
-------------------------------
 2116-12-12 20:06:48.391422+08
(1 row)

interval的用法可参考

https://www.postgresql.org/docs/9.6/static/functions-datetime.html

interval parser不同版本的差异

1. PostgreSQL 8.3以及以前的版本不能解释放在单引号外面的单位

$psql -h 127.0.0.1 -p 35432 -U digoal postgres
psql (8.3.23)
Type "help" for help.

postgres=# select now(), now()+interval '1 year', now()+interval '1' year;
              now              |           ?column?            |           ?column?            
-------------------------------+-------------------------------+-------------------------------
 2016-10-11 19:02:46.881375+08 | 2017-10-11 19:02:46.881375+08 | 2016-10-11 19:02:46.881375+08
(1 row)
postgres=# select interval '100' year; 
 interval 
----------
 00:00:00
(1 row)

2. 8.4以及以后的版本则支持放在外面的单位的写法。

psql (9.4.9)
Type "help" for help.


postgres=# select now(), now()+interval '1 year', now()+interval '1' year;
              now              |           ?column?            |           ?column?            
-------------------------------+-------------------------------+-------------------------------
 2016-10-11 19:08:29.365853+08 | 2017-10-11 19:08:29.365853+08 | 2017-10-11 19:08:29.365853+08
(1 row)

postgres=# select interval '100' year; 
 interval  
-----------
 100 years
(1 row)

postgres=# select interval '100' hour; 
 interval  
-----------
 100:00:00
(1 row)

patch在这里
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=70530c808bf8eaba2a41a28c9dc7b96dcc3b6c51

Adjust the parser to accept the typename syntax INTERVAL ... SECOND(n)
and the literal syntax INTERVAL 'string' ... SECOND(n), as required by the
SQL standard.  Our old syntax put (n) directly after INTERVAL, which was
a mistake, but will still be accepted for backward compatibility as well
as symmetry with the TIMESTAMP cases.

Change intervaltypmodout to show it in the spec's way, too.  (This could
potentially affect clients, if there are any that analyze the typmod of an
INTERVAL in any detail.)

Also fix interval input to handle 'min:sec.frac' properly; I had overlooked
this case in my previous patch.

Document the use of the interval fields qualifier, which up to now we had
never mentioned in the docs.  (I think the omission was intentional because
it didn't work per spec; but it does now, or at least close enough to be
credible.)

tpch的QUERY造句

tpch的dbgen产生的query用的是带单位的写法,导致没有出现加减。

涉及的SQL如下

10.explain.sql: and o_orderdate < date '1993-04-01' + interval '3' month
12.explain.sql: and l_receiptdate < date '1995-01-01' + interval '1' year
14.explain.sql: and l_shipdate < date '1995-08-01' + interval '1' month
15.explain.sql:         and l_shipdate < date '1997-03-01' + interval '3' month
1.explain.sql:  l_shipdate <= date '1998-12-01' - interval '78' day
20.explain.sql:                                 and l_shipdate < date '1994-01-01' + interval '1' year
4.explain.sql:  and o_orderdate < date '1995-03-01' + interval '3' month
5.explain.sql:  and o_orderdate < date '1997-01-01' + interval '1' year
6.explain.sql:  and l_shipdate < date '1997-01-01' + interval '1' year

例子

-- using 1474112033 as a seed to the RNG


select
        l_returnflag,
        l_linestatus,
        sum(l_quantity) as sum_qty,
        sum(l_extendedprice) as sum_base_price,
        sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
        sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
        avg(l_quantity) as avg_qty,
        avg(l_extendedprice) as avg_price,
        avg(l_discount) as avg_disc,
        count(*) as count_order
from
        lineitem
where
        l_shipdate <= date '1998-12-01' - interval '78' day
group by
        l_returnflag,
        l_linestatus
order by
        l_returnflag,
        l_linestatus
LIMIT 1;

这些SQL直接影响了TPCH的测试结果。

请务必修正query后再执行。

interval 的 io函数

src/backend/utils/adt/timestamp.c

/* interval_in()
 * Convert a string to internal form.
 *
 * External format(s):
 *      Uses the generic date/time parsing and decoding routines.
 */
Datum
interval_in(PG_FUNCTION_ARGS)
{
        char       *str = PG_GETARG_CSTRING(0);

#ifdef NOT_USED
        Oid                     typelem = PG_GETARG_OID(1);
#endif
        int32           typmod = PG_GETARG_INT32(2);
        Interval   *result;
        fsec_t          fsec;
        struct pg_tm tt,
                           *tm = &tt;
        int                     dtype;
        int                     nf;
        int                     range;
        int                     dterr;
        char       *field[MAXDATEFIELDS];
        int                     ftype[MAXDATEFIELDS];
        char            workbuf[256];

        tm->tm_year = 0;
        tm->tm_mon = 0;
        tm->tm_mday = 0;
        tm->tm_hour = 0;
        tm->tm_min = 0;
        tm->tm_sec = 0;
        fsec = 0;

        if (typmod >= 0)
                range = INTERVAL_RANGE(typmod);
        else
                range = INTERVAL_FULL_RANGE;

        dterr = ParseDateTime(str, workbuf, sizeof(workbuf), field,
                                                  ftype, MAXDATEFIELDS, &nf);
        if (dterr == 0)
                dterr = DecodeInterval(field, ftype, nf, range,
                                                           &dtype, tm, &fsec);

        /* if those functions think it's a bad format, try ISO8601 style */
        if (dterr == DTERR_BAD_FORMAT)
                dterr = DecodeISO8601Interval(str,
                                                                          &dtype, tm, &fsec);

        if (dterr != 0)
        {
                if (dterr == DTERR_FIELD_OVERFLOW)
                        dterr = DTERR_INTERVAL_OVERFLOW;
                DateTimeParseError(dterr, str, "interval");
        }

        result = (Interval *) palloc(sizeof(Interval));

        switch (dtype)
        {
                case DTK_DELTA:
                        if (tm2interval(tm, fsec, result) != 0)
                                ereport(ERROR,
                                                (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
                                                 errmsg("interval out of range")));
                        break;

                case DTK_INVALID:
                        ereport(ERROR,
                                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                          errmsg("date/time value \"%s\" is no longer supported", str)));
                        break;

                default:
                        elog(ERROR, "unexpected dtype %d while parsing interval \"%s\"",
                                 dtype, str);
        }

        AdjustIntervalForTypmod(result, typmod);

        PG_RETURN_INTERVAL_P(result);
}

/* interval_out()
 * Convert a time span to external form.
 */
Datum
interval_out(PG_FUNCTION_ARGS)
{
        Interval   *span = PG_GETARG_INTERVAL_P(0);
        char       *result;
        struct pg_tm tt,
                           *tm = &tt;
        fsec_t          fsec;
        char            buf[MAXDATELEN + 1];

        if (interval2tm(*span, tm, &fsec) != 0)
                elog(ERROR, "could not convert interval to tm");

        EncodeInterval(tm, fsec, IntervalStyle, buf);

        result = pstrdup(buf);
        PG_RETURN_CSTRING(result);
}

Flag Counter

digoal’s 大量PostgreSQL文章入口