Greenplum,HAWQ interval parser带来的问题 - TPCH 测试注意啦
背景
interval是用来表达时间间隔的数据类型,比如1年,或者1分钟,或者1天零多少小时分钟等。
postgres=# select interval '100 year 2 month 1 day 1:00:01.11';
interval
------------------------------------
100 years 2 mons 1 day 01:00:01.11
(1 row)
interval可以与时间,日期类型加减。
postgres=# select now()+interval '100 year 2 month 1 day 1:00:01.11';
?column?
-------------------------------
2116-12-12 20:06:48.391422+08
(1 row)
interval的用法可参考
https://www.postgresql.org/docs/9.6/static/functions-datetime.html
interval parser不同版本的差异
1. PostgreSQL 8.3以及以前的版本不能解释放在单引号外面的单位
$psql -h 127.0.0.1 -p 35432 -U digoal postgres
psql (8.3.23)
Type "help" for help.
postgres=# select now(), now()+interval '1 year', now()+interval '1' year;
now | ?column? | ?column?
-------------------------------+-------------------------------+-------------------------------
2016-10-11 19:02:46.881375+08 | 2017-10-11 19:02:46.881375+08 | 2016-10-11 19:02:46.881375+08
(1 row)
postgres=# select interval '100' year;
interval
----------
00:00:00
(1 row)
2. 8.4以及以后的版本则支持放在外面的单位的写法。
psql (9.4.9)
Type "help" for help.
postgres=# select now(), now()+interval '1 year', now()+interval '1' year;
now | ?column? | ?column?
-------------------------------+-------------------------------+-------------------------------
2016-10-11 19:08:29.365853+08 | 2017-10-11 19:08:29.365853+08 | 2017-10-11 19:08:29.365853+08
(1 row)
postgres=# select interval '100' year;
interval
-----------
100 years
(1 row)
postgres=# select interval '100' hour;
interval
-----------
100:00:00
(1 row)
patch在这里
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=70530c808bf8eaba2a41a28c9dc7b96dcc3b6c51
Adjust the parser to accept the typename syntax INTERVAL ... SECOND(n)
and the literal syntax INTERVAL 'string' ... SECOND(n), as required by the
SQL standard. Our old syntax put (n) directly after INTERVAL, which was
a mistake, but will still be accepted for backward compatibility as well
as symmetry with the TIMESTAMP cases.
Change intervaltypmodout to show it in the spec's way, too. (This could
potentially affect clients, if there are any that analyze the typmod of an
INTERVAL in any detail.)
Also fix interval input to handle 'min:sec.frac' properly; I had overlooked
this case in my previous patch.
Document the use of the interval fields qualifier, which up to now we had
never mentioned in the docs. (I think the omission was intentional because
it didn't work per spec; but it does now, or at least close enough to be
credible.)
tpch的QUERY造句
tpch的dbgen产生的query用的是带单位的写法,导致没有出现加减。
涉及的SQL如下
10.explain.sql: and o_orderdate < date '1993-04-01' + interval '3' month
12.explain.sql: and l_receiptdate < date '1995-01-01' + interval '1' year
14.explain.sql: and l_shipdate < date '1995-08-01' + interval '1' month
15.explain.sql: and l_shipdate < date '1997-03-01' + interval '3' month
1.explain.sql: l_shipdate <= date '1998-12-01' - interval '78' day
20.explain.sql: and l_shipdate < date '1994-01-01' + interval '1' year
4.explain.sql: and o_orderdate < date '1995-03-01' + interval '3' month
5.explain.sql: and o_orderdate < date '1997-01-01' + interval '1' year
6.explain.sql: and l_shipdate < date '1997-01-01' + interval '1' year
例子
-- using 1474112033 as a seed to the RNG
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from
lineitem
where
l_shipdate <= date '1998-12-01' - interval '78' day
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus
LIMIT 1;
这些SQL直接影响了TPCH的测试结果。
请务必修正query后再执行。
interval 的 io函数
src/backend/utils/adt/timestamp.c
/* interval_in()
* Convert a string to internal form.
*
* External format(s):
* Uses the generic date/time parsing and decoding routines.
*/
Datum
interval_in(PG_FUNCTION_ARGS)
{
char *str = PG_GETARG_CSTRING(0);
#ifdef NOT_USED
Oid typelem = PG_GETARG_OID(1);
#endif
int32 typmod = PG_GETARG_INT32(2);
Interval *result;
fsec_t fsec;
struct pg_tm tt,
*tm = &tt;
int dtype;
int nf;
int range;
int dterr;
char *field[MAXDATEFIELDS];
int ftype[MAXDATEFIELDS];
char workbuf[256];
tm->tm_year = 0;
tm->tm_mon = 0;
tm->tm_mday = 0;
tm->tm_hour = 0;
tm->tm_min = 0;
tm->tm_sec = 0;
fsec = 0;
if (typmod >= 0)
range = INTERVAL_RANGE(typmod);
else
range = INTERVAL_FULL_RANGE;
dterr = ParseDateTime(str, workbuf, sizeof(workbuf), field,
ftype, MAXDATEFIELDS, &nf);
if (dterr == 0)
dterr = DecodeInterval(field, ftype, nf, range,
&dtype, tm, &fsec);
/* if those functions think it's a bad format, try ISO8601 style */
if (dterr == DTERR_BAD_FORMAT)
dterr = DecodeISO8601Interval(str,
&dtype, tm, &fsec);
if (dterr != 0)
{
if (dterr == DTERR_FIELD_OVERFLOW)
dterr = DTERR_INTERVAL_OVERFLOW;
DateTimeParseError(dterr, str, "interval");
}
result = (Interval *) palloc(sizeof(Interval));
switch (dtype)
{
case DTK_DELTA:
if (tm2interval(tm, fsec, result) != 0)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("interval out of range")));
break;
case DTK_INVALID:
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("date/time value \"%s\" is no longer supported", str)));
break;
default:
elog(ERROR, "unexpected dtype %d while parsing interval \"%s\"",
dtype, str);
}
AdjustIntervalForTypmod(result, typmod);
PG_RETURN_INTERVAL_P(result);
}
/* interval_out()
* Convert a time span to external form.
*/
Datum
interval_out(PG_FUNCTION_ARGS)
{
Interval *span = PG_GETARG_INTERVAL_P(0);
char *result;
struct pg_tm tt,
*tm = &tt;
fsec_t fsec;
char buf[MAXDATELEN + 1];
if (interval2tm(*span, tm, &fsec) != 0)
elog(ERROR, "could not convert interval to tm");
EncodeInterval(tm, fsec, IntervalStyle, buf);
result = pstrdup(buf);
PG_RETURN_CSTRING(result);
}