PostgreSQL SQL log duration time 源码分析
背景
PostgreSQL 可以通过参数设置是否要记录SQL的执行时间,以及执行时间超过多少的SQL。
注意这里的执行时间实际上包含了网络的传输时间。
所以在遇到慢查询时,除了要排查数据库的问题,实际上还需要排查网络以及客户端的问题,因为客户端接收数据堵塞也会造成慢查询,就像我前天写的文章。
PostgreSQL IDE pgadmin , edb postgres enterprise manager 查询慢的问题分析
https://yq.aliyun.com/articles/32438
另外需要指出的是,PostgreSQL的内核在这方面有改进的空间,最好是把网络传输的时间另外计算。
这样更容易排查问题。
如果要将网络时间另外计算,需要hack一下内核的postgres.c中的几个函数,文章后面会分析。
测试
在数据库中创建表和测试数据
postgres=> create table tbl(id int);
CREATE TABLE
postgres=> insert into tbl select generate_series(1,200000);
INSERT 0 200000
postgres=> \dt+ tbl
List of relations
Schema | Name | Type | Owner | Size | Description
--------+------+-------+-------+---------+-------------
public | tbl | table | test | 7104 kB |
(1 row)
确保打开了慢查询的审计日志
postgres=> show log_min_duration_statement ;
log_min_duration_statement
----------------------------
1s
(1 row)
在数据库所在服务器的本地执行如下查询,很快就返回
digoal@localhost-> date; psql -c "select * from tbl" >/dev/null ; date;
Fri Apr 22 11:59:52 CST 2016
Fri Apr 22 11:59:53 CST 2016
开启一个比较慢的网络,例如手机的2G网络,然后通过手机上网连接到数据库,执行同样的SQL,耗时变长了,因为网络不稳定,时快时慢。
digoal@digoal01-> date ; psql -h remote_host -p 1921 -U test postgres -c "select * from tbl" >/dev/null ; date;
Fri Apr 22 12:31:08 CST 2016
Fri Apr 22 12:31:18 CST 2016
digoal@digoal01-> date ; psql -h remote_host -p 1921 -U test postgres -c "select * from tbl limit 20000" >/dev/null ; date;
Fri Apr 22 12:34:30 CST 2016
Fri Apr 22 12:34:47 CST 2016
在数据库的日志中,可以看到慢查询的审计日志,耗时包含了SQL数据库将数据传输到客户端的时间。
2016-04-22 12:33:13.112 CST,"test","postgres",2680,"36.16.129.195:11812",5719a97f.a78,3,"SELECT",2016-04-22 12:33:03 CST,7/0,0,LOG,00000,"duration: 8300.129 ms statement: select * from tbl limit 20000",,,,,,,,"exec_simple_query, postgres.c:1149","psql"
对应的代码
src/backend/tcop/postgres.c
check_log_duration
检查是否需要输出duration。
同时计算从语句时间开始到当前时间的一个时间差,也就是SQL的duration。
有4个接口会记录时间
exec_execute_message 使用绑定变量时, execute sql的时间
exec_bind_message 使用绑定变量时, bind的时间
exec_parse_message 使用绑定变量时, parse的时间
exec_simple_query 未使用绑定变量时,执行SQL的时间
这几个函数的代码如下
/*
* check_log_duration
* Determine whether current command's duration should be logged
*
* Returns:
* 0 if no logging is needed, 不需要记录SQL时间
* 1 if just the duration should be logged, 需要记录SQL时间, 但是不需要记录SQL详情
* 2 if duration and query details should be logged, 需要记录SQL时间,同时需要记录SQL 详情
*
* If logging is needed, the duration in msec is formatted into msec_str[],
* which must be a 32-byte buffer.
*
* was_logged should be TRUE if caller already logged query details (this
* essentially prevents 2 from being returned).
*/
int
check_log_duration(char *msec_str, bool was_logged)
{
if (log_duration || log_min_duration_statement >= 0)
{
long secs;
int usecs;
int msecs;
bool exceeded;
TimestampDifference(GetCurrentStatementStartTimestamp(),
GetCurrentTimestamp(),
&secs, &usecs); // 语句开始到当前的时间
msecs = usecs / 1000;
/*
* This odd-looking test for log_min_duration_statement being exceeded
* is designed to avoid integer overflow with very long durations:
* don't compute secs * 1000 until we've verified it will fit in int.
*/
exceeded = (log_min_duration_statement == 0 ||
(log_min_duration_statement > 0 &&
(secs > log_min_duration_statement / 1000 ||
secs * 1000 + msecs >= log_min_duration_statement)));
if (exceeded || log_duration)
{
snprintf(msec_str, 32, "%ld.%03d",
secs * 1000 + msecs, usecs % 1000);
if (exceeded && !was_logged)
return 2;
else
return 1;
}
}
return 0;
}
simple exec
/*
* exec_simple_query
*
* Execute a "simple Query" protocol message.
*/
static void
exec_simple_query(const char *query_string)
{
CommandDest dest = whereToSendOutput;
MemoryContext oldcontext;
List *parsetree_list;
ListCell *parsetree_item;
bool save_log_statement_stats = log_statement_stats;
bool was_logged = false;
bool isTopLevel;
char msec_str[32];
...
/*
* Create unnamed portal to run the query or queries in. If there
* already is one, silently drop it.
*/
portal = CreatePortal("", true, true);
/* Don't display the portal in pg_cursors */
portal->visible = false;
/*
* We don't have to copy anything into the portal, because everything
* we are passing here is in MessageContext, which will outlive the
* portal anyway.
*/
PortalDefineQuery(portal,
NULL,
query_string,
commandTag,
plantree_list,
NULL);
/*
* Start the portal. No parameters here.
*/
PortalStart(portal, NULL, 0, InvalidSnapshot);
/*
* Select the appropriate output format: text unless we are doing a
* FETCH from a binary cursor. (Pretty grotty to have to do this here
* --- but it avoids grottiness in other places. Ah, the joys of
* backward compatibility...)
*/
format = 0; /* TEXT is default */
if (IsA(parsetree, FetchStmt))
{
FetchStmt *stmt = (FetchStmt *) parsetree;
if (!stmt->ismove)
{
Portal fportal = GetPortalByName(stmt->portalname);
if (PortalIsValid(fportal) &&
(fportal->cursorOptions & CURSOR_OPT_BINARY))
format = 1; /* BINARY */
}
}
PortalSetResultFormat(portal, 1, &format);
/*
* Now we can create the destination receiver object.
*/
receiver = CreateDestReceiver(dest);
if (dest == DestRemote)
SetRemoteDestReceiverParams(receiver, portal);
/*
* Switch back to transaction context for execution.
*/
MemoryContextSwitchTo(oldcontext);
/*
* Run the portal to completion, and then drop it (and the receiver).
*/
(void) PortalRun(portal,
FETCH_ALL,
isTopLevel,
receiver,
receiver,
completionTag);
(*receiver->rDestroy) (receiver);
PortalDrop(portal, false);
if (IsA(parsetree, TransactionStmt))
{
/*
* If this was a transaction control statement, commit it. We will
* start a new xact command for the next command (if any).
*/
finish_xact_command();
}
...
/*
* Close down transaction statement, if one is open.
*/
finish_xact_command();
/*
* If there were no parsetrees, return EmptyQueryResponse message.
*/
if (!parsetree_list)
NullCommand(dest);
/*
* Emit duration logging if appropriate.
*/
switch (check_log_duration(msec_str, was_logged))
{
case 1:
ereport(LOG,
(errmsg("duration: %s ms", msec_str),
errhidestmt(true)));
break;
case 2:
ereport(LOG,
(errmsg("duration: %s ms statement: %s",
msec_str, query_string),
errhidestmt(true),
errdetail_execute(parsetree_list)));
break;
}
if (save_log_statement_stats)
ShowUsage("QUERY STATISTICS");
TRACE_POSTGRESQL_QUERY_DONE(query_string);
debug_query_string = NULL;
}
parse
/*
* exec_parse_message
*
* Execute a "Parse" protocol message.
*/
static void
exec_parse_message(const char *query_string, /* string to execute */
const char *stmt_name, /* name for prepared stmt */
Oid *paramTypes, /* parameter types */
int numParams) /* number of parameters */
{
MemoryContext unnamed_stmt_context = NULL;
MemoryContext oldcontext;
List *parsetree_list;
Node *raw_parse_tree;
const char *commandTag;
List *querytree_list;
CachedPlanSource *psrc;
bool is_named;
bool save_log_statement_stats = log_statement_stats;
char msec_str[32];
...
/*
* Send ParseComplete.
*/
if (whereToSendOutput == DestRemote)
pq_putemptymessage('1');
/*
* Emit duration logging if appropriate.
*/
switch (check_log_duration(msec_str, false))
{
case 1:
ereport(LOG,
(errmsg("duration: %s ms", msec_str),
errhidestmt(true)));
break;
case 2:
ereport(LOG,
(errmsg("duration: %s ms parse %s: %s",
msec_str,
*stmt_name ? stmt_name : "<unnamed>",
query_string),
errhidestmt(true)));
break;
}
if (save_log_statement_stats)
ShowUsage("PARSE MESSAGE STATISTICS");
debug_query_string = NULL;
}
bind
/*
* exec_bind_message
*
* Process a "Bind" message to create a portal from a prepared statement
*/
static void
exec_bind_message(StringInfo input_message)
{
const char *portal_name;
const char *stmt_name;
int numPFormats;
int16 *pformats = NULL;
int numParams;
int numRFormats;
int16 *rformats = NULL;
CachedPlanSource *psrc;
CachedPlan *cplan;
Portal portal;
char *query_string;
char *saved_stmt_name;
ParamListInfo params;
MemoryContext oldContext;
bool save_log_statement_stats = log_statement_stats;
bool snapshot_set = false;
char msec_str[32];
...
/*
* Now we can define the portal.
*
* DO NOT put any code that could possibly throw an error between the
* above GetCachedPlan call and here.
*/
PortalDefineQuery(portal,
saved_stmt_name,
query_string,
psrc->commandTag,
cplan->stmt_list,
cplan);
/* Done with the snapshot used for parameter I/O and parsing/planning */
if (snapshot_set)
PopActiveSnapshot();
/*
* And we're ready to start portal execution.
*/
PortalStart(portal, params, 0, InvalidSnapshot);
/*
* Apply the result format requests to the portal.
*/
PortalSetResultFormat(portal, numRFormats, rformats);
/*
* Send BindComplete.
*/
if (whereToSendOutput == DestRemote)
pq_putemptymessage('2');
/*
* Emit duration logging if appropriate.
*/
switch (check_log_duration(msec_str, false))
{
case 1:
ereport(LOG,
(errmsg("duration: %s ms", msec_str),
errhidestmt(true)));
break;
case 2:
ereport(LOG,
(errmsg("duration: %s ms bind %s%s%s: %s",
msec_str,
*stmt_name ? stmt_name : "<unnamed>",
*portal_name ? "/" : "",
*portal_name ? portal_name : "",
psrc->query_string),
errhidestmt(true),
errdetail_params(params)));
break;
}
if (save_log_statement_stats)
ShowUsage("BIND MESSAGE STATISTICS");
debug_query_string = NULL;
}
execute
/*
* exec_execute_message
*
* Process an "Execute" message for a portal
*/
static void
exec_execute_message(const char *portal_name, long max_rows)
{
CommandDest dest;
DestReceiver *receiver;
Portal portal;
bool completed;
char completionTag[COMPLETION_TAG_BUFSIZE];
const char *sourceText;
const char *prepStmtName;
ParamListInfo portalParams;
bool save_log_statement_stats = log_statement_stats;
bool is_xact_command;
bool execute_is_fetch;
bool was_logged = false;
char msec_str[32];
...
/*
* Okay to run the portal.
*/
if (max_rows <= 0)
max_rows = FETCH_ALL;
completed = PortalRun(portal,
max_rows,
true, /* always top level */
receiver,
receiver,
completionTag);
(*receiver->rDestroy) (receiver);
if (completed)
{
if (is_xact_command)
{
/*
* If this was a transaction control statement, commit it. We
* will start a new xact command for the next command (if any).
*/
finish_xact_command();
}
else
{
/*
* We need a CommandCounterIncrement after every query, except
* those that start or end a transaction block.
*/
CommandCounterIncrement();
}
/* Send appropriate CommandComplete to client */
EndCommand(completionTag, dest);
}
else
{
/* Portal run not complete, so send PortalSuspended */
if (whereToSendOutput == DestRemote)
pq_putemptymessage('s');
}
/*
* Emit duration logging if appropriate.
*/
switch (check_log_duration(msec_str, was_logged))
{
case 1:
ereport(LOG,
(errmsg("duration: %s ms", msec_str),
errhidestmt(true)));
break;
case 2:
ereport(LOG,
(errmsg("duration: %s ms %s %s%s%s: %s",
msec_str,
execute_is_fetch ?
_("execute fetch from") :
_("execute"),
prepStmtName,
*portal_name ? "/" : "",
*portal_name ? portal_name : "",
sourceText),
errhidestmt(true),
errdetail_params(portalParams)));
break;
}
if (save_log_statement_stats)
ShowUsage("EXECUTE MESSAGE STATISTICS");
debug_query_string = NULL;
}