PostgreSQL SQL log duration time 源码分析

6 minute read

背景

PostgreSQL 可以通过参数设置是否要记录SQL的执行时间,以及执行时间超过多少的SQL。

注意这里的执行时间实际上包含了网络的传输时间。

所以在遇到慢查询时,除了要排查数据库的问题,实际上还需要排查网络以及客户端的问题,因为客户端接收数据堵塞也会造成慢查询,就像我前天写的文章。

PostgreSQL IDE pgadmin , edb postgres enterprise manager 查询慢的问题分析

https://yq.aliyun.com/articles/32438

另外需要指出的是,PostgreSQL的内核在这方面有改进的空间,最好是把网络传输的时间另外计算。

这样更容易排查问题。

如果要将网络时间另外计算,需要hack一下内核的postgres.c中的几个函数,文章后面会分析。

测试

在数据库中创建表和测试数据

postgres=> create table tbl(id int);  
CREATE TABLE  
postgres=> insert into tbl select generate_series(1,200000);  
INSERT 0 200000  
postgres=> \dt+ tbl  
                   List of relations  
 Schema | Name | Type  | Owner |  Size   | Description   
--------+------+-------+-------+---------+-------------  
 public | tbl  | table | test  | 7104 kB |   
(1 row)  

确保打开了慢查询的审计日志

postgres=> show log_min_duration_statement ;  
 log_min_duration_statement   
----------------------------  
 1s  
(1 row)  

在数据库所在服务器的本地执行如下查询,很快就返回

digoal@localhost-> date; psql -c "select * from tbl" >/dev/null ; date;  
Fri Apr 22 11:59:52 CST 2016  
Fri Apr 22 11:59:53 CST 2016  

开启一个比较慢的网络,例如手机的2G网络,然后通过手机上网连接到数据库,执行同样的SQL,耗时变长了,因为网络不稳定,时快时慢。

digoal@digoal01-> date ; psql -h remote_host -p 1921 -U test postgres -c "select * from tbl" >/dev/null ; date;  
Fri Apr 22 12:31:08 CST 2016  
Fri Apr 22 12:31:18 CST 2016  
  
  
digoal@digoal01-> date ; psql -h remote_host -p 1921 -U test postgres -c "select * from tbl limit 20000" >/dev/null ; date;  
Fri Apr 22 12:34:30 CST 2016  
Fri Apr 22 12:34:47 CST 2016  

在数据库的日志中,可以看到慢查询的审计日志,耗时包含了SQL数据库将数据传输到客户端的时间。

2016-04-22 12:33:13.112 CST,"test","postgres",2680,"36.16.129.195:11812",5719a97f.a78,3,"SELECT",2016-04-22 12:33:03 CST,7/0,0,LOG,00000,"duration: 8300.129 ms  statement: select * from tbl limit 20000",,,,,,,,"exec_simple_query, postgres.c:1149","psql"  

对应的代码

src/backend/tcop/postgres.c

check_log_duration  

检查是否需要输出duration。

同时计算从语句时间开始到当前时间的一个时间差,也就是SQL的duration。

有4个接口会记录时间

exec_execute_message 使用绑定变量时, execute sql的时间  
exec_bind_message 使用绑定变量时, bind的时间  
exec_parse_message 使用绑定变量时, parse的时间  
exec_simple_query 未使用绑定变量时,执行SQL的时间  

这几个函数的代码如下

/*  
 * check_log_duration  
 *      Determine whether current command's duration should be logged  
 *  
 * Returns:  
 *      0 if no logging is needed, 不需要记录SQL时间  
 *      1 if just the duration should be logged, 需要记录SQL时间, 但是不需要记录SQL详情  
 *      2 if duration and query details should be logged, 需要记录SQL时间,同时需要记录SQL 详情  
 *  
 * If logging is needed, the duration in msec is formatted into msec_str[],  
 * which must be a 32-byte buffer.  
 *  
 * was_logged should be TRUE if caller already logged query details (this  
 * essentially prevents 2 from being returned).  
 */  
int  
check_log_duration(char *msec_str, bool was_logged)  
{  
    if (log_duration || log_min_duration_statement >= 0)  
    {  
        long        secs;  
        int         usecs;  
        int         msecs;  
        bool        exceeded;  
  
        TimestampDifference(GetCurrentStatementStartTimestamp(),  
                            GetCurrentTimestamp(),  
                            &secs, &usecs);  // 语句开始到当前的时间  
        msecs = usecs / 1000;  
  
        /*  
         * This odd-looking test for log_min_duration_statement being exceeded  
         * is designed to avoid integer overflow with very long durations:  
         * don't compute secs * 1000 until we've verified it will fit in int.  
         */  
        exceeded = (log_min_duration_statement == 0 ||  
                    (log_min_duration_statement > 0 &&  
                     (secs > log_min_duration_statement / 1000 ||  
                      secs * 1000 + msecs >= log_min_duration_statement)));  
  
        if (exceeded || log_duration)  
        {  
            snprintf(msec_str, 32, "%ld.%03d",  
                     secs * 1000 + msecs, usecs % 1000);  
            if (exceeded && !was_logged)  
                return 2;  
            else  
                return 1;  
        }  
    }  
  
    return 0;  
}  

simple exec

/*  
 * exec_simple_query  
 *  
 * Execute a "simple Query" protocol message.  
 */  
static void  
exec_simple_query(const char *query_string)  
{  
    CommandDest dest = whereToSendOutput;  
    MemoryContext oldcontext;  
    List       *parsetree_list;  
    ListCell   *parsetree_item;  
    bool        save_log_statement_stats = log_statement_stats;  
    bool        was_logged = false;  
    bool        isTopLevel;  
    char        msec_str[32];  
...  
        /*  
         * Create unnamed portal to run the query or queries in. If there  
         * already is one, silently drop it.  
         */  
        portal = CreatePortal("", true, true);  
        /* Don't display the portal in pg_cursors */  
        portal->visible = false;  
  
        /*  
         * We don't have to copy anything into the portal, because everything  
         * we are passing here is in MessageContext, which will outlive the  
         * portal anyway.  
         */  
        PortalDefineQuery(portal,  
                          NULL,  
                          query_string,  
                          commandTag,  
                          plantree_list,  
                          NULL);  
  
        /*  
         * Start the portal.  No parameters here.  
         */  
        PortalStart(portal, NULL, 0, InvalidSnapshot);  
  
        /*  
         * Select the appropriate output format: text unless we are doing a  
         * FETCH from a binary cursor.  (Pretty grotty to have to do this here  
         * --- but it avoids grottiness in other places.  Ah, the joys of  
         * backward compatibility...)  
         */  
        format = 0;             /* TEXT is default */  
        if (IsA(parsetree, FetchStmt))  
        {  
            FetchStmt  *stmt = (FetchStmt *) parsetree;  
  
            if (!stmt->ismove)  
            {  
                Portal      fportal = GetPortalByName(stmt->portalname);  
  
                if (PortalIsValid(fportal) &&  
                    (fportal->cursorOptions & CURSOR_OPT_BINARY))  
                    format = 1; /* BINARY */  
            }  
        }  
        PortalSetResultFormat(portal, 1, &format);  
  
        /*  
         * Now we can create the destination receiver object.  
         */  
        receiver = CreateDestReceiver(dest);  
        if (dest == DestRemote)  
            SetRemoteDestReceiverParams(receiver, portal);  
  
        /*  
         * Switch back to transaction context for execution.  
         */  
        MemoryContextSwitchTo(oldcontext);  
  
        /*  
         * Run the portal to completion, and then drop it (and the receiver).  
         */  
        (void) PortalRun(portal,  
                         FETCH_ALL,  
                         isTopLevel,  
                         receiver,  
                         receiver,  
                         completionTag);  
  
        (*receiver->rDestroy) (receiver);  
  
        PortalDrop(portal, false);  
  
        if (IsA(parsetree, TransactionStmt))  
        {  
            /*  
             * If this was a transaction control statement, commit it. We will  
             * start a new xact command for the next command (if any).  
             */  
            finish_xact_command();  
        }  
...  
    /*  
     * Close down transaction statement, if one is open.  
     */  
    finish_xact_command();  
  
    /*  
     * If there were no parsetrees, return EmptyQueryResponse message.  
     */  
    if (!parsetree_list)  
        NullCommand(dest);  
  
    /*  
     * Emit duration logging if appropriate.  
     */  
    switch (check_log_duration(msec_str, was_logged))  
    {  
        case 1:  
            ereport(LOG,  
                    (errmsg("duration: %s ms", msec_str),  
                     errhidestmt(true)));  
            break;  
        case 2:  
            ereport(LOG,  
                    (errmsg("duration: %s ms  statement: %s",  
                            msec_str, query_string),  
                     errhidestmt(true),  
                     errdetail_execute(parsetree_list)));  
            break;  
    }  
  
    if (save_log_statement_stats)  
        ShowUsage("QUERY STATISTICS");  
  
    TRACE_POSTGRESQL_QUERY_DONE(query_string);  
  
    debug_query_string = NULL;  
}  

parse

/*  
 * exec_parse_message  
 *  
 * Execute a "Parse" protocol message.  
 */  
static void  
exec_parse_message(const char *query_string,    /* string to execute */  
                   const char *stmt_name,       /* name for prepared stmt */  
                   Oid *paramTypes,     /* parameter types */  
                   int numParams)       /* number of parameters */  
{  
    MemoryContext unnamed_stmt_context = NULL;  
    MemoryContext oldcontext;  
    List       *parsetree_list;  
    Node       *raw_parse_tree;  
    const char *commandTag;  
    List       *querytree_list;  
    CachedPlanSource *psrc;  
    bool        is_named;  
    bool        save_log_statement_stats = log_statement_stats;  
    char        msec_str[32];  
...  
    /*  
     * Send ParseComplete.  
     */  
    if (whereToSendOutput == DestRemote)  
        pq_putemptymessage('1');  
  
    /*  
     * Emit duration logging if appropriate.  
     */  
    switch (check_log_duration(msec_str, false))  
    {  
        case 1:  
            ereport(LOG,  
                    (errmsg("duration: %s ms", msec_str),  
                     errhidestmt(true)));  
            break;  
        case 2:  
            ereport(LOG,  
                    (errmsg("duration: %s ms  parse %s: %s",  
                            msec_str,  
                            *stmt_name ? stmt_name : "<unnamed>",  
                            query_string),  
                     errhidestmt(true)));  
            break;  
    }  
  
    if (save_log_statement_stats)  
        ShowUsage("PARSE MESSAGE STATISTICS");  
  
    debug_query_string = NULL;  
}  
  

bind

/*  
 * exec_bind_message  
 *  
 * Process a "Bind" message to create a portal from a prepared statement  
 */  
static void  
exec_bind_message(StringInfo input_message)  
{  
    const char *portal_name;  
    const char *stmt_name;  
    int         numPFormats;  
    int16      *pformats = NULL;  
    int         numParams;  
    int         numRFormats;  
    int16      *rformats = NULL;  
    CachedPlanSource *psrc;  
    CachedPlan *cplan;  
    Portal      portal;  
    char       *query_string;  
    char       *saved_stmt_name;  
    ParamListInfo params;  
    MemoryContext oldContext;  
    bool        save_log_statement_stats = log_statement_stats;  
    bool        snapshot_set = false;  
    char        msec_str[32];  
...  
    /*  
     * Now we can define the portal.  
     *  
     * DO NOT put any code that could possibly throw an error between the  
     * above GetCachedPlan call and here.  
     */  
    PortalDefineQuery(portal,  
                      saved_stmt_name,  
                      query_string,  
                      psrc->commandTag,  
                      cplan->stmt_list,  
                      cplan);  
  
    /* Done with the snapshot used for parameter I/O and parsing/planning */  
    if (snapshot_set)  
        PopActiveSnapshot();  
  
    /*  
     * And we're ready to start portal execution.  
     */  
    PortalStart(portal, params, 0, InvalidSnapshot);  
  
    /*  
     * Apply the result format requests to the portal.  
     */  
    PortalSetResultFormat(portal, numRFormats, rformats);  
  
    /*  
     * Send BindComplete.  
     */  
    if (whereToSendOutput == DestRemote)  
        pq_putemptymessage('2');  
  
    /*  
     * Emit duration logging if appropriate.  
     */  
    switch (check_log_duration(msec_str, false))  
    {  
        case 1:  
            ereport(LOG,  
                    (errmsg("duration: %s ms", msec_str),  
                     errhidestmt(true)));  
            break;  
        case 2:  
            ereport(LOG,  
                    (errmsg("duration: %s ms  bind %s%s%s: %s",  
                            msec_str,  
                            *stmt_name ? stmt_name : "<unnamed>",  
                            *portal_name ? "/" : "",  
                            *portal_name ? portal_name : "",  
                            psrc->query_string),  
                     errhidestmt(true),  
                     errdetail_params(params)));  
            break;  
    }  
  
    if (save_log_statement_stats)  
        ShowUsage("BIND MESSAGE STATISTICS");  
  
    debug_query_string = NULL;  
}  

execute

/*  
 * exec_execute_message  
 *  
 * Process an "Execute" message for a portal  
 */  
static void  
exec_execute_message(const char *portal_name, long max_rows)  
{  
    CommandDest dest;  
    DestReceiver *receiver;  
    Portal      portal;  
    bool        completed;  
    char        completionTag[COMPLETION_TAG_BUFSIZE];  
    const char *sourceText;  
    const char *prepStmtName;  
    ParamListInfo portalParams;  
    bool        save_log_statement_stats = log_statement_stats;  
    bool        is_xact_command;  
    bool        execute_is_fetch;  
    bool        was_logged = false;  
    char        msec_str[32];  
  
...  
    /*  
     * Okay to run the portal.  
     */  
    if (max_rows <= 0)  
        max_rows = FETCH_ALL;  
  
    completed = PortalRun(portal,  
                          max_rows,  
                          true, /* always top level */  
                          receiver,  
                          receiver,  
                          completionTag);  
  
    (*receiver->rDestroy) (receiver);  
  
    if (completed)  
    {  
        if (is_xact_command)  
        {  
            /*  
             * If this was a transaction control statement, commit it.  We  
             * will start a new xact command for the next command (if any).  
             */  
            finish_xact_command();  
        }  
        else  
        {  
            /*  
             * We need a CommandCounterIncrement after every query, except  
             * those that start or end a transaction block.  
             */  
            CommandCounterIncrement();  
        }  
  
        /* Send appropriate CommandComplete to client */  
        EndCommand(completionTag, dest);  
    }  
    else  
    {  
        /* Portal run not complete, so send PortalSuspended */  
        if (whereToSendOutput == DestRemote)  
            pq_putemptymessage('s');  
    }  
  
    /*  
     * Emit duration logging if appropriate.  
     */  
    switch (check_log_duration(msec_str, was_logged))  
    {  
        case 1:  
            ereport(LOG,  
                    (errmsg("duration: %s ms", msec_str),  
                     errhidestmt(true)));  
            break;  
        case 2:  
            ereport(LOG,  
                    (errmsg("duration: %s ms  %s %s%s%s: %s",  
                            msec_str,  
                            execute_is_fetch ?  
                            _("execute fetch from") :  
                            _("execute"),  
                            prepStmtName,  
                            *portal_name ? "/" : "",  
                            *portal_name ? portal_name : "",  
                            sourceText),  
                     errhidestmt(true),  
                     errdetail_params(portalParams)));  
            break;  
    }  
  
    if (save_log_statement_stats)  
        ShowUsage("EXECUTE MESSAGE STATISTICS");  
  
    debug_query_string = NULL;  
}  

Flag Counter

digoal’s 大量PostgreSQL文章入口