PostgreSQL csvlog 源码分析
背景
PostgreSQL csvlog日志格式记录了非常多的信息,通过CSV外部表,可以使用SQL对日志进行分析。
文档中的例子:
http://www.postgresql.org/docs/9.5/static/runtime-config-logging.html#RUNTIME-CONFIG-LOGGING-CSVLOG
CREATE TABLE postgres_log
(
log_time timestamp(3) with time zone,
user_name text,
database_name text,
process_id integer,
connection_from text,
session_id text,
session_line_num bigint,
command_tag text,
session_start_time timestamp with time zone,
virtual_transaction_id text,
transaction_id bigint,
error_severity text,
sql_state_code text,
message text,
detail text,
hint text,
internal_query text,
internal_query_pos integer,
context text,
query text,
query_pos integer,
location text,
application_name text,
PRIMARY KEY (session_id, session_line_num)
);
To import a log file into this table, use the COPY FROM command:
COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
那么csvlog每个字段的含义是什么呢?
有些字面比较好理解,有些不太好理解,不用担心,PostgreSQL的代码非常简洁,不了解的字段就去看看代码吧:
src/include/utils/elog.h
/*
* ErrorData holds the data accumulated during any one ereport() cycle.
* Any non-NULL pointers must point to palloc'd data.
* (The const pointers are an exception; we assume they point at non-freeable
* constant strings.)
*/
typedef struct ErrorData
{
int elevel; /* error level */
bool output_to_server; /* will report to server log? */
bool output_to_client; /* will report to client? */
bool show_funcname; /* true to force funcname inclusion */
bool hide_stmt; /* true to prevent STATEMENT: inclusion */
const char *filename; /* __FILE__ of ereport() call */
int lineno; /* __LINE__ of ereport() call */
const char *funcname; /* __func__ of ereport() call */
const char *domain; /* message domain */
const char *context_domain; /* message domain for context message */
int sqlerrcode; /* encoded ERRSTATE */
char *message; /* primary error message */
char *detail; /* detail error message */
char *detail_log; /* detail error message for server log only */
char *hint; /* hint message */
char *context; /* context message */
char *schema_name; /* name of schema */
char *table_name; /* name of table */
char *column_name; /* name of column */
char *datatype_name; /* name of datatype */
char *constraint_name; /* name of constraint */
int cursorpos; /* cursor index into query string */
int internalpos; /* cursor index into internalquery */
char *internalquery; /* text of internally-generated query */
int saved_errno; /* errno at entry */
/* context containing associated non-constant strings */
struct MemoryContextData *assoc_context;
} ErrorData;
write_csvlog的接口
src/backend/utils/error/elog.c
/*
* Constructs the error message, depending on the Errordata it gets, in a CSV
* format which is described in doc/src/sgml/config.sgml.
*/
static void
write_csvlog(ErrorData *edata)
{
StringInfoData buf;
bool print_stmt = false;
/* static counter for line numbers */
static long log_line_number = 0;
/* has counter been reset in current process? */
static int log_my_pid = 0;
/*
* This is one of the few places where we'd rather not inherit a static
* variable's value from the postmaster. But since we will, reset it when
* MyProcPid changes.
*/
if (log_my_pid != MyProcPid)
{
log_line_number = 0;
log_my_pid = MyProcPid;
formatted_start_time[0] = '\0';
}
log_line_number++;
initStringInfo(&buf);
// 从这里开始,每个字段什么意思都可以看到,每个字段都用appendStringInfoChar(&buf, ',');隔开来了。
/*
* timestamp with milliseconds
*
* Check if the timestamp is already calculated for the syslog message,
* and use it if so. Otherwise, get the current timestamp. This is done
* to put same timestamp in both syslog and csvlog messages.
*/
if (formatted_log_time[0] == '\0')
setup_formatted_log_time();
appendStringInfoString(&buf, formatted_log_time);
appendStringInfoChar(&buf, ',');
/* username */
if (MyProcPort)
appendCSVLiteral(&buf, MyProcPort->user_name);
appendStringInfoChar(&buf, ',');
/* database name */
if (MyProcPort)
appendCSVLiteral(&buf, MyProcPort->database_name);
appendStringInfoChar(&buf, ',');
/* Process id */
if (MyProcPid != 0)
appendStringInfo(&buf, "%d", MyProcPid);
appendStringInfoChar(&buf, ',');
/* Remote host and port */
if (MyProcPort && MyProcPort->remote_host)
{
appendStringInfoChar(&buf, '"');
appendStringInfoString(&buf, MyProcPort->remote_host);
if (MyProcPort->remote_port && MyProcPort->remote_port[0] != '\0')
{
appendStringInfoChar(&buf, ':');
appendStringInfoString(&buf, MyProcPort->remote_port);
}
appendStringInfoChar(&buf, '"');
}
appendStringInfoChar(&buf, ',');
/* session id */ // session id 是两个字段组成的分别是后台进程的启动时间和PID,所以是唯一的
appendStringInfo(&buf, "%lx.%x", (long) MyStartTime, MyProcPid);
appendStringInfoChar(&buf, ',');
/* Line number */
appendStringInfo(&buf, "%ld", log_line_number);
appendStringInfoChar(&buf, ',');
/* PS display */
if (MyProcPort)
{
StringInfoData msgbuf;
const char *psdisp;
int displen;
initStringInfo(&msgbuf);
psdisp = get_ps_display(&displen);
appendBinaryStringInfo(&msgbuf, psdisp, displen);
appendCSVLiteral(&buf, msgbuf.data);
pfree(msgbuf.data);
}
appendStringInfoChar(&buf, ',');
/* session start timestamp */
if (formatted_start_time[0] == '\0')
setup_formatted_start_time();
appendStringInfoString(&buf, formatted_start_time);
appendStringInfoChar(&buf, ',');
/* Virtual transaction id */
/* keep VXID format in sync with lockfuncs.c */
if (MyProc != NULL && MyProc->backendId != InvalidBackendId)
appendStringInfo(&buf, "%d/%u", MyProc->backendId, MyProc->lxid);
appendStringInfoChar(&buf, ',');
/* Transaction id */
appendStringInfo(&buf, "%u", GetTopTransactionIdIfAny());
appendStringInfoChar(&buf, ',');
/* Error severity */
appendStringInfoString(&buf, error_severity(edata->elevel));
appendStringInfoChar(&buf, ',');
/* SQL state code */
appendStringInfoString(&buf, unpack_sql_state(edata->sqlerrcode));
appendStringInfoChar(&buf, ',');
/* errmessage */
appendCSVLiteral(&buf, edata->message);
appendStringInfoChar(&buf, ',');
/* errdetail or errdetail_log */ // 是否输出代码位置
if (edata->detail_log)
appendCSVLiteral(&buf, edata->detail_log);
else
appendCSVLiteral(&buf, edata->detail);
appendStringInfoChar(&buf, ',');
/* errhint */
appendCSVLiteral(&buf, edata->hint);
appendStringInfoChar(&buf, ',');
/* internal query */
appendCSVLiteral(&buf, edata->internalquery);
appendStringInfoChar(&buf, ',');
/* if printed internal query, print internal pos too */
if (edata->internalpos > 0 && edata->internalquery != NULL)
appendStringInfo(&buf, "%d", edata->internalpos);
appendStringInfoChar(&buf, ',');
/* errcontext */
if (!edata->hide_ctx)
appendCSVLiteral(&buf, edata->context);
appendStringInfoChar(&buf, ',');
/* user query --- only reported if not disabled by the caller */
if (is_log_level_output(edata->elevel, log_min_error_statement) &&
debug_query_string != NULL &&
!edata->hide_stmt)
print_stmt = true;
if (print_stmt)
appendCSVLiteral(&buf, debug_query_string);
appendStringInfoChar(&buf, ',');
if (print_stmt && edata->cursorpos > 0)
appendStringInfo(&buf, "%d", edata->cursorpos);
appendStringInfoChar(&buf, ',');
/* file error location */
if (Log_error_verbosity >= PGERROR_VERBOSE)
{
StringInfoData msgbuf;
initStringInfo(&msgbuf);
if (edata->funcname && edata->filename)
appendStringInfo(&msgbuf, "%s, %s:%d",
edata->funcname, edata->filename,
edata->lineno);
else if (edata->filename)
appendStringInfo(&msgbuf, "%s:%d",
edata->filename, edata->lineno);
appendCSVLiteral(&buf, msgbuf.data);
pfree(msgbuf.data);
}
appendStringInfoChar(&buf, ',');
/* application name */
if (application_name)
appendCSVLiteral(&buf, application_name);
appendStringInfoChar(&buf, '\n');
/* If in the syslogger process, try to write messages direct to file */
if (am_syslogger)
write_syslogger_file(buf.data, buf.len, LOG_DESTINATION_CSVLOG);
else
write_pipe_chunks(buf.data, buf.len, LOG_DESTINATION_CSVLOG);
pfree(buf.data);
}
另外需要提一下,如果写日志的是syslogger则直接写文件,如果是其他进程,则把日志发到pipe管道。
如果开启了SQL审计日志,小事务高并发会受到较大的影响,优化可以从这里的代码入手哦。