PostgreSQL json内容特殊字符使用unicode代替
背景
json中包含特殊字符时,可能导致输入异常,可以转换为UNICODE后,即可正常输入。
格式为 \u[0-f]{4}
postgres=# select '{"id1":1, "id2":2, "info":"digoal d\u0061t\u0061 ab", "c1":123, "c2":1.1, "c3":1.9999, "crt_time":"2018-01-01 10:10:10"}'::jsonb;
jsonb
-----------------------------------------------------------------------------------------------------------------------
{"c1": 123, "c2": 1.1, "c3": 1.9999, "id1": 1, "id2": 2, "info": "digoal data ab", "crt_time": "2018-01-01 10:10:10"}
(1 row)
格式
\u[0-f]{4}
json_lex_string@src/backend/utils/adt/json.c
else if (*s == '\\')
{
/* OK, we have an escape character. */
s++;
len++;
if (len >= lex->input_length)
{
lex->token_terminator = s;
report_invalid_token(lex);
}
else if (*s == 'u')
{
int i;
int ch = 0;
for (i = 1; i <= 4; i++)
{
s++;
len++;
if (len >= lex->input_length)
{
lex->token_terminator = s;
report_invalid_token(lex);
}
else if (*s >= '0' && *s <= '9')
ch = (ch * 16) + (*s - '0');
else if (*s >= 'a' && *s <= 'f')
ch = (ch * 16) + (*s - 'a') + 10;
else if (*s >= 'A' && *s <= 'F')
ch = (ch * 16) + (*s - 'A') + 10;
else
{
lex->token_terminator = s + pg_mblen(s);
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s",
"json"),
errdetail("\"\\u\" must be followed by four hexadecimal digits."),
report_json_context(lex)));
}
}
参考
json_lex_string@src/backend/utils/adt/json.c
《PostgreSQL 转义、UNICODE、与SQL注入》