PostgreSQL json内容特殊字符使用unicode代替

less than 1 minute read

背景

json中包含特殊字符时,可能导致输入异常,可以转换为UNICODE后,即可正常输入。

格式为 \u[0-f]{4}

postgres=# select '{"id1":1, "id2":2, "info":"digoal d\u0061t\u0061 ab", "c1":123, "c2":1.1, "c3":1.9999, "crt_time":"2018-01-01 10:10:10"}'::jsonb;  
                                                         jsonb                                                           
-----------------------------------------------------------------------------------------------------------------------  
 {"c1": 123, "c2": 1.1, "c3": 1.9999, "id1": 1, "id2": 2, "info": "digoal data ab", "crt_time": "2018-01-01 10:10:10"}  
(1 row)  

格式

\u[0-f]{4}
json_lex_string@src/backend/utils/adt/json.c  
  
                else if (*s == '\\')  
                {  
                        /* OK, we have an escape character. */  
                        s++;  
                        len++;  
                        if (len >= lex->input_length)  
                        {  
                                lex->token_terminator = s;  
                                report_invalid_token(lex);  
                        }  
                        else if (*s == 'u')  
                        {  
                                int                     i;  
                                int                     ch = 0;  
  
                                for (i = 1; i <= 4; i++)  
                                {  
                                        s++;  
                                        len++;  
                                        if (len >= lex->input_length)  
                                        {  
                                                lex->token_terminator = s;  
                                                report_invalid_token(lex);  
                                        }  
                                        else if (*s >= '0' && *s <= '9')  
                                                ch = (ch * 16) + (*s - '0');  
                                        else if (*s >= 'a' && *s <= 'f')  
                                                ch = (ch * 16) + (*s - 'a') + 10;  
                                        else if (*s >= 'A' && *s <= 'F')  
                                                ch = (ch * 16) + (*s - 'A') + 10;  
                                        else  
                                        {  
                                                lex->token_terminator = s + pg_mblen(s);  
                                                ereport(ERROR,  
                                                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),  
                                                                 errmsg("invalid input syntax for type %s",  
                                                                                "json"),  
                                                                 errdetail("\"\\u\" must be followed by four hexadecimal digits."),  
                                                                 report_json_context(lex)));  
                                        }  
                                }  

参考

json_lex_string@src/backend/utils/adt/json.c

《PostgreSQL 转义、UNICODE、与SQL注入》

Flag Counter

digoal’s 大量PostgreSQL文章入口