pgsql_fdw cache lookup failed for type 0 error when used in function

5 minute read

背景

【2012-07-11 补充】

已解决

http://sourceforge.net/tracker/index.php?func=detail&aid=3539995&group_id=565635&atid=2294281

##【原文】

今天准备使用pgsql_fdw同步两个数据库之间的数据, 同步的过程写在函数里面.

但是在同步时发现会报错,

下载并安装pgsql_fdw :

https://github.com/dvarrazzo/interdbconnect/downloads  
mv pgsql_fdw $PG_SRC/contrib/  
cd $PG_SRC/contrib/pgsql_fdw  
. /home/postgres/.bash_profile  
USE_PGXS=1 make  
USE_PGXS=1 make install  

外部表的创建如下 :

digoal=# create extension pgsql_fdw;  
CREATE EXTENSION  
  
CREATE SERVER srv_digoal FOREIGN DATA WRAPPER pgsql_fdw  
OPTIONS (host '172.16.3.33', port '5432', dbname 'digoal');  
  
CREATE USER MAPPING FOR digoal SERVER srv_digoal  
OPTIONS (user 'rmt_digoal', password 'DIGOAL123');  
  
grant usage on foreign server srv_digoal to digoal;  
  
\c digoal digoal  
CREATE FOREIGN TABLE bill_table_digoal  
(  
id numeric(20,0),  
cn_id character varying(20),  
cn_name character varying(100),  
cn_type numeric(1,0),  
requestor character varying(3),  
cn_desc character varying(1000),  
create_time timestamp(0) without time zone,  
cn_code character varying(100),  
check_type numeric(1,0),  
check_msg character varying(400)  
) server srv_digoal options (nspname 'digoal', relname 'table_digoal');  

创建临时工作表, 用于暂缓中间数据 :

create table work_table_table_digoal (like table_digoal including all);  

同步数据的函数如下, 当有新增数据或数据不相等时同步 :

create or replace function sync_table_digoal() returns int as $$  
declare  
v_src_count int;  
v_dst_count int;  
v_equal_count int;  
begin  
v_src_count := 0;  
v_dst_count := 0;  
v_equal_count := 0;  
truncate table ONLY work_table_table_digoal;  
insert into work_table_table_digoal   
  (id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg)   
  select id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg from ft_bill_table_digoal;  
select count(*) into v_src_count from work_table_table_digoal;  
select count(*) into v_dst_count from table_digoal;  
raise notice 'v_src_count:%, v_dst_count:%',v_src_count,v_dst_count;  
if ( v_src_count = v_dst_count and v_src_count <> 0 ) then  
  -- 比较所有not null 字段  
  select count(*) into v_equal_count from work_table_table_digoal t1,table_digoal t2  
    where t1.id=t2.id   
    and t1.cn_id = t2.cn_id  
    and t1.cn_name = t2.cn_name  
    and t1.cn_type = t2.cn_type  
    and t1.requestor = t2.requestor  
    and t1.cn_code = t2.cn_code  
    and t1.check_type = t2.check_type;  
  raise notice 'v_src_count:%, v_dst_count:%, v_equal_count:%',v_src_count,v_dst_count,v_equal_count;  
  if ( v_equal_count <> v_src_count ) then  
    truncate table ONLY table_digoal;  
    insert into table_digoal   
    (id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg)  
    select id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg from work_table_table_digoal;  
  end if;  
elsif ( v_src_count <> v_dst_count and v_src_count <> 0 ) then  
  truncate table ONLY table_digoal;  
  insert into table_digoal   
  (id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg)  
  select id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg from work_table_table_digoal;  
elsif v_src_count = 0 then  
  raise notice 'ERROR: src no data.';  
  return 1;  
end if;  
return 0;  
exception when others then  
  raise notice 'ERROR: ';  
  raise notice 'v_src_count:%, v_dst_count:%, v_equal_count:%',v_src_count,v_dst_count,v_equal_count;  
  return 1;  
end;  
$$ language plpgsql;  

调用这个函数进行同步时, 报错出现在

insert into work_table_table_digoal   
  (id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg)   
  select id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg from ft_bill_table_digoal;  

也就是使用外部表同步过来的报错.

这句SQL拿出来在psql环境中单独执行没有问题. 只是放在函数中就出问题. 后来我不使用pgsql_fdw, 而是通过dblink创建一个远程表的视图则不会报错了.

可能是个pgsql_fdw的BUG.

【nagios监控】

同步脚本

postgres@db-192-168-100-33-> cat sync_digoal.sh  
#!/bin/bash  
. /home/postgres/.bash_profile  
echo -e "start sync digoal1;"  
date +%F\ %T  
psql -h 127.0.0.1 digoal digoal -c "select * from sync_digoal1()";  
date +%F\ %T  
echo -e "end sync digoal1;"  
  
echo -e "start sync digoal2;"  
date +%F\ %T  
psql -h 127.0.0.1 digoal digoal -c "select * from sync_digoal2()";  
date +%F\ %T  
echo -e "end sync digoal2;"  

执行计划

postgres@db-192-168-100-33-> crontab -l  
# sync digoal_data from idc1  
*/3 * * * * /home/postgres/script/sync_digoal/sync_digoal.sh >>/tmp/sync_digoal.log 2>&1  

nagios配置

[root@db-192-168-100-33 tmp]# less /usr/local/nagios/etc/nrpe.cfg   
command[check_sync_digoal]=/usr/local/nagios/libexec/check_sync_digoal.sh  

nagios监控脚本

[root@db-192-168-100-33 tmp]# cat /usr/local/nagios/libexec/check_sync_digoal.sh  
#!/bin/bash  
  
. /home/nagios/.bash_profile  
  
# FILE需和 postgresql crontab里面输出的文件一致  
# ALIVE_MINUTES=20 表示20分钟内$FILE被修改过, 心跳存在. 否则心跳停止(告警).  
FILE=/tmp/sync_digoal.log  
ALIVE_MINUTES=20  
ALIVE_CNT=0  
  
# 文件是否存在  
if [ -f $FILE ]; then  
  ALIVE_CNT=`find $FILE -mmin -$ALIVE_MINUTES -print|wc -l`  
  if [ $ALIVE_CNT -eq 1 ]; then  
    ERR_CNT=`tail -n 100 $FILE|grep -c ERROR`  
    if [ $ERR_CNT -ge 1 ]; then  
      tail -n 100 $FILE|grep ERROR  
      exit 2  
    else  
      exit 0  
    fi  
  else  
    echo -e "keepalive timeout $ALIVE_MINUTES mintues with FILE: $FILE."  
    exit 2  
  fi  
else  
  echo -e "File: $FILE not exist."  
  exit 2  
fi  

【排错】

strace -s 1024 -p 执行该函数的backend pid  

执行该函数

跟踪到 :

lsyscache.c\0L2440\0RgetTypeOutputInfo\0\0  

错误来自lsyscache.c里面的getTypeOutputInfo函数 :

/*  
 * getTypeOutputInfo  
 *  
 *              Get info needed for printing values of a type  
 */  
void  
getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)  
{  
        HeapTuple       typeTuple;  
        Form_pg_type pt;  
  
        typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type));  
        if (!HeapTupleIsValid(typeTuple))  
                elog(ERROR, "cache lookup failed for type %u", type);  
        pt = (Form_pg_type) GETSTRUCT(typeTuple);  
  
        if (!pt->typisdefined)  
                ereport(ERROR,  
                                (errcode(ERRCODE_UNDEFINED_OBJECT),  
                                 errmsg("type %s is only a shell",  
                                                format_type_be(type))));  
        if (!OidIsValid(pt->typoutput))  
                ereport(ERROR,  
                                (errcode(ERRCODE_UNDEFINED_FUNCTION),  
                                 errmsg("no output function available for type %s",  
                                                format_type_be(type))));  
  
        *typOutput = pt->typoutput;  
        *typIsVarlena = (!pt->typbyval) && (pt->typlen == -1);  
  
        ReleaseSysCache(typeTuple);  
}  

或者在psql中设置一个高级选项VERBOSITY = verbose也可以输出详细的错误信息.

例如 :

digoal=# \set VERBOSITY verbose  
digoal=# select * from f_test();  
ERROR:  XX000: cache lookup failed for type 0  
CONTEXT:  SQL statement "SELECT 1 from foreign_table_test limit 1"  
PL/pgSQL function "f_test" line 3 at PERFORM  
LOCATION:  getTypeOutputInfo, lsyscache.c:2440  

VERBOSITY可以设置三个选项, default, verbose, terse.

在pgsql_fdw模块中, 调用getTypeOutputInfo函数的地方,

pgsql_fdw.c :

getTypeOutputInfo(types[i], &out_func_oid, &isvarlena);  

修改为 :

				if ( types[i] != 0 )  
                                {  
                                  getTypeOutputInfo(types[i], &out_func_oid, &isvarlena);  
                                }  

以及 ruleutils.c :

        getTypeOutputInfo(constval->consttype,  
                                          &typoutput, &typIsVarlena);  

修改为 :

        if ( constval->consttype != 0 )  
        {  
        getTypeOutputInfo(constval->consttype,  
                                          &typoutput, &typIsVarlena);  
        }  

重新编译pgsql_fdw模块, 重启数据库.

重新执行函数 : 报错变为 :

postgres=# select * from f_test();  
ERROR:  XX000: cache lookup failed for function 0  
CONTEXT:  SQL statement "SELECT 1 from test limit 1"  
PL/pgSQL function "f_test" line 3 at PERFORM  
LOCATION:  fmgr_info_cxt_security, fmgr.c:216  

在pgsql_fdw 模块中调用fmgr_info_cxt_security函数的没有, 但是调用了fmgr_info, 简介调用到了fmgr_info_cxt_security.

fmgr.c :

/*  
 * This routine fills a FmgrInfo struct, given the OID  
 * of the function to be called.  
 *  
 * The caller's CurrentMemoryContext is used as the fn_mcxt of the info  
 * struct; this means that any subsidiary data attached to the info struct  
 * (either by fmgr_info itself, or later on by a function call handler)  
 * will be allocated in that context.  The caller must ensure that this  
 * context is at least as long-lived as the info struct itself.  This is  
 * not a problem in typical cases where the info struct is on the stack or  
 * in freshly-palloc'd space.  However, if one intends to store an info  
 * struct in a long-lived table, it's better to use fmgr_info_cxt.  
 */  
void  
fmgr_info(Oid functionId, FmgrInfo *finfo)  
{  
        fmgr_info_cxt(functionId, finfo, CurrentMemoryContext);  
}  
  
/*  
 * Fill a FmgrInfo struct, specifying a memory context in which its  
 * subsidiary data should go.  
 */  
void  
fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)  
{  
        fmgr_info_cxt_security(functionId, finfo, mcxt, false);  
}  

pgsql_fdw.c :

fmgr_info(out_func_oid, &func);  

【补充】

1. 创建一个pgsql_fdw外部表的视图, 把上面的函数中的外部表改为视图, 错误依旧.

postgres=# create view v_test as select * from test;  
CREATE VIEW  
postgres=# select * from v_test;  
 id |  info    
----+--------  
  1 | DIGOAL  
(1 row)  
postgres=# CREATE OR REPLACE FUNCTION public.f_test()  
 RETURNS integer  
 LANGUAGE plpgsql  
AS $function$  
declare  
begin  
perform 1 from v_test limit 1;  
return 1;  
end;  
$function$;  
CREATE FUNCTION  
postgres=# select * from f_test();  
ERROR:  cache lookup failed for type 0  
CONTEXT:  SQL statement "SELECT 1 from v_test limit 1"  
        PL/pgSQL function "f_test" line 4 at PERFORM  
STATEMENT:  select * from f_test();  

2. 在SQL函数中使用pgsql_fdw的外部表正常.

postgres=# create or replace function f_test1() returns int as $$  
postgres$# select id from test limit 1;  
postgres$# $$ language sql;  
CREATE FUNCTION  
postgres=# select * from f_test1();  
 f_test1   
---------  
       1  
(1 row)  

3. 在plpgsql函数中使用oracle fdw的外部表没有异常.

charge_sync=> create or replace function f_test () returns int as $$  
charge_sync$> declare  
charge_sync$> begin  
charge_sync$> perform 1 from ora_tbl limit 1;  
charge_sync$> return 0;  
charge_sync$> end;  
charge_sync$> $$ language plpgsql;  
CREATE FUNCTION  
charge_sync=> select * from f_test();  
 f_test   
--------  
      0  
(1 row)  

【参考】

pgsql_fdw.c

src/backend/utils/fmgr/fmgr.c

src/backend/utils/cache/lsyscache.c

Flag Counter

digoal’s 大量PostgreSQL文章入口