pgsql_fdw cache lookup failed for type 0 error when used in function
背景
【2012-07-11 补充】
已解决
http://sourceforge.net/tracker/index.php?func=detail&aid=3539995&group_id=565635&atid=2294281
##【原文】
今天准备使用pgsql_fdw同步两个数据库之间的数据, 同步的过程写在函数里面.
但是在同步时发现会报错,
下载并安装pgsql_fdw :
https://github.com/dvarrazzo/interdbconnect/downloads
mv pgsql_fdw $PG_SRC/contrib/
cd $PG_SRC/contrib/pgsql_fdw
. /home/postgres/.bash_profile
USE_PGXS=1 make
USE_PGXS=1 make install
外部表的创建如下 :
digoal=# create extension pgsql_fdw;
CREATE EXTENSION
CREATE SERVER srv_digoal FOREIGN DATA WRAPPER pgsql_fdw
OPTIONS (host '172.16.3.33', port '5432', dbname 'digoal');
CREATE USER MAPPING FOR digoal SERVER srv_digoal
OPTIONS (user 'rmt_digoal', password 'DIGOAL123');
grant usage on foreign server srv_digoal to digoal;
\c digoal digoal
CREATE FOREIGN TABLE bill_table_digoal
(
id numeric(20,0),
cn_id character varying(20),
cn_name character varying(100),
cn_type numeric(1,0),
requestor character varying(3),
cn_desc character varying(1000),
create_time timestamp(0) without time zone,
cn_code character varying(100),
check_type numeric(1,0),
check_msg character varying(400)
) server srv_digoal options (nspname 'digoal', relname 'table_digoal');
创建临时工作表, 用于暂缓中间数据 :
create table work_table_table_digoal (like table_digoal including all);
同步数据的函数如下, 当有新增数据或数据不相等时同步 :
create or replace function sync_table_digoal() returns int as $$
declare
v_src_count int;
v_dst_count int;
v_equal_count int;
begin
v_src_count := 0;
v_dst_count := 0;
v_equal_count := 0;
truncate table ONLY work_table_table_digoal;
insert into work_table_table_digoal
(id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg)
select id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg from ft_bill_table_digoal;
select count(*) into v_src_count from work_table_table_digoal;
select count(*) into v_dst_count from table_digoal;
raise notice 'v_src_count:%, v_dst_count:%',v_src_count,v_dst_count;
if ( v_src_count = v_dst_count and v_src_count <> 0 ) then
-- 比较所有not null 字段
select count(*) into v_equal_count from work_table_table_digoal t1,table_digoal t2
where t1.id=t2.id
and t1.cn_id = t2.cn_id
and t1.cn_name = t2.cn_name
and t1.cn_type = t2.cn_type
and t1.requestor = t2.requestor
and t1.cn_code = t2.cn_code
and t1.check_type = t2.check_type;
raise notice 'v_src_count:%, v_dst_count:%, v_equal_count:%',v_src_count,v_dst_count,v_equal_count;
if ( v_equal_count <> v_src_count ) then
truncate table ONLY table_digoal;
insert into table_digoal
(id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg)
select id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg from work_table_table_digoal;
end if;
elsif ( v_src_count <> v_dst_count and v_src_count <> 0 ) then
truncate table ONLY table_digoal;
insert into table_digoal
(id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg)
select id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg from work_table_table_digoal;
elsif v_src_count = 0 then
raise notice 'ERROR: src no data.';
return 1;
end if;
return 0;
exception when others then
raise notice 'ERROR: ';
raise notice 'v_src_count:%, v_dst_count:%, v_equal_count:%',v_src_count,v_dst_count,v_equal_count;
return 1;
end;
$$ language plpgsql;
调用这个函数进行同步时, 报错出现在
insert into work_table_table_digoal
(id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg)
select id,cn_id,cn_name,cn_type,requestor,cn_desc,create_time,cn_code,check_type,check_msg from ft_bill_table_digoal;
也就是使用外部表同步过来的报错.
这句SQL拿出来在psql环境中单独执行没有问题. 只是放在函数中就出问题. 后来我不使用pgsql_fdw, 而是通过dblink创建一个远程表的视图则不会报错了.
可能是个pgsql_fdw的BUG.
【nagios监控】
同步脚本
postgres@db-192-168-100-33-> cat sync_digoal.sh
#!/bin/bash
. /home/postgres/.bash_profile
echo -e "start sync digoal1;"
date +%F\ %T
psql -h 127.0.0.1 digoal digoal -c "select * from sync_digoal1()";
date +%F\ %T
echo -e "end sync digoal1;"
echo -e "start sync digoal2;"
date +%F\ %T
psql -h 127.0.0.1 digoal digoal -c "select * from sync_digoal2()";
date +%F\ %T
echo -e "end sync digoal2;"
执行计划
postgres@db-192-168-100-33-> crontab -l
# sync digoal_data from idc1
*/3 * * * * /home/postgres/script/sync_digoal/sync_digoal.sh >>/tmp/sync_digoal.log 2>&1
nagios配置
[root@db-192-168-100-33 tmp]# less /usr/local/nagios/etc/nrpe.cfg
command[check_sync_digoal]=/usr/local/nagios/libexec/check_sync_digoal.sh
nagios监控脚本
[root@db-192-168-100-33 tmp]# cat /usr/local/nagios/libexec/check_sync_digoal.sh
#!/bin/bash
. /home/nagios/.bash_profile
# FILE需和 postgresql crontab里面输出的文件一致
# ALIVE_MINUTES=20 表示20分钟内$FILE被修改过, 心跳存在. 否则心跳停止(告警).
FILE=/tmp/sync_digoal.log
ALIVE_MINUTES=20
ALIVE_CNT=0
# 文件是否存在
if [ -f $FILE ]; then
ALIVE_CNT=`find $FILE -mmin -$ALIVE_MINUTES -print|wc -l`
if [ $ALIVE_CNT -eq 1 ]; then
ERR_CNT=`tail -n 100 $FILE|grep -c ERROR`
if [ $ERR_CNT -ge 1 ]; then
tail -n 100 $FILE|grep ERROR
exit 2
else
exit 0
fi
else
echo -e "keepalive timeout $ALIVE_MINUTES mintues with FILE: $FILE."
exit 2
fi
else
echo -e "File: $FILE not exist."
exit 2
fi
【排错】
strace -s 1024 -p 执行该函数的backend pid
执行该函数
跟踪到 :
lsyscache.c\0L2440\0RgetTypeOutputInfo\0\0
错误来自lsyscache.c里面的getTypeOutputInfo函数 :
/*
* getTypeOutputInfo
*
* Get info needed for printing values of a type
*/
void
getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
{
HeapTuple typeTuple;
Form_pg_type pt;
typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type));
if (!HeapTupleIsValid(typeTuple))
elog(ERROR, "cache lookup failed for type %u", type);
pt = (Form_pg_type) GETSTRUCT(typeTuple);
if (!pt->typisdefined)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("type %s is only a shell",
format_type_be(type))));
if (!OidIsValid(pt->typoutput))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("no output function available for type %s",
format_type_be(type))));
*typOutput = pt->typoutput;
*typIsVarlena = (!pt->typbyval) && (pt->typlen == -1);
ReleaseSysCache(typeTuple);
}
或者在psql中设置一个高级选项VERBOSITY = verbose也可以输出详细的错误信息.
例如 :
digoal=# \set VERBOSITY verbose
digoal=# select * from f_test();
ERROR: XX000: cache lookup failed for type 0
CONTEXT: SQL statement "SELECT 1 from foreign_table_test limit 1"
PL/pgSQL function "f_test" line 3 at PERFORM
LOCATION: getTypeOutputInfo, lsyscache.c:2440
VERBOSITY可以设置三个选项, default, verbose, terse.
在pgsql_fdw模块中, 调用getTypeOutputInfo函数的地方,
pgsql_fdw.c :
getTypeOutputInfo(types[i], &out_func_oid, &isvarlena);
修改为 :
if ( types[i] != 0 )
{
getTypeOutputInfo(types[i], &out_func_oid, &isvarlena);
}
以及 ruleutils.c :
getTypeOutputInfo(constval->consttype,
&typoutput, &typIsVarlena);
修改为 :
if ( constval->consttype != 0 )
{
getTypeOutputInfo(constval->consttype,
&typoutput, &typIsVarlena);
}
重新编译pgsql_fdw模块, 重启数据库.
重新执行函数 : 报错变为 :
postgres=# select * from f_test();
ERROR: XX000: cache lookup failed for function 0
CONTEXT: SQL statement "SELECT 1 from test limit 1"
PL/pgSQL function "f_test" line 3 at PERFORM
LOCATION: fmgr_info_cxt_security, fmgr.c:216
在pgsql_fdw 模块中调用fmgr_info_cxt_security函数的没有, 但是调用了fmgr_info, 简介调用到了fmgr_info_cxt_security.
fmgr.c :
/*
* This routine fills a FmgrInfo struct, given the OID
* of the function to be called.
*
* The caller's CurrentMemoryContext is used as the fn_mcxt of the info
* struct; this means that any subsidiary data attached to the info struct
* (either by fmgr_info itself, or later on by a function call handler)
* will be allocated in that context. The caller must ensure that this
* context is at least as long-lived as the info struct itself. This is
* not a problem in typical cases where the info struct is on the stack or
* in freshly-palloc'd space. However, if one intends to store an info
* struct in a long-lived table, it's better to use fmgr_info_cxt.
*/
void
fmgr_info(Oid functionId, FmgrInfo *finfo)
{
fmgr_info_cxt(functionId, finfo, CurrentMemoryContext);
}
/*
* Fill a FmgrInfo struct, specifying a memory context in which its
* subsidiary data should go.
*/
void
fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
{
fmgr_info_cxt_security(functionId, finfo, mcxt, false);
}
pgsql_fdw.c :
fmgr_info(out_func_oid, &func);
【补充】
1. 创建一个pgsql_fdw外部表的视图, 把上面的函数中的外部表改为视图, 错误依旧.
postgres=# create view v_test as select * from test;
CREATE VIEW
postgres=# select * from v_test;
id | info
----+--------
1 | DIGOAL
(1 row)
postgres=# CREATE OR REPLACE FUNCTION public.f_test()
RETURNS integer
LANGUAGE plpgsql
AS $function$
declare
begin
perform 1 from v_test limit 1;
return 1;
end;
$function$;
CREATE FUNCTION
postgres=# select * from f_test();
ERROR: cache lookup failed for type 0
CONTEXT: SQL statement "SELECT 1 from v_test limit 1"
PL/pgSQL function "f_test" line 4 at PERFORM
STATEMENT: select * from f_test();
2. 在SQL函数中使用pgsql_fdw的外部表正常.
postgres=# create or replace function f_test1() returns int as $$
postgres$# select id from test limit 1;
postgres$# $$ language sql;
CREATE FUNCTION
postgres=# select * from f_test1();
f_test1
---------
1
(1 row)
3. 在plpgsql函数中使用oracle fdw的外部表没有异常.
charge_sync=> create or replace function f_test () returns int as $$
charge_sync$> declare
charge_sync$> begin
charge_sync$> perform 1 from ora_tbl limit 1;
charge_sync$> return 0;
charge_sync$> end;
charge_sync$> $$ language plpgsql;
CREATE FUNCTION
charge_sync=> select * from f_test();
f_test
--------
0
(1 row)
【参考】
pgsql_fdw.c
src/backend/utils/fmgr/fmgr.c
src/backend/utils/cache/lsyscache.c