Use PostgreSQL server program import binary data into database bytea type
背景
如何将数据库服务器上的文件导入PostgreSQL数据库. 例如图片.
方法1
使用pg_read_binary_file函数.
如下 :
postgres=# select pg_read_binary_file('postgresql.conf');
因为postgresql.conf在$PGDATA, 下面所以可以读取.
如果是绝对路径或者不在$PGDATA或它的子目录下将报错 :
postgres=# select pg_read_binary_file('/home/ocz/t.sh');
ERROR: absolute path not allowed
postgres=# select pg_read_binary_file('../postgresql.conf');
ERROR: path must be in or below the current directory
除了读全文件以外, read_binary_file还支持offset和limit.
方法1除了对文件的位置做了限制外, 其实还是很好用的.
方法2
使用大对象,不受文件路径限制.
postgres=# select lo_import('/home/ocz/t.sh');
lo_import
-----------
3475818
(1 row)
/home/ocz/t.sh被导入pg_largeobject中, 在data字段中.
postgres=# \d pg_largeobject
Table "pg_catalog.pg_largeobject"
Column | Type | Modifiers
--------+---------+-----------
loid | oid | not null
pageno | integer | not null
data | bytea |
Indexes:
"pg_largeobject_loid_pn_index" UNIQUE, btree (loid, pageno)
查询
postgres=# select data from pg_largeobject where loid=3475818;
当然还有更细致的查询, 如data的offset, limit.
大对象的使用例子可参考如下 :
http://blog.163.com/digoal@126/blog/static/16387704020130931040444/
方法2是集中式的管理, 虽然可以读任意位置的文件, 但是如果想在自己的表中存储bytea, 那么方法2不适合, 除非你先导入大对象,然后从大对象转移到普通表的bytea中.
方法3
解决方法1 和 方法2 的弊端.
自建读文件的函数 :
vi readfile.c
#include "postgres.h"
#include <sys/file.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "postmaster/syslogger.h"
#include "storage/fd.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"
#include "fmgr.h"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(cu_pg_read_binary_file_all);
/*
* Convert a "text" filename argument to C string, and check it's allowable.
*
* Filename may be absolute or relative to the DataDir, but we only allow
* absolute paths that match DataDir or Log_directory.
*/
static char *
cu_convert_and_check_filename(text *arg)
{
char *filename;
filename = text_to_cstring(arg);
canonicalize_path(filename); /* filename can change length here */
return filename;
}
Datum
cu_pg_read_binary_file_all(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_P(0);
char *filename;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser to read files"))));
filename = cu_convert_and_check_filename(filename_t);
PG_RETURN_BYTEA_P(read_binary_file(filename, 0, -1));
}
编译 :
gcc -O3 -Wall -Wextra -Werror -I /home/ocz/postgresql-9.2.1/src/include -g -fPIC -c ./readfile.c -o readfile.o
gcc -O3 -Wall -Wextra -Werror -I /home/ocz/postgresql-9.2.1/src/include -g -shared readfile.o -o libreadfile.so
拷贝到库目录 :
cp libreadfile.so $PGHOME/lib/
创建函数 :
psql digoal digoal
digoal=# create or replace function cu_pg_read_binary_file(text) returns bytea as '$libdir/libreadfile.so', 'cu_pg_read_binary_file_all' language C STRICT;
使用 :
digoal=# select cu_pg_read_binary_file('/home/ocz/t.sh');
方法三的好处是, 不再受文件位置的限制.
参考
1. http://blog.163.com/digoal@126/blog/static/16387704020130931040444/
2. src/backend/utils/adt/genfile.c
这个函数包含了文件位置检测, 所以pg_read_binary_file不能读绝对路径以及$PGDATA之外的路径原因就在此.
00041 /*
00042 * Convert a "text" filename argument to C string, and check it's allowable.
00043 *
00044 * Filename may be absolute or relative to the DataDir, but we only allow
00045 * absolute paths that match DataDir or Log_directory.
00046 */
00047 static char *
00048 convert_and_check_filename(text *arg)
00049 {
00050 char *filename;
00051
00052 filename = text_to_cstring(arg);
00053 canonicalize_path(filename); /* filename can change length here */
00054
00055 if (is_absolute_path(filename))
00056 {
00057 /* Disallow '/a/b/data/..' */
00058 if (path_contains_parent_reference(filename))
00059 ereport(ERROR,
00060 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
00061 (errmsg("reference to parent directory (\"..\") not allowed"))));
00062
00063 /*
00064 * Allow absolute paths if within DataDir or Log_directory, even
00065 * though Log_directory might be outside DataDir.
00066 */
00067 if (!path_is_prefix_of_path(DataDir, filename) &&
00068 (!is_absolute_path(Log_directory) ||
00069 !path_is_prefix_of_path(Log_directory, filename)))
00070 ereport(ERROR,
00071 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
00072 (errmsg("absolute path not allowed"))));
00073 }
00074 else if (!path_is_relative_and_below_cwd(filename))
00075 ereport(ERROR,
00076 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
00077 (errmsg("path must be in or below the current directory"))));
00078
00079 return filename;
00080 }
PostgreSQL 10例子(以yum 安装的postgresql为例, 注意devel包也要安装好)
#include "postgres.h"
#include <sys/file.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>
//#include "access/htup_details.h"
#include "access/xlog_internal.h"
#include "catalog/pg_type.h"
//#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "postmaster/syslogger.h"
#include "storage/fd.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"
#include "fmgr.h"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(cu_pg_read_binary_file);
PG_FUNCTION_INFO_V1(cu_pg_read_file);
/*
* Convert a "text" filename argument to C string, and check it's allowable.
*
* Filename may be absolute or relative to the DataDir, but we only allow
* absolute paths that match DataDir or Log_directory.
*/
static char *
cu_convert_and_check_filename(text *arg)
{
char *filename;
filename = text_to_cstring(arg);
canonicalize_path(filename); /* filename can change length here */
return filename;
}
/*
* Read a section of a file, returning it as bytea
*
* Caller is responsible for all permissions checking.
*
* We read the whole of the file when bytes_to_read is negative.
*/
static bytea *
read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
bool missing_ok)
{
bytea *buf;
size_t nbytes;
FILE *file;
if (bytes_to_read < 0)
{
if (seek_offset < 0)
bytes_to_read = -seek_offset;
else
{
struct stat fst;
if (stat(filename, &fst) < 0)
{
if (missing_ok && errno == ENOENT)
return NULL;
else
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", filename)));
}
bytes_to_read = fst.st_size - seek_offset;
}
}
/* not sure why anyone thought that int64 length was a good idea */
if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length too large")));
if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
{
if (missing_ok && errno == ENOENT)
return NULL;
else
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\" for reading: %m",
filename)));
}
if (fseeko(file, (off_t) seek_offset,
(seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not seek in file \"%s\": %m", filename)));
buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
if (ferror(file))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m", filename)));
SET_VARSIZE(buf, nbytes + VARHDRSZ);
FreeFile(file);
return buf;
}
/*
* Similar to read_binary_file, but we verify that the contents are valid
* in the database encoding.
*/
static text *
read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
bool missing_ok)
{
bytea *buf;
buf = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok);
if (buf != NULL)
{
/* Make sure the input is valid */
pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
/* OK, we can cast it to text safely */
return (text *) buf;
}
else
return NULL;
}
/*
* Read a section of a file, returning it as text
*/
Datum
cu_pg_read_file(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
int64 seek_offset = 0;
int64 bytes_to_read = -1;
bool missing_ok = false;
char *filename;
text *result;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser to read files"))));
/* handle optional arguments */
if (PG_NARGS() >= 3)
{
seek_offset = PG_GETARG_INT64(1);
bytes_to_read = PG_GETARG_INT64(2);
if (bytes_to_read < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length cannot be negative")));
}
if (PG_NARGS() >= 4)
missing_ok = PG_GETARG_BOOL(3);
filename = cu_convert_and_check_filename(filename_t);
result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
if (result)
PG_RETURN_TEXT_P(result);
else
PG_RETURN_NULL();
}
/*
* Read a section of a file, returning it as bytea
*/
Datum
cu_pg_read_binary_file(PG_FUNCTION_ARGS)
{
text *filename_t = PG_GETARG_TEXT_PP(0);
int64 seek_offset = 0;
int64 bytes_to_read = -1;
bool missing_ok = false;
char *filename;
bytea *result;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser to read files"))));
/* handle optional arguments */
if (PG_NARGS() >= 3)
{
seek_offset = PG_GETARG_INT64(1);
bytes_to_read = PG_GETARG_INT64(2);
if (bytes_to_read < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length cannot be negative")));
}
if (PG_NARGS() >= 4)
missing_ok = PG_GETARG_BOOL(3);
filename = cu_convert_and_check_filename(filename_t);
result = read_binary_file(filename, seek_offset,
bytes_to_read, missing_ok);
if (result)
PG_RETURN_BYTEA_P(result);
else
PG_RETURN_NULL();
}
gcc -O3 -Wall -Wextra -Werror -I/usr/pgsql-10/include/server -g -fPIC -c ./readfile.c -o readfile.o
gcc -O3 -Wall -Wextra -Werror -I/usr/pgsql-10/include/server -g -shared readfile.o -o libreadfile.so
cp libreadfile.so /usr/pgsql-10/lib/
postgres=# create or replace function cu_pg_read_binary_file(text) returns bytea as '$libdir/libreadfile.so', 'cu_pg_read_binary_file' language C STRICT;
CREATE FUNCTION
postgres=# create or replace function cu_pg_read_file(text) returns text as '$libdir/libreadfile.so', 'cu_pg_read_file' language C STRICT;
CREATE FUNCTION
postgres=# select cu_pg_read_binary_file('/var/lib/pgsql/test1.sql');
cu_pg_read_binary_file
--------------------------------------------------------------------------------------------------------------------------------------------------------
\x5c7365742069642072616e646f6d28312c313030303030303030290a75706461746520746573743120736574206372745f74696d653d6e6f7728292077686572652069643d3a69643b0a
(1 row)
postgres=# select cu_pg_read_file('/var/lib/pgsql/test1.sql');
cu_pg_read_file
-----------------------------------------------
\set id random(1,100000000) +
update test1 set crt_time=now() where id=:id;+
(1 row)