PostgreSQL Oracle 兼容性之 - 函数、类型、多国语言

6 minute read

背景

PostgreSQL的FDW支持在PostgreSQL中使用外部表的方式直接访问其他数据源的数据,就如同访问本地表一样。

FDW接口是开放的,所以任何人都可以开发对应外部数据源的接口,比如oracle_fdw就是用来访问Oracle数据库的。

FDW开发接口中,代码中会包括数据的转换,函数的转换,等

所以我们从oracle_fdw这个插件的代码中,可以看到Oracle与PostgreSQL的类型、函数、多国语言等映射关系。

正文

PostgreSQL与Oracle函数映射关系

小写为PostgreSQL函数

大写为Oracle函数

		case T_FuncExpr:  
			func = (FuncExpr *)expr;  
  
			if (! canHandleType(func->funcresulttype))  
				return NULL;  
  
			/* do nothing for implicit casts */  
			if (func->funcformat == COERCE_IMPLICIT_CAST)  
				return deparseExpr(session, foreignrel, linitial(func->args), oraTable, params);  
  
			/* get function name and schema */  
			tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(func->funcid));  
			if (! HeapTupleIsValid(tuple))  
			{  
				elog(ERROR, "cache lookup failed for function %u", func->funcid);  
			}  
			opername = pstrdup(((Form_pg_proc)GETSTRUCT(tuple))->proname.data);  
			schema = ((Form_pg_proc)GETSTRUCT(tuple))->pronamespace;  
			ReleaseSysCache(tuple);  
  
			/* ignore functions in other than the pg_catalog schema */  
			if (schema != PG_CATALOG_NAMESPACE)  
				return NULL;  
  
			/* the "normal" functions that we can translate */  
			if (strcmp(opername, "abs") == 0  
				|| strcmp(opername, "acos") == 0  
				|| strcmp(opername, "asin") == 0  
				|| strcmp(opername, "atan") == 0  
				|| strcmp(opername, "atan2") == 0  
				|| strcmp(opername, "ceil") == 0  
				|| strcmp(opername, "ceiling") == 0  
				|| strcmp(opername, "char_length") == 0  
				|| strcmp(opername, "character_length") == 0  
				|| strcmp(opername, "concat") == 0  
				|| strcmp(opername, "cos") == 0  
				|| strcmp(opername, "exp") == 0  
				|| strcmp(opername, "initcap") == 0  
				|| strcmp(opername, "length") == 0  
				|| strcmp(opername, "lower") == 0  
				|| strcmp(opername, "lpad") == 0  
				|| strcmp(opername, "ltrim") == 0  
				|| strcmp(opername, "mod") == 0  
				|| strcmp(opername, "octet_length") == 0  
				|| strcmp(opername, "position") == 0  
				|| strcmp(opername, "pow") == 0  
				|| strcmp(opername, "power") == 0  
				|| strcmp(opername, "replace") == 0  
				|| strcmp(opername, "round") == 0  
				|| strcmp(opername, "rpad") == 0  
				|| strcmp(opername, "rtrim") == 0  
				|| strcmp(opername, "sign") == 0  
				|| strcmp(opername, "sin") == 0  
				|| strcmp(opername, "sqrt") == 0  
				|| strcmp(opername, "strpos") == 0  
				|| strcmp(opername, "substr") == 0  
				|| (strcmp(opername, "substring") == 0 && list_length(func->args) == 3)  
				|| strcmp(opername, "tan") == 0  
				|| strcmp(opername, "to_char") == 0  
				|| strcmp(opername, "to_date") == 0  
				|| strcmp(opername, "to_number") == 0  
				|| strcmp(opername, "to_timestamp") == 0  
				|| strcmp(opername, "translate") == 0  
				|| strcmp(opername, "trunc") == 0  
				|| strcmp(opername, "upper") == 0)  
			{  
				initStringInfo(&result);  
  
				if (strcmp(opername, "ceiling") == 0)  
					appendStringInfo(&result, "CEIL(");  
				else if (strcmp(opername, "char_length") == 0  
						|| strcmp(opername, "character_length") == 0)  
					appendStringInfo(&result, "LENGTH(");  
				else if (strcmp(opername, "pow") == 0)  
					appendStringInfo(&result, "POWER(");  
				else if (strcmp(opername, "octet_length") == 0)  
					appendStringInfo(&result, "LENGTHB(");  
				else if (strcmp(opername, "position") == 0  
						|| strcmp(opername, "strpos") == 0)  
					appendStringInfo(&result, "INSTR(");  
				else if (strcmp(opername, "substring") == 0)  
					appendStringInfo(&result, "SUBSTR(");  
				else  
					appendStringInfo(&result, "%s(", opername);  
  
				first_arg = true;  
				foreach(cell, func->args)  
				{  
					arg = deparseExpr(session, foreignrel, lfirst(cell), oraTable, params);  
					if (arg == NULL)  
					{  
						pfree(result.data);  
						pfree(opername);  
						return NULL;  
					}  
  
					if (first_arg)  
					{  
						first_arg = false;  
						appendStringInfo(&result, "%s", arg);  
					}  
					else  
					{  
						appendStringInfo(&result, ", %s", arg);  
					}  
					pfree(arg);  
				}  
  
				appendStringInfo(&result, ")");  
			}  
			else if (strcmp(opername, "date_part") == 0)  
			{  
				/* special case: EXTRACT */  
				left = deparseExpr(session, foreignrel, linitial(func->args), oraTable, params);  
				if (left == NULL)  
				{  
					pfree(opername);  
					return NULL;  
				}  
  
				/* can only handle these fields in Oracle */  
				if (strcmp(left, "'year'") == 0  
					|| strcmp(left, "'month'") == 0  
					|| strcmp(left, "'day'") == 0  
					|| strcmp(left, "'hour'") == 0  
					|| strcmp(left, "'minute'") == 0  
					|| strcmp(left, "'second'") == 0  
					|| strcmp(left, "'timezone_hour'") == 0  
					|| strcmp(left, "'timezone_minute'") == 0)  
				{  
					/* remove final quote */  
					left[strlen(left) - 1] = '\0';  
  
					right = deparseExpr(session, foreignrel, lsecond(func->args), oraTable, params);  
					if (right == NULL)  
					{  
						pfree(opername);  
						pfree(left);  
						return NULL;  
					}  
  
					initStringInfo(&result);  
					appendStringInfo(&result, "EXTRACT(%s FROM %s)", left + 1, right);  
				}  
				else  
				{  
					pfree(opername);  
					pfree(left);  
					return NULL;  
				}  
  
				pfree(left);  
				pfree(right);  
			}  
			else if (strcmp(opername, "now") == 0 || strcmp(opername, "transaction_timestamp") == 0)  
			{  
				/* special case: current timestamp */  
				initStringInfo(&result);  
				appendStringInfo(&result, "(CAST (:now AS TIMESTAMP WITH TIME ZONE))");  
			}  
			else  
			{  
				/* function that we cannot render for Oracle */  
				pfree(opername);  
				return NULL;  
			}  

PostgreSQL与Oracle类型映射关系

大写为Oracle类型

小写为PostgreSQL类型

			switch (type)  
			{  
				case ORA_TYPE_CHAR:  
				case ORA_TYPE_NCHAR:  
					appendStringInfo(&buf, "character(%d)", charlen == 0 ? 1 : charlen);  
					break;  
				case ORA_TYPE_VARCHAR2:  
				case ORA_TYPE_NVARCHAR2:  
					appendStringInfo(&buf, "character varying(%d)", charlen == 0 ? 1 : charlen);  
					break;  
				case ORA_TYPE_CLOB:  
				case ORA_TYPE_LONG:  
					appendStringInfo(&buf, "text");  
					break;  
				case ORA_TYPE_NUMBER:  
					if (typeprec == 0)  
						appendStringInfo(&buf, "numeric");  
					else if (typescale == 0)  
					{  
						if (typeprec < 5)  
							appendStringInfo(&buf, "smallint");  
						else if (typeprec < 10)  
							appendStringInfo(&buf, "integer");  
						else if (typeprec < 19)  
							appendStringInfo(&buf, "bigint");  
						else  
							appendStringInfo(&buf, "numeric(%d)", typeprec);  
					}  
					else  
						appendStringInfo(&buf, "numeric(%d, %d)", typeprec, typescale);  
					break;  
				case ORA_TYPE_FLOAT:  
					if (typeprec < 54)  
						appendStringInfo(&buf, "float(%d)", typeprec);  
					else  
						appendStringInfo(&buf, "numeric");  
					break;  
				case ORA_TYPE_BINARYFLOAT:  
					appendStringInfo(&buf, "real");  
					break;  
				case ORA_TYPE_BINARYDOUBLE:  
					appendStringInfo(&buf, "double precision");  
					break;  
				case ORA_TYPE_RAW:  
				case ORA_TYPE_BLOB:  
				case ORA_TYPE_BFILE:  
				case ORA_TYPE_LONGRAW:  
					appendStringInfo(&buf, "bytea");  
					break;  
				case ORA_TYPE_DATE:  
					appendStringInfo(&buf, "timestamp(0) without time zone");  
					break;  
				case ORA_TYPE_TIMESTAMP:  
					appendStringInfo(&buf, "timestamp(%d) without time zone", (typescale > 6) ? 6 : typescale);  
					break;  
				case ORA_TYPE_TIMESTAMPTZ:  
					appendStringInfo(&buf, "timestamp(%d) with time zone", (typescale > 6) ? 6 : typescale);  
					break;  
				case ORA_TYPE_INTERVALD2S:  
					appendStringInfo(&buf, "interval(%d)", (typescale > 6) ? 6 : typescale);  
					break;  
				case ORA_TYPE_INTERVALY2M:  
					appendStringInfo(&buf, "interval(0)");  
					break;  
				case ORA_TYPE_GEOMETRY:  
					if (GEOMETRYOID != InvalidOid)  
					{  
						appendStringInfo(&buf, "geometry");  
						break;  
					}  
					/* fall through */  
				default:  
					elog(DEBUG2, "column \"%s\" of table \"%s\" has an untranslatable data type", colname, tabname);  
					appendStringInfo(&buf, "text");  
			}  

Mapping Oracle to PostgreSQL data types

PostgreSQL与Oracle多国语言映射关系

server_encoding对应的为PostgreSQL字符集

charset对应的为Oracle字符集

	if (nls_lang == NULL)  
	{  
		server_encoding = pstrdup(GetConfigOption("server_encoding", false, true));  
  
		/* find an Oracle client character set that matches the database encoding */  
		if (strcmp(server_encoding, "UTF8") == 0)  
			charset = "AL32UTF8";  
		else if (strcmp(server_encoding, "EUC_JP") == 0)  
			charset = "JA16EUC";  
		else if (strcmp(server_encoding, "EUC_JIS_2004") == 0)  
			charset = "JA16SJIS";  
		else if (strcmp(server_encoding, "EUC_TW") == 0)  
			charset = "ZHT32EUC";  
		else if (strcmp(server_encoding, "ISO_8859_5") == 0)  
			charset = "CL8ISO8859P5";  
		else if (strcmp(server_encoding, "ISO_8859_6") == 0)  
			charset = "AR8ISO8859P6";  
		else if (strcmp(server_encoding, "ISO_8859_7") == 0)  
			charset = "EL8ISO8859P7";  
		else if (strcmp(server_encoding, "ISO_8859_8") == 0)  
			charset = "IW8ISO8859P8";  
		else if (strcmp(server_encoding, "KOI8R") == 0)  
			charset = "CL8KOI8R";  
		else if (strcmp(server_encoding, "KOI8U") == 0)  
			charset = "CL8KOI8U";  
		else if (strcmp(server_encoding, "LATIN1") == 0)  
			charset = "WE8ISO8859P1";  
		else if (strcmp(server_encoding, "LATIN2") == 0)  
			charset = "EE8ISO8859P2";  
		else if (strcmp(server_encoding, "LATIN3") == 0)  
			charset = "SE8ISO8859P3";  
		else if (strcmp(server_encoding, "LATIN4") == 0)  
			charset = "NEE8ISO8859P4";  
		else if (strcmp(server_encoding, "LATIN5") == 0)  
			charset = "WE8ISO8859P9";  
		else if (strcmp(server_encoding, "LATIN6") == 0)  
			charset = "NE8ISO8859P10";  
		else if (strcmp(server_encoding, "LATIN7") == 0)  
			charset = "BLT8ISO8859P13";  
		else if (strcmp(server_encoding, "LATIN8") == 0)  
			charset = "CEL8ISO8859P14";  
		else if (strcmp(server_encoding, "LATIN9") == 0)  
			charset = "WE8ISO8859P15";  
		else if (strcmp(server_encoding, "WIN866") == 0)  
			charset = "RU8PC866";  
		else if (strcmp(server_encoding, "WIN1250") == 0)  
			charset = "EE8MSWIN1250";  
		else if (strcmp(server_encoding, "WIN1251") == 0)  
			charset = "CL8MSWIN1251";  
		else if (strcmp(server_encoding, "WIN1252") == 0)  
			charset = "WE8MSWIN1252";  
		else if (strcmp(server_encoding, "WIN1253") == 0)  
			charset = "EL8MSWIN1253";  
		else if (strcmp(server_encoding, "WIN1254") == 0)  
			charset = "TR8MSWIN1254";  
		else if (strcmp(server_encoding, "WIN1255") == 0)  
			charset = "IW8MSWIN1255";  
		else if (strcmp(server_encoding, "WIN1256") == 0)  
			charset = "AR8MSWIN1256";  
		else if (strcmp(server_encoding, "WIN1257") == 0)  
			charset = "BLT8MSWIN1257";  
		else if (strcmp(server_encoding, "WIN1258") == 0)  
			charset = "VN8MSWIN1258";  
		else  
		{  
			/* warn if we have to resort to 7-bit ASCII */  
			charset = "US7ASCII";  
  
			ereport(WARNING,  
					(errcode(ERRCODE_WARNING),  
					errmsg("no Oracle character set for database encoding \"%s\"", server_encoding),  
					errdetail("All but ASCII characters will be lost."),  
					errhint("You can set the option \"%s\" on the foreign data wrapper to force an Oracle character set.", OPT_NLS_LANG)));  
		}  
  
		lc_messages = pstrdup(GetConfigOption("lc_messages", false, true));  
		/* try to guess those for which there is a backend translation */  
		if (strncmp(lc_messages, "de_", 3) == 0 || pg_strncasecmp(lc_messages, "german", 6) == 0)  
			language = "GERMAN_GERMANY";  
		if (strncmp(lc_messages, "es_", 3) == 0 || pg_strncasecmp(lc_messages, "spanish", 7) == 0)  
			language = "SPANISH_SPAIN";  
		if (strncmp(lc_messages, "fr_", 3) == 0 || pg_strncasecmp(lc_messages, "french", 6) == 0)  
			language = "FRENCH_FRANCE";  
		if (strncmp(lc_messages, "ja_", 3) == 0 || pg_strncasecmp(lc_messages, "japanese", 8) == 0)  
			language = "JAPANESE_JAPAN";  
		if (strncmp(lc_messages, "pt_", 3) == 0 || pg_strncasecmp(lc_messages, "portuguese", 10) == 0)  
			language = "BRAZILIAN PORTUGUESE_BRAZIL";  
		if (strncmp(lc_messages, "tr_", 3) == 0 || pg_strncasecmp(lc_messages, "turkish", 7) == 0)  
			language = "TURKISH_TURKEY";  
		if (strncmp(lc_messages, "zh_CN", 5) == 0 || pg_strncasecmp(lc_messages, "chinese-simplified", 18) == 0)  
			language = "SIMPLIFIED CHINESE_CHINA";  
		if (strncmp(lc_messages, "zh_TW", 5) == 0 || pg_strncasecmp(lc_messages, "chinese-traditional", 19) == 0)  
			language = "TRADITIONAL CHINESE_TAIWAN";  
  
		appendStringInfo(&buf, "NLS_LANG=%s.%s", language, charset);  
	}  
	else  
	{  
		appendStringInfo(&buf, "NLS_LANG=%s", nls_lang);  
	}  

参考

http://api.pgxn.org/src/oracle_fdw/oracle_fdw-1.5.0/oracle_fdw.c

不在这里的通过orafce插件,扩展。

《PostgreSQL Oracle 兼容性系列之 - orafce介绍》

《PostgreSQL Oracle兼容性之 - orafce (包、函数、DUAL)》

Flag Counter

digoal’s 大量PostgreSQL文章入口