oracle utl_file 从 utf8 编码
oracle utl_file encoding from utf8
我喜欢将大量文本数据从数据库导出到文件。
db中的字符集是UTF8。
文件中的异常结果为 ISO8859P2 或 MSWIN1250。
我的数据库设置:
SELECT * FROM v$nls_parameters;
1 NLS_LANGUAGE HUNGARIAN 0
2 NLS_TERRITORY HUNGARY 0
9 NLS_CHARACTERSET UTF8 0
10 NLS_SORT HUNGARIAN 0
16 NLS_NCHAR_CHARACTERSET UTF8 0
17 NLS_COMP BINARY 0
18 NLS_LENGTH_SEMANTICS CHAR 0
19 NLS_NCHAR_CONV_EXCP FALSE 0
select * from nls_database_parameters;
1 NLS_RDBMS_VERSION 12.1.0.2.0
2 NLS_NCHAR_CONV_EXCP FALSE
15 NLS_NCHAR_CHARACTERSET UTF8
16 NLS_CHARACTERSET UTF8
19 NLS_TERRITORY AMERICA
20 NLS_LANGUAGE AMERICAN
select * from nls_session_parameters;
1 NLS_LANGUAGE HUNGARIAN
2 NLS_TERRITORY HUNGARY
9 NLS_SORT HUNGARIAN
15 NLS_COMP BINARY
16 NLS_LENGTH_SEMANTICS CHAR
17 NLS_NCHAR_CONV_EXCP FALSE
在服务器目录中创建的文件 (linux)。我没有关于 linux 字符集设置的更多信息。
PLSQL代码:
DECLARE
v_fh UTL_FILE.FILE_TYPE;
v_eol VARCHAR2(2);
v_eollen PLS_INTEGER;
CURSOR cur_sql IS
SELECT T3.ID_RESULT
,T3.column1
FROM table1
WHERE id_result = 999999
;
"ID_RESULT" DBMS_SQL.NUMBER_TABLE;
"column1" DBMS_SQL.VARCHAR2A;
BEGIN
EXECUTE IMMEDIATE 'ALTER SESSION SET NLS_DATE_FORMAT = ''YYYY.MM.DD HH24:MI:SS''';
v_eol := CHR(13)||CHR(10);
v_eollen := LENGTH(v_eol);
v_fh := UTL_FILE.FOPEN('REP_DIR','result_test.csv','W', 32000);
OPEN cur_sql;
LOOP
FETCH cur_sql
BULK COLLECT INTO "ID_RESULT",
"column1",
LIMIT 1000;
IF "ID_RESULT".COUNT > 0 THEN
FOR i IN "ID_RESULT".FIRST .. "ID_RESULT".LAST LOOP
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8ISO8859P2','UTF8'));
UTL_FILE.PUT_nchar(v_fh, v_eol);
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8MSWIN1250','UTF8'));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8ISO8859P2'));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8MSWIN1250'));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, "column1"(i));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, utl_raw.cast_to_varchar2(utl_raw.convert(utl_raw.cast_to_raw("column1"(i) ),'HUNGARIAN_HUNGARY.EE8MSWIN1250', 'ENGLISH_UNITED KINGDOM.UTF8')));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.fflush(v_fh);
END LOOP;
END IF;
EXIT WHEN cur_sql%NOTFOUND;
END LOOP;
CLOSE cur_sql;
UTL_FILE.FCLOSE(v_fh);
EXCEPTION
WHEN
.........
RAISE;
END;
数据库中的原始值:
value in hexa
Notepad++ 中的结果(UTF8 编码):
替补拉约森
替补拉约森
替补拉约森
替补拉约森
Csere Lajos
替补拉约森
Notepad++中的结果(ANSI编码,字符集:windows-1250):
替补拉约森
替补拉约森
替补拉约森
替补拉约森
Csere Lajosné
替补拉约森
Notepad++ 中的结果(以 ANSI 编码,字符集:iso-8859-2):
替补拉约森
替补拉约森
替补拉约森
替补拉约森
改变 LajosnĂŠ
替补拉约森
当我使用 CONVERT 时,我丢失了“é”字符。
我怎样才能将字符串从 UTF8 转换为 ANSI?
谢谢,
佐尔坦
这是我的工具箱包 - 试试 it/tweak。使用不同的代码页进出。
function dump_dsv_fast(p_query in varchar2
,p_filename in varchar2
,p_dir in varchar2 default c_DEFAULT_DIRECTORY
,p_separator in varchar2 default ';'
,p_text_qualifier in varchar2 default ''
,p_header in boolean default true
,p_eol_format in varchar2 default chr(13) || chr(10)
,p_characterset in varchar2 default 'EE8MSWIN1250'
,p_write_mode in varchar2 default 'WB') return number
/****************************************************
Formats:
AL32UTF8 --> full utf
WE8MSWIN1252 --> no pl chars
WE8MSWIN1250 --> pl ansi
Limitations: this one is faster filing buffer once
* 2014/05/13 --> change to raw (end line character)
* 2016/02/12 --> text qualifier
--> header on/off
--> line length adjust
--> text q
* 2016/03/09 --> codepage
*****************************************************/
is
l_output utl_file.file_type;
l_theCursor integer default dbms_sql.open_cursor;
l_columnValue varchar2(32760);
l_status integer;
l_separator varchar2(10);
l_colCnt number default 0;
l_descTbl dbms_sql.desc_tab;
l_cnt number default 0;
l_buffer raw(32767) := null;
begin
l_output := utl_file.fopen(p_dir, p_filename, p_write_mode, 32760);
dbms_sql.parse(l_theCursor, p_query, dbms_sql.native);
if p_header
then
dbms_sql.describe_columns(l_theCursor, l_colCnt, l_descTbl);
l_separator := '';
for i in 1 .. l_colCnt
loop
if p_characterset = 'EE8MSWIN1250'
then
utl_file.put_raw(l_output,
utl_raw.cast_to_raw(l_separator ||
p_text_qualifier || l_descTbl(i)
.col_name || p_text_qualifier));
else
utl_file.put_raw(l_output,
utl_i18n.string_to_raw(l_separator ||
p_text_qualifier || l_descTbl(i)
.col_name ||
p_text_qualifier,
p_characterset));
end if;
dbms_sql.define_column(l_theCursor, i, l_columnValue, 32760);
l_separator := p_separator;
end loop;
utl_file.put_raw(l_output, utl_raw.cast_to_raw(p_eol_format));
end if;
for i in 1 .. 255
loop
begin
dbms_sql.define_column(l_theCursor, i, l_columnValue, 32760);
l_colCnt := i;
exception
when others then
if (sqlcode = -1007)
then
exit;
else
raise;
end if;
end;
end loop;
dbms_sql.define_column(l_theCursor, 1, l_columnValue, 32760);
l_status := dbms_sql.execute(l_theCursor);
loop
exit when(dbms_sql.fetch_rows(l_theCursor) <= 0);
l_separator := '';
for i in 1 .. l_colCnt
loop
dbms_sql.column_value(l_theCursor, i, l_columnValue);
if p_characterset = 'EE8MSWIN1250'
then
l_buffer := l_buffer ||
utl_raw.cast_to_raw(l_separator || p_text_qualifier ||
l_columnValue || p_text_qualifier);
else
l_buffer := l_buffer ||
utl_i18n.string_to_raw(l_separator || p_text_qualifier ||
l_columnValue ||
p_text_qualifier, p_characterset);
end if;
l_separator := p_separator;
end loop;
utl_file.put_raw(l_output,
l_buffer || utl_raw.cast_to_raw(p_eol_format));
l_buffer := '';
l_cnt := l_cnt + 1;
end loop;
dbms_sql.close_cursor(l_theCursor);
utl_file.fclose(l_output);
return l_cnt;
end dump_dsv_fast;
我喜欢将大量文本数据从数据库导出到文件。 db中的字符集是UTF8。 文件中的异常结果为 ISO8859P2 或 MSWIN1250。
我的数据库设置:
SELECT * FROM v$nls_parameters;
1 NLS_LANGUAGE HUNGARIAN 0
2 NLS_TERRITORY HUNGARY 0
9 NLS_CHARACTERSET UTF8 0
10 NLS_SORT HUNGARIAN 0
16 NLS_NCHAR_CHARACTERSET UTF8 0
17 NLS_COMP BINARY 0
18 NLS_LENGTH_SEMANTICS CHAR 0
19 NLS_NCHAR_CONV_EXCP FALSE 0
select * from nls_database_parameters;
1 NLS_RDBMS_VERSION 12.1.0.2.0
2 NLS_NCHAR_CONV_EXCP FALSE
15 NLS_NCHAR_CHARACTERSET UTF8
16 NLS_CHARACTERSET UTF8
19 NLS_TERRITORY AMERICA
20 NLS_LANGUAGE AMERICAN
select * from nls_session_parameters;
1 NLS_LANGUAGE HUNGARIAN
2 NLS_TERRITORY HUNGARY
9 NLS_SORT HUNGARIAN
15 NLS_COMP BINARY
16 NLS_LENGTH_SEMANTICS CHAR
17 NLS_NCHAR_CONV_EXCP FALSE
在服务器目录中创建的文件 (linux)。我没有关于 linux 字符集设置的更多信息。
PLSQL代码:
DECLARE
v_fh UTL_FILE.FILE_TYPE;
v_eol VARCHAR2(2);
v_eollen PLS_INTEGER;
CURSOR cur_sql IS
SELECT T3.ID_RESULT
,T3.column1
FROM table1
WHERE id_result = 999999
;
"ID_RESULT" DBMS_SQL.NUMBER_TABLE;
"column1" DBMS_SQL.VARCHAR2A;
BEGIN
EXECUTE IMMEDIATE 'ALTER SESSION SET NLS_DATE_FORMAT = ''YYYY.MM.DD HH24:MI:SS''';
v_eol := CHR(13)||CHR(10);
v_eollen := LENGTH(v_eol);
v_fh := UTL_FILE.FOPEN('REP_DIR','result_test.csv','W', 32000);
OPEN cur_sql;
LOOP
FETCH cur_sql
BULK COLLECT INTO "ID_RESULT",
"column1",
LIMIT 1000;
IF "ID_RESULT".COUNT > 0 THEN
FOR i IN "ID_RESULT".FIRST .. "ID_RESULT".LAST LOOP
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8ISO8859P2','UTF8'));
UTL_FILE.PUT_nchar(v_fh, v_eol);
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8MSWIN1250','UTF8'));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8ISO8859P2'));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8MSWIN1250'));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, "column1"(i));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.PUT(v_fh, utl_raw.cast_to_varchar2(utl_raw.convert(utl_raw.cast_to_raw("column1"(i) ),'HUNGARIAN_HUNGARY.EE8MSWIN1250', 'ENGLISH_UNITED KINGDOM.UTF8')));
UTL_FILE.PUT(v_fh, v_eol);
UTL_FILE.fflush(v_fh);
END LOOP;
END IF;
EXIT WHEN cur_sql%NOTFOUND;
END LOOP;
CLOSE cur_sql;
UTL_FILE.FCLOSE(v_fh);
EXCEPTION
WHEN
.........
RAISE;
END;
数据库中的原始值: value in hexa
Notepad++ 中的结果(UTF8 编码):
替补拉约森
替补拉约森
替补拉约森
替补拉约森
Csere Lajos
替补拉约森
Notepad++中的结果(ANSI编码,字符集:windows-1250):
替补拉约森
替补拉约森
替补拉约森
替补拉约森
Csere Lajosné
替补拉约森
Notepad++ 中的结果(以 ANSI 编码,字符集:iso-8859-2):
替补拉约森
替补拉约森
替补拉约森
替补拉约森
改变 LajosnĂŠ
替补拉约森
当我使用 CONVERT 时,我丢失了“é”字符。 我怎样才能将字符串从 UTF8 转换为 ANSI?
谢谢, 佐尔坦
这是我的工具箱包 - 试试 it/tweak。使用不同的代码页进出。
function dump_dsv_fast(p_query in varchar2
,p_filename in varchar2
,p_dir in varchar2 default c_DEFAULT_DIRECTORY
,p_separator in varchar2 default ';'
,p_text_qualifier in varchar2 default ''
,p_header in boolean default true
,p_eol_format in varchar2 default chr(13) || chr(10)
,p_characterset in varchar2 default 'EE8MSWIN1250'
,p_write_mode in varchar2 default 'WB') return number
/****************************************************
Formats:
AL32UTF8 --> full utf
WE8MSWIN1252 --> no pl chars
WE8MSWIN1250 --> pl ansi
Limitations: this one is faster filing buffer once
* 2014/05/13 --> change to raw (end line character)
* 2016/02/12 --> text qualifier
--> header on/off
--> line length adjust
--> text q
* 2016/03/09 --> codepage
*****************************************************/
is
l_output utl_file.file_type;
l_theCursor integer default dbms_sql.open_cursor;
l_columnValue varchar2(32760);
l_status integer;
l_separator varchar2(10);
l_colCnt number default 0;
l_descTbl dbms_sql.desc_tab;
l_cnt number default 0;
l_buffer raw(32767) := null;
begin
l_output := utl_file.fopen(p_dir, p_filename, p_write_mode, 32760);
dbms_sql.parse(l_theCursor, p_query, dbms_sql.native);
if p_header
then
dbms_sql.describe_columns(l_theCursor, l_colCnt, l_descTbl);
l_separator := '';
for i in 1 .. l_colCnt
loop
if p_characterset = 'EE8MSWIN1250'
then
utl_file.put_raw(l_output,
utl_raw.cast_to_raw(l_separator ||
p_text_qualifier || l_descTbl(i)
.col_name || p_text_qualifier));
else
utl_file.put_raw(l_output,
utl_i18n.string_to_raw(l_separator ||
p_text_qualifier || l_descTbl(i)
.col_name ||
p_text_qualifier,
p_characterset));
end if;
dbms_sql.define_column(l_theCursor, i, l_columnValue, 32760);
l_separator := p_separator;
end loop;
utl_file.put_raw(l_output, utl_raw.cast_to_raw(p_eol_format));
end if;
for i in 1 .. 255
loop
begin
dbms_sql.define_column(l_theCursor, i, l_columnValue, 32760);
l_colCnt := i;
exception
when others then
if (sqlcode = -1007)
then
exit;
else
raise;
end if;
end;
end loop;
dbms_sql.define_column(l_theCursor, 1, l_columnValue, 32760);
l_status := dbms_sql.execute(l_theCursor);
loop
exit when(dbms_sql.fetch_rows(l_theCursor) <= 0);
l_separator := '';
for i in 1 .. l_colCnt
loop
dbms_sql.column_value(l_theCursor, i, l_columnValue);
if p_characterset = 'EE8MSWIN1250'
then
l_buffer := l_buffer ||
utl_raw.cast_to_raw(l_separator || p_text_qualifier ||
l_columnValue || p_text_qualifier);
else
l_buffer := l_buffer ||
utl_i18n.string_to_raw(l_separator || p_text_qualifier ||
l_columnValue ||
p_text_qualifier, p_characterset);
end if;
l_separator := p_separator;
end loop;
utl_file.put_raw(l_output,
l_buffer || utl_raw.cast_to_raw(p_eol_format));
l_buffer := '';
l_cnt := l_cnt + 1;
end loop;
dbms_sql.close_cursor(l_theCursor);
utl_file.fclose(l_output);
return l_cnt;
end dump_dsv_fast;