在 DB2 中去除重音符号
Remove Accents DB2
有人知道如何在 DB2 中去掉字符串里的重音符号吗?
我有如下查询:
select UPPER( 'test' || 'DescricaoDomino' || NVL('SiglaDomino', 'X')) from tbProcTeste ;
我得到了那个结果:
1
-------------------------------
TESTDESCRICAODOMINOSIGLADOMINO
但是我可能会收到带有重音符号的字符串,例如“!”:
select UPPER( 'test!' || 'DescricaoDomino' || NVL('SiglaDomino', 'X'))
from tbProcTeste ;
我想得到与上面相同的结果。
有人知道我该怎么做吗?
我预期的结果
1
TESTDESCRICAODOMINOSIGLADOMINO
我没有丰富的 DB2 背景。
DB2 版本 11,1
此致
我一直在寻找解决这个问题的办法,并找到了一个函数:
-- ReplaceFunction: returns pe_sTexto with a fixed set of accented Latin
-- letters replaced by their unaccented ASCII letters and a fixed set of
-- punctuation characters removed.
--
-- Parameters:
--   pe_sTexto  text to clean (VARCHAR(8000)).
-- Returns:
--   the cleaned text (VARCHAR(8000)).
--
-- Notes:
--   * Letter case is preserved ('á' -> 'a', 'Á' -> 'A'). Callers that need an
--     upper-case result should wrap the call in UPPER().
--   * Earlier revisions replaced 'à' twice (so 'è' was never handled), listed
--     'ã' twice instead of 'â', and turned most lower-case accented letters
--     into upper-case ASCII; those mappings are corrected here.
--   * Every replacement target is plain ASCII, so the order of the REPLACE
--     calls does not affect the result.
--   * The trailing @ is presumably the script's statement terminator; run the
--     script with a matching terminator setting.
CREATE OR REPLACE FUNCTION ReplaceFunction(
IN pe_sTexto VARCHAR(8000)
) RETURNS VARCHAR(8000)
BEGIN
    -- Ligatures expand to two letters.
    SET pe_sTexto = REPLACE (pe_sTexto,'œ', 'oe');
    SET pe_sTexto = REPLACE (pe_sTexto,'æ', 'ae');

    -- Upper-case accented letters.
    SET pe_sTexto = REPLACE (pe_sTexto,'Á', 'A');
    SET pe_sTexto = REPLACE (pe_sTexto,'À', 'A');
    SET pe_sTexto = REPLACE (pe_sTexto,'Â', 'A');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ã', 'A');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ç', 'C');
    SET pe_sTexto = REPLACE (pe_sTexto,'É', 'E');
    SET pe_sTexto = REPLACE (pe_sTexto,'È', 'E');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ê', 'E');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ë', 'E');
    SET pe_sTexto = REPLACE (pe_sTexto,'Í', 'I');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ì', 'I');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ó', 'O');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ò', 'O');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ô', 'O');
    SET pe_sTexto = REPLACE (pe_sTexto,'Õ', 'O');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ú', 'U');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ù', 'U');
    SET pe_sTexto = REPLACE (pe_sTexto,'Û', 'U');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ü', 'U');

    -- Lower-case accented letters (case preserved).
    SET pe_sTexto = REPLACE (pe_sTexto,'á', 'a');
    SET pe_sTexto = REPLACE (pe_sTexto,'à', 'a');
    SET pe_sTexto = REPLACE (pe_sTexto,'â', 'a');
    SET pe_sTexto = REPLACE (pe_sTexto,'ã', 'a');
    SET pe_sTexto = REPLACE (pe_sTexto,'ç', 'c');
    SET pe_sTexto = REPLACE (pe_sTexto,'é', 'e');
    SET pe_sTexto = REPLACE (pe_sTexto,'è', 'e');
    SET pe_sTexto = REPLACE (pe_sTexto,'ê', 'e');
    SET pe_sTexto = REPLACE (pe_sTexto,'í', 'i');
    SET pe_sTexto = REPLACE (pe_sTexto,'ì', 'i');
    SET pe_sTexto = REPLACE (pe_sTexto,'ó', 'o');
    SET pe_sTexto = REPLACE (pe_sTexto,'ò', 'o');
    SET pe_sTexto = REPLACE (pe_sTexto,'ô', 'o');
    SET pe_sTexto = REPLACE (pe_sTexto,'õ', 'o');
    SET pe_sTexto = REPLACE (pe_sTexto,'ú', 'u');
    SET pe_sTexto = REPLACE (pe_sTexto,'ù', 'u');
    SET pe_sTexto = REPLACE (pe_sTexto,'û', 'u');

    -- Punctuation that is removed outright.
    SET pe_sTexto = REPLACE (pe_sTexto,'!', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'.', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'*', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'@', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'#', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'$', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'&', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'-', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'+', '');
    SET pe_sTexto = REPLACE (pe_sTexto,',', '');
    SET pe_sTexto = REPLACE (pe_sTexto,')', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'(', '');
    SET pe_sTexto = REPLACE (pe_sTexto,':', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'[', '');
    SET pe_sTexto = REPLACE (pe_sTexto,']', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'>', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'<', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'"', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'´', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'~', '');

    RETURN pe_sTexto;
END @
我知道它很长,但对我来说是有效的!
谢谢!
对于 DB2,您可以使用
-- Z_REMOVEACCENTS: returns p_Str with a fixed set of accented Latin letters
-- replaced by their unaccented ASCII letters.
--
-- Parameters:
--   p_Str  text to clean (nvarchar2).
-- Returns:
--   the cleaned text (nvarchar2).
--
-- Notes:
--   * Letter case is preserved ('á' -> 'a', 'Á' -> 'A'); wrap the call in
--     UPPER() when an upper-case result is required.
--   * Earlier revisions replaced 'à' twice (so 'è' was never handled), listed
--     'ã' twice instead of 'â', and turned most lower-case accented letters
--     into upper-case ASCII; those mappings are corrected here.
--   * NOTE(review): the body assigns to the IN parameter p_Str. This is kept
--     from the original; confirm that the target PL/SQL implementation allows
--     assignment to IN parameters.
CREATE OR REPLACE FUNCTION Z_REMOVEACCENTS( p_Str nvarchar2 )
RETURN nvarchar2
AS
BEGIN
    -- Ligatures expand to two letters.
    p_Str := REPLACE(p_Str, 'œ', 'oe');
    p_Str := REPLACE(p_Str, 'æ', 'ae');

    -- Upper-case accented letters.
    p_Str := REPLACE(p_Str, 'Á', 'A');
    p_Str := REPLACE(p_Str, 'À', 'A');
    p_Str := REPLACE(p_Str, 'Â', 'A');
    p_Str := REPLACE(p_Str, 'Ã', 'A');
    p_Str := REPLACE(p_Str, 'Ç', 'C');
    p_Str := REPLACE(p_Str, 'É', 'E');
    p_Str := REPLACE(p_Str, 'È', 'E');
    p_Str := REPLACE(p_Str, 'Ê', 'E');
    p_Str := REPLACE(p_Str, 'Ë', 'E');
    p_Str := REPLACE(p_Str, 'Í', 'I');
    p_Str := REPLACE(p_Str, 'Ì', 'I');
    p_Str := REPLACE(p_Str, 'Ó', 'O');
    p_Str := REPLACE(p_Str, 'Ò', 'O');
    p_Str := REPLACE(p_Str, 'Ô', 'O');
    p_Str := REPLACE(p_Str, 'Õ', 'O');
    p_Str := REPLACE(p_Str, 'Ú', 'U');
    p_Str := REPLACE(p_Str, 'Ù', 'U');
    p_Str := REPLACE(p_Str, 'Û', 'U');
    p_Str := REPLACE(p_Str, 'Ü', 'U');

    -- Lower-case accented letters (case preserved).
    p_Str := REPLACE(p_Str, 'á', 'a');
    p_Str := REPLACE(p_Str, 'à', 'a');
    p_Str := REPLACE(p_Str, 'â', 'a');
    p_Str := REPLACE(p_Str, 'ã', 'a');
    p_Str := REPLACE(p_Str, 'ç', 'c');
    p_Str := REPLACE(p_Str, 'é', 'e');
    p_Str := REPLACE(p_Str, 'è', 'e');
    p_Str := REPLACE(p_Str, 'ê', 'e');
    p_Str := REPLACE(p_Str, 'í', 'i');
    p_Str := REPLACE(p_Str, 'ì', 'i');
    p_Str := REPLACE(p_Str, 'ó', 'o');
    p_Str := REPLACE(p_Str, 'ò', 'o');
    p_Str := REPLACE(p_Str, 'ô', 'o');
    p_Str := REPLACE(p_Str, 'õ', 'o');
    p_Str := REPLACE(p_Str, 'ú', 'u');
    p_Str := REPLACE(p_Str, 'ù', 'u');
    p_Str := REPLACE(p_Str, 'û', 'u');

    RETURN p_Str;
END;
类似这样的内容会比其他答案更短(更快)
-- STRIP_ACCENTS: returns S with a fixed set of accented Latin letters replaced
-- by their unaccented ASCII letters (case preserved) and the ligatures
-- 'œ' / 'æ' expanded to 'oe' / 'ae'.
--
-- Parameters:
--   S  text to clean (VARCHAR(32000)).
-- Returns:
--   the cleaned text (VARCHAR(32000)).
--
-- Notes:
--   * TRANSLATE takes the to-string first and the from-string second; the two
--     literals below are position-aligned and must stay the same length
--     (36 characters each).
--   * Earlier revisions listed 'à' twice and 'ã' twice in the from-string, so
--     'è' and 'â' were never translated; both are now present.
--   * NOTE(review): GRAPHIC(s) is kept from the original, presumably so that
--     TRANSLATE works per character rather than per byte - confirm its length
--     limits against the Db2 documentation for long inputs.
CREATE OR REPLACE FUNCTION STRIP_ACCENTS(S VARCHAR(32000))
RETURNS VARCHAR(32000)
LANGUAGE SQL CONTAINS SQL DETERMINISTIC NO EXTERNAL ACTION
RETURN
REPLACE(REPLACE(TRANSLATE(GRAPHIC(s)
,'ACEIOUAEIOUAEOUAOEUaceiouaeiouaeouao'
,'ÁÇÉÍÓÚÀÈÌÒÙÂÊÔÛÃÕËÜáçéíóúàèìòùâêôûãõ'),'œ','oe'),'æ','ae')
显然,上面没有涵盖 Unicode 中的所有重音字符(老实说,这甚至不是一个很好的列表),所以请随意扩展它
或者查看我的其他答案以获得更完整的解决方案
基于此 post 中的映射
下面的 Db2 函数会把大多数(?)带变音符号的 Unicode 字符替换为对应的简单拉丁字母(这些替换字符不一定是某种语言中实际使用的写法。例如,在德语中,ü 通常被替换为 ue,而不是 u)。
-- DB_STRIP_DIACRITICS: returns the input with Latin letters that carry
-- diacritics (and related ligatures, digraphs and small-capital forms)
-- replaced by plain ASCII letters or letter pairs.
--
-- Parameters:
--   string  text to clean (VARCHAR(32000)).
-- Returns:
--   the cleaned text (VARCHAR(32000)).
--
-- How it works:
--   98 nested REGEXP_REPLACE calls; the innermost call receives the input and
--   each following line supplies one (character class, replacement) pair.
--   Every replacement is plain ASCII, so no pair feeds another.
--
-- Why some classes use \uhhhh escapes:
--   The single-code-point ligatures and digraphs (DZ, Dz, IJ, LJ, Lj, NJ, Nj,
--   dz, ff, ffi, ffl, fi, fl, ij, lj, nj, st) had been expanded to ordinary
--   ASCII letters in this script, producing classes such as '[ff]' or '[st]'
--   that matched plain letters and corrupted ordinary text ('f' -> 'ff',
--   's'/'t' -> 'st', 'z' -> 'D', ...). They are written as regex escapes so
--   that editors or text pipelines cannot expand them again.
CREATE OR REPLACE FUNCTION DB_STRIP_DIACRITICS(string VARCHAR(32000))
RETURNS VARCHAR(32000)
LANGUAGE SQL CONTAINS SQL DETERMINISTIC NO EXTERNAL ACTION
RETURN
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
string,
'[ÁĂẮẶẰẲẴǍÂẤẬẦẨẪÄǞȦǠẠȀÀẢȂĀĄÅǺḀȺÃⱯᴀ]', 'A'),
'[Ꜳ]', 'AA'),
'[ÆǼǢᴁ]', 'AE'),
'[Ꜵ]', 'AO'),
'[Ꜷ]', 'AU'),
'[ꜸꜺ]', 'AV'),
'[Ꜽ]', 'AY'),
'[ḂḄƁḆɃƂʙᴃ]', 'B'),
'[ĆČÇḈĈĊƇȻꜾᴄ]', 'C'),
'[ĎḐḒḊḌƊḎ\u01F2\u01C5ĐƋꝹᴅ]', 'D'),
'[\u01F1\u01C4]', 'DZ'),
'[ÉĔĚȨḜÊẾỆỀỂỄḘËĖẸȄÈẺȆĒḖḔĘɆẼḚƐƎᴇⱻ]', 'E'),
'[Ꝫ]', 'ET'),
'[ḞƑꝻꜰ]', 'F'),
'[ǴĞǦĢĜĠƓḠǤꝽɢʛ]', 'G'),
'[ḪȞḨĤⱧḦḢḤĦʜ]', 'H'),
'[ÍĬǏÎÏḮİỊȈÌỈȊĪĮƗĨḬɪ]', 'I'),
'[\u0132]', 'IJ'),
'[Ꝭ]', 'IS'),
'[ĴɈᴊ]', 'J'),
'[ḰǨĶⱩꝂḲƘḴꝀꝄᴋ]', 'K'),
'[ĹȽĽĻḼḶḸⱠꝈḺĿⱢ\u01C8ŁꞀʟᴌ]', 'L'),
'[\u01C7]', 'LJ'),
'[ḾṀṂⱮƜᴍ]', 'M'),
'[ŃŇŅṊṄṆǸƝṈȠ\u01CBÑɴᴎ]', 'N'),
'[\u01CA]', 'NJ'),
'[ÓŎǑÔỐỘỒỔỖÖȪȮȰỌŐȌÒỎƠỚỢỜỞỠȎꝊꝌŌṒṐƟǪǬØǾÕṌṎȬƆᴏᴐ]', 'O'),
'[Œɶ]', 'OE'),
'[Ƣ]', 'OI'),
'[Ꝏ]', 'OO'),
'[Ȣᴕ]', 'OU'),
'[ṔṖꝒƤꝔⱣꝐᴘ]', 'P'),
'[ꝘꝖ]', 'Q'),
'[ꞂŔŘŖṘṚṜȐȒṞɌⱤʁʀᴙᴚ]', 'R'),
'[ꞄŚṤŠṦŞŜȘṠṢṨꜱ]', 'S'),
'[ꞆŤŢṰȚȾṪṬƬṮƮŦᴛ]', 'T'),
'[Ꜩ]', 'TZ'),
'[ÚŬǓÛṶÜǗǙǛǕṲỤŰȔÙỦƯỨỰỪỬỮȖŪṺŲŮŨṸṴᴜ]', 'U'),
'[ɅꝞṾƲṼᴠ]', 'V'),
'[Ꝡ]', 'VY'),
'[ẂŴẄẆẈẀⱲᴡ]', 'W'),
'[ẌẊ]', 'X'),
'[ÝŶŸẎỴỲƳỶỾȲɎỸʏ]', 'Y'),
'[ŹŽẐⱫŻẒȤẔƵᴢ]', 'Z'),
'[áăắặằẳẵǎâấậầẩẫäǟȧǡạȁàảȃāąᶏẚåǻḁⱥãɐₐ]', 'a'),
'[ꜳ]', 'aa'),
'[æǽǣᴂ]', 'ae'),
'[ꜵ]', 'ao'),
'[ꜷ]', 'au'),
'[ꜹꜻ]', 'av'),
'[ꜽ]', 'ay'),
'[ḃḅɓḇᵬᶀƀƃ]', 'b'),
'[ćčçḉĉɕċƈȼↄꜿ]', 'c'),
'[ďḑḓȡḋḍɗᶑḏᵭᶁđɖƌꝺ]', 'd'),
'[\u01F3\u01C6]', 'dz'),
'[éĕěȩḝêếệềểễḙëėẹȅèẻȇēḗḕⱸęᶒɇẽḛɛᶓɘǝₑ]', 'e'),
'[ꝫ]', 'et'),
'[ḟƒᵮᶂꝼ]', 'f'),
'[\uFB00]', 'ff'),
'[\uFB03]', 'ffi'),
'[\uFB04]', 'ffl'),
'[\uFB01]', 'fi'),
'[\uFB02]', 'fl'),
'[ǵğǧģĝġɠḡᶃǥᵹɡᵷ]', 'g'),
'[ḫȟḩĥⱨḧḣḥɦẖħɥʮʯ]', 'h'),
'[ƕ]', 'hv'),
'[ıíĭǐîïḯịȉìỉȋīįᶖɨĩḭᴉᵢ]', 'i'),
'[\u0133]', 'ij'),
'[ꝭ]', 'is'),
'[ȷɟʄǰĵʝɉⱼ]', 'j'),
'[ḱǩķⱪꝃḳƙḵᶄꝁꝅʞ]', 'k'),
'[ĺƚɬľļḽȴḷḹⱡꝉḻŀɫᶅɭłꞁ]', 'l'),
'[\u01C9]', 'lj'),
'[ḿṁṃɱᵯᶆɯɰ]', 'm'),
'[ńňņṋȵṅṇǹɲṉƞᵰᶇɳñ]', 'n'),
'[\u01CC]', 'nj'),
'[ɵóŏǒôốộồổỗöȫȯȱọőȍòỏơớợờởỡȏꝋꝍⱺōṓṑǫǭøǿõṍṏȭɔᶗᴑᴓₒ]', 'o'),
'[ᴔœ]', 'oe'),
'[ƣ]', 'oi'),
'[ꝏ]', 'oo'),
'[ȣ]', 'ou'),
'[ṕṗꝓƥᵱᶈꝕᵽꝑ]', 'p'),
'[ꝙʠɋꝗ]', 'q'),
'[ꞃŕřŗṙṛṝȑɾᵳȓṟɼᵲᶉɍɽɿɹɻɺⱹᵣ]', 'r'),
'[ꞅſẜẛẝśṥšṧşŝșṡṣṩʂᵴᶊȿ]', 's'),
'[\uFB05\uFB06]', 'st'),
'[ꞇťţṱțȶẗⱦṫṭƭṯᵵƫʈŧʇ]', 't'),
'[ᵺ]', 'th'),
'[ꜩ]', 'tz'),
'[ᴝúŭǔûṷüǘǚǜǖṳụűȕùủưứựừửữȗūṻųᶙůũṹṵᵤ]', 'u'),
'[ᵫ]', 'ue'),
'[ꝸ]', 'um'),
'[ʌⱴꝟṿʋᶌⱱṽᵥ]', 'v'),
'[ꝡ]', 'vy'),
'[ʍẃŵẅẇẉẁⱳẘ]', 'w'),
'[ẍẋᶍₓ]', 'x'),
'[ʎýŷÿẏỵỳƴỷỿȳẙɏỹ]', 'y'),
'[źžẑʑⱬżẓȥẕᵶᶎʐƶɀ]', 'z')
我们用一个 Java 类解决了这个问题,用正则表达式模式实现替换。编译该类:
/opt/ibm/db2/V10.5_WSE/java/jdk64/bin/javac GdprNameRegexp.java
将它打包成 jar 文件
/opt/ibm/db2/V10.5_WSE/java/jdk64/bin/jar cvf GdprNameRegexp.jar GdprNameRegexp.class
并将其安装到 db2 中,作为 UDF 的实现来源
-- Install the jar file under the jar id GDPRNAME; the EXTERNAL NAME clauses
-- below refer to that id.
CALL sqlj.install_jar('file:/home/db2inst1/GdprNameRegexp.jar', 'GDPRNAME')
-- GDPRNAME_VERIFY(WORD): fenced Java UDF bound to
-- GdprNameRegexp.nameFieldsPattern; returns an INTEGER.
CREATE FUNCTION GDPRNAME_VERIFY(WORD VARCHAR(255)) RETURNS INTEGER FENCED EXTERNAL NAME 'GDPRNAME:GdprNameRegexp.nameFieldsPattern' NOT VARIANT NO SQL PARAMETER STYLE java LANGUAGE java NO EXTERNAL ACTION
-- GDPRNAME_REPLACE(WORD, REPLACEMENT): fenced Java UDF bound to
-- GdprNameRegexp.replaceNameFieldPattern; returns a VARCHAR(255).
CREATE FUNCTION GDPRNAME_REPLACE(WORD VARCHAR(255), REPLACEMENT VARCHAR(255)) RETURNS VARCHAR(255) FENCED EXTERNAL NAME 'GDPRNAME:GdprNameRegexp.replaceNameFieldPattern' NOT VARIANT NO SQL PARAMETER STYLE java LANGUAGE java NO EXTERNAL ACTION
Java class 示例:
import java.util.regex.Pattern;
import java.text.Normalizer;
/**
 * Static helpers, exposed to Db2 as Java UDFs, that validate and clean
 * personal-name fields with regular expressions.
 *
 * <p>All methods are static and keep no mutable state; the compiled patterns
 * are shared constants.
 */
public class GdprNameRegexp {

    /** A character followed by at least three repetitions of itself (case-insensitive). */
    private static final Pattern CONSECUTIVE_CHAR_PATTERN =
            Pattern.compile("(.)\\1\\1\\1+", Pattern.CASE_INSENSITIVE);

    /**
     * Any character that is not allowed in a name. The class lists letters,
     * apostrophe, hyphen, space, comma and the three ordinal/prime marks as the
     * allowed characters and negates them.
     *
     * <p>NOTE(review): the pattern text is kept as in the original apart from
     * the restored backslash escaping. How the leading '^' combines with the
     * nested bracket class is believed to differ between Java 8 and Java 9+;
     * confirm the intended set on the target JVM.
     */
    private static final Pattern SPECIAL_CHARS_PATTERN =
            Pattern.compile("[^\\p{L}\'\\- - [\u00BA,\u00AA,\u02BA]]", Pattern.CASE_INSENSITIVE);

    /** Text containing at least one digit. */
    private static final Pattern CONTAINS_NUMBERS_PATTERN =
            Pattern.compile(".*\\d+.*", Pattern.CASE_INSENSITIVE);

    /**
     * Checks a name field against the three patterns.
     *
     * @param word the value to check; may be {@code null}
     * @return {@code 0} when {@code word} is {@code null} or blank; {@code 1}
     *         when no pattern matches; otherwise the sum of the penalties of
     *         the matching patterns: {@code -2} for a disallowed character,
     *         {@code -1} for four or more identical consecutive characters,
     *         {@code -4} for a digit
     */
    public static int nameFieldsPattern(String word) {
        if (word == null || "".equals(word.trim())) {
            return 0;
        }
        String normalized = Normalizer.normalize(word, Normalizer.Form.NFC);
        int rsp = 0;
        if (SPECIAL_CHARS_PATTERN.matcher(normalized).find()) {
            rsp += -2;
        }
        if (CONSECUTIVE_CHAR_PATTERN.matcher(normalized).find()) {
            rsp += -1;
        }
        if (CONTAINS_NUMBERS_PATTERN.matcher(normalized).find()) {
            rsp += -4;
        }
        return rsp == 0 ? 1 : rsp;
    }

    /**
     * Replaces every disallowed character in a name field and collapses runs
     * of the replacement text into a single occurrence.
     *
     * @param word        the value to clean; may be {@code null}
     * @param replacement literal text substituted for each disallowed
     *                    character; may be {@code null} or empty
     * @return {@code ""} when {@code word} is {@code null}; the trimmed
     *         {@code word} when it is blank or {@code replacement} is
     *         {@code null}; otherwise the cleaned, trimmed text
     */
    public static String replaceNameFieldPattern(String word, String replacement) {
        if (word == null) {
            return "";
        }
        if ("".equals(word.trim()) || replacement == null) {
            return word.trim();
        }
        String normalized = Normalizer.normalize(word, Normalizer.Form.NFC);
        // quoteReplacement keeps '$' and '\' in the replacement literal instead
        // of letting replaceAll interpret them as group references/escapes.
        String result = SPECIAL_CHARS_PATTERN.matcher(normalized)
                .replaceAll(java.util.regex.Matcher.quoteReplacement(replacement))
                .trim();
        if (!"".equals(replacement)) {
            String doubled = replacement + replacement;
            // Strings are immutable, so the result must be reassigned. Each
            // pass shortens the text (the replacement is non-empty), so the
            // loop always terminates. replace() treats both arguments
            // literally, unlike replaceAll().
            while (result.contains(doubled)) {
                result = result.replace(doubled, replacement);
            }
        }
        return result.trim();
    }
}
有人知道如何在 DB2 中去掉字符串里的重音符号吗?
我有如下查询:
select UPPER( 'test' || 'DescricaoDomino' || NVL('SiglaDomino', 'X')) from tbProcTeste ;
我得到了那个结果:
1
-------------------------------
TESTDESCRICAODOMINOSIGLADOMINO
但是我可能会收到带有重音符号的字符串,例如“!”:
select UPPER( 'test!' || 'DescricaoDomino' || NVL('SiglaDomino', 'X'))
from tbProcTeste ;
我想得到与上面相同的结果。 有人知道我该怎么做吗?
我预期的结果
1
TESTDESCRICAODOMINOSIGLADOMINO
我没有丰富的 DB2 背景。 DB2 版本 11,1
此致
我一直在寻找解决这个问题的办法,并找到了一个函数:
-- ReplaceFunction: returns pe_sTexto with a fixed set of accented Latin
-- letters replaced by their unaccented ASCII letters and a fixed set of
-- punctuation characters removed.
--
-- Parameters:
--   pe_sTexto  text to clean (VARCHAR(8000)).
-- Returns:
--   the cleaned text (VARCHAR(8000)).
--
-- Notes:
--   * Letter case is preserved ('á' -> 'a', 'Á' -> 'A'). Callers that need an
--     upper-case result should wrap the call in UPPER().
--   * Earlier revisions replaced 'à' twice (so 'è' was never handled), listed
--     'ã' twice instead of 'â', and turned most lower-case accented letters
--     into upper-case ASCII; those mappings are corrected here.
--   * Every replacement target is plain ASCII, so the order of the REPLACE
--     calls does not affect the result.
--   * The trailing @ is presumably the script's statement terminator; run the
--     script with a matching terminator setting.
CREATE OR REPLACE FUNCTION ReplaceFunction(
IN pe_sTexto VARCHAR(8000)
) RETURNS VARCHAR(8000)
BEGIN
    -- Ligatures expand to two letters.
    SET pe_sTexto = REPLACE (pe_sTexto,'œ', 'oe');
    SET pe_sTexto = REPLACE (pe_sTexto,'æ', 'ae');

    -- Upper-case accented letters.
    SET pe_sTexto = REPLACE (pe_sTexto,'Á', 'A');
    SET pe_sTexto = REPLACE (pe_sTexto,'À', 'A');
    SET pe_sTexto = REPLACE (pe_sTexto,'Â', 'A');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ã', 'A');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ç', 'C');
    SET pe_sTexto = REPLACE (pe_sTexto,'É', 'E');
    SET pe_sTexto = REPLACE (pe_sTexto,'È', 'E');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ê', 'E');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ë', 'E');
    SET pe_sTexto = REPLACE (pe_sTexto,'Í', 'I');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ì', 'I');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ó', 'O');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ò', 'O');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ô', 'O');
    SET pe_sTexto = REPLACE (pe_sTexto,'Õ', 'O');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ú', 'U');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ù', 'U');
    SET pe_sTexto = REPLACE (pe_sTexto,'Û', 'U');
    SET pe_sTexto = REPLACE (pe_sTexto,'Ü', 'U');

    -- Lower-case accented letters (case preserved).
    SET pe_sTexto = REPLACE (pe_sTexto,'á', 'a');
    SET pe_sTexto = REPLACE (pe_sTexto,'à', 'a');
    SET pe_sTexto = REPLACE (pe_sTexto,'â', 'a');
    SET pe_sTexto = REPLACE (pe_sTexto,'ã', 'a');
    SET pe_sTexto = REPLACE (pe_sTexto,'ç', 'c');
    SET pe_sTexto = REPLACE (pe_sTexto,'é', 'e');
    SET pe_sTexto = REPLACE (pe_sTexto,'è', 'e');
    SET pe_sTexto = REPLACE (pe_sTexto,'ê', 'e');
    SET pe_sTexto = REPLACE (pe_sTexto,'í', 'i');
    SET pe_sTexto = REPLACE (pe_sTexto,'ì', 'i');
    SET pe_sTexto = REPLACE (pe_sTexto,'ó', 'o');
    SET pe_sTexto = REPLACE (pe_sTexto,'ò', 'o');
    SET pe_sTexto = REPLACE (pe_sTexto,'ô', 'o');
    SET pe_sTexto = REPLACE (pe_sTexto,'õ', 'o');
    SET pe_sTexto = REPLACE (pe_sTexto,'ú', 'u');
    SET pe_sTexto = REPLACE (pe_sTexto,'ù', 'u');
    SET pe_sTexto = REPLACE (pe_sTexto,'û', 'u');

    -- Punctuation that is removed outright.
    SET pe_sTexto = REPLACE (pe_sTexto,'!', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'.', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'*', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'@', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'#', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'$', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'&', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'-', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'+', '');
    SET pe_sTexto = REPLACE (pe_sTexto,',', '');
    SET pe_sTexto = REPLACE (pe_sTexto,')', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'(', '');
    SET pe_sTexto = REPLACE (pe_sTexto,':', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'[', '');
    SET pe_sTexto = REPLACE (pe_sTexto,']', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'>', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'<', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'"', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'´', '');
    SET pe_sTexto = REPLACE (pe_sTexto,'~', '');

    RETURN pe_sTexto;
END @
我知道它很长,但对我来说是有效的!
谢谢!
对于 DB2,您可以使用
-- Z_REMOVEACCENTS: returns p_Str with a fixed set of accented Latin letters
-- replaced by their unaccented ASCII letters.
--
-- Parameters:
--   p_Str  text to clean (nvarchar2).
-- Returns:
--   the cleaned text (nvarchar2).
--
-- Notes:
--   * Letter case is preserved ('á' -> 'a', 'Á' -> 'A'); wrap the call in
--     UPPER() when an upper-case result is required.
--   * Earlier revisions replaced 'à' twice (so 'è' was never handled), listed
--     'ã' twice instead of 'â', and turned most lower-case accented letters
--     into upper-case ASCII; those mappings are corrected here.
--   * NOTE(review): the body assigns to the IN parameter p_Str. This is kept
--     from the original; confirm that the target PL/SQL implementation allows
--     assignment to IN parameters.
CREATE OR REPLACE FUNCTION Z_REMOVEACCENTS( p_Str nvarchar2 )
RETURN nvarchar2
AS
BEGIN
    -- Ligatures expand to two letters.
    p_Str := REPLACE(p_Str, 'œ', 'oe');
    p_Str := REPLACE(p_Str, 'æ', 'ae');

    -- Upper-case accented letters.
    p_Str := REPLACE(p_Str, 'Á', 'A');
    p_Str := REPLACE(p_Str, 'À', 'A');
    p_Str := REPLACE(p_Str, 'Â', 'A');
    p_Str := REPLACE(p_Str, 'Ã', 'A');
    p_Str := REPLACE(p_Str, 'Ç', 'C');
    p_Str := REPLACE(p_Str, 'É', 'E');
    p_Str := REPLACE(p_Str, 'È', 'E');
    p_Str := REPLACE(p_Str, 'Ê', 'E');
    p_Str := REPLACE(p_Str, 'Ë', 'E');
    p_Str := REPLACE(p_Str, 'Í', 'I');
    p_Str := REPLACE(p_Str, 'Ì', 'I');
    p_Str := REPLACE(p_Str, 'Ó', 'O');
    p_Str := REPLACE(p_Str, 'Ò', 'O');
    p_Str := REPLACE(p_Str, 'Ô', 'O');
    p_Str := REPLACE(p_Str, 'Õ', 'O');
    p_Str := REPLACE(p_Str, 'Ú', 'U');
    p_Str := REPLACE(p_Str, 'Ù', 'U');
    p_Str := REPLACE(p_Str, 'Û', 'U');
    p_Str := REPLACE(p_Str, 'Ü', 'U');

    -- Lower-case accented letters (case preserved).
    p_Str := REPLACE(p_Str, 'á', 'a');
    p_Str := REPLACE(p_Str, 'à', 'a');
    p_Str := REPLACE(p_Str, 'â', 'a');
    p_Str := REPLACE(p_Str, 'ã', 'a');
    p_Str := REPLACE(p_Str, 'ç', 'c');
    p_Str := REPLACE(p_Str, 'é', 'e');
    p_Str := REPLACE(p_Str, 'è', 'e');
    p_Str := REPLACE(p_Str, 'ê', 'e');
    p_Str := REPLACE(p_Str, 'í', 'i');
    p_Str := REPLACE(p_Str, 'ì', 'i');
    p_Str := REPLACE(p_Str, 'ó', 'o');
    p_Str := REPLACE(p_Str, 'ò', 'o');
    p_Str := REPLACE(p_Str, 'ô', 'o');
    p_Str := REPLACE(p_Str, 'õ', 'o');
    p_Str := REPLACE(p_Str, 'ú', 'u');
    p_Str := REPLACE(p_Str, 'ù', 'u');
    p_Str := REPLACE(p_Str, 'û', 'u');

    RETURN p_Str;
END;
类似这样的内容会比其他答案更短(更快)
-- STRIP_ACCENTS: returns S with a fixed set of accented Latin letters replaced
-- by their unaccented ASCII letters (case preserved) and the ligatures
-- 'œ' / 'æ' expanded to 'oe' / 'ae'.
--
-- Parameters:
--   S  text to clean (VARCHAR(32000)).
-- Returns:
--   the cleaned text (VARCHAR(32000)).
--
-- Notes:
--   * TRANSLATE takes the to-string first and the from-string second; the two
--     literals below are position-aligned and must stay the same length
--     (36 characters each).
--   * Earlier revisions listed 'à' twice and 'ã' twice in the from-string, so
--     'è' and 'â' were never translated; both are now present.
--   * NOTE(review): GRAPHIC(s) is kept from the original, presumably so that
--     TRANSLATE works per character rather than per byte - confirm its length
--     limits against the Db2 documentation for long inputs.
CREATE OR REPLACE FUNCTION STRIP_ACCENTS(S VARCHAR(32000))
RETURNS VARCHAR(32000)
LANGUAGE SQL CONTAINS SQL DETERMINISTIC NO EXTERNAL ACTION
RETURN
REPLACE(REPLACE(TRANSLATE(GRAPHIC(s)
,'ACEIOUAEIOUAEOUAOEUaceiouaeiouaeouao'
,'ÁÇÉÍÓÚÀÈÌÒÙÂÊÔÛÃÕËÜáçéíóúàèìòùâêôûãõ'),'œ','oe'),'æ','ae')
显然,上面没有涵盖 Unicode 中的所有重音字符(老实说,这甚至不是一个很好的列表),所以请随意扩展它
或者查看我的其他答案以获得更完整的解决方案
基于此 post 中的映射
下面的 Db2 函数会把大多数(?)带变音符号的 Unicode 字符替换为对应的简单拉丁字母(这些替换字符不一定是某种语言中实际使用的写法。例如,在德语中,ü 通常被替换为 ue,而不是 u)。
-- DB_STRIP_DIACRITICS: returns the input with Latin letters that carry
-- diacritics (and related ligatures, digraphs and small-capital forms)
-- replaced by plain ASCII letters or letter pairs.
--
-- Parameters:
--   string  text to clean (VARCHAR(32000)).
-- Returns:
--   the cleaned text (VARCHAR(32000)).
--
-- How it works:
--   98 nested REGEXP_REPLACE calls; the innermost call receives the input and
--   each following line supplies one (character class, replacement) pair.
--   Every replacement is plain ASCII, so no pair feeds another.
--
-- Why some classes use \uhhhh escapes:
--   The single-code-point ligatures and digraphs (DZ, Dz, IJ, LJ, Lj, NJ, Nj,
--   dz, ff, ffi, ffl, fi, fl, ij, lj, nj, st) had been expanded to ordinary
--   ASCII letters in this script, producing classes such as '[ff]' or '[st]'
--   that matched plain letters and corrupted ordinary text ('f' -> 'ff',
--   's'/'t' -> 'st', 'z' -> 'D', ...). They are written as regex escapes so
--   that editors or text pipelines cannot expand them again.
CREATE OR REPLACE FUNCTION DB_STRIP_DIACRITICS(string VARCHAR(32000))
RETURNS VARCHAR(32000)
LANGUAGE SQL CONTAINS SQL DETERMINISTIC NO EXTERNAL ACTION
RETURN
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
string,
'[ÁĂẮẶẰẲẴǍÂẤẬẦẨẪÄǞȦǠẠȀÀẢȂĀĄÅǺḀȺÃⱯᴀ]', 'A'),
'[Ꜳ]', 'AA'),
'[ÆǼǢᴁ]', 'AE'),
'[Ꜵ]', 'AO'),
'[Ꜷ]', 'AU'),
'[ꜸꜺ]', 'AV'),
'[Ꜽ]', 'AY'),
'[ḂḄƁḆɃƂʙᴃ]', 'B'),
'[ĆČÇḈĈĊƇȻꜾᴄ]', 'C'),
'[ĎḐḒḊḌƊḎ\u01F2\u01C5ĐƋꝹᴅ]', 'D'),
'[\u01F1\u01C4]', 'DZ'),
'[ÉĔĚȨḜÊẾỆỀỂỄḘËĖẸȄÈẺȆĒḖḔĘɆẼḚƐƎᴇⱻ]', 'E'),
'[Ꝫ]', 'ET'),
'[ḞƑꝻꜰ]', 'F'),
'[ǴĞǦĢĜĠƓḠǤꝽɢʛ]', 'G'),
'[ḪȞḨĤⱧḦḢḤĦʜ]', 'H'),
'[ÍĬǏÎÏḮİỊȈÌỈȊĪĮƗĨḬɪ]', 'I'),
'[\u0132]', 'IJ'),
'[Ꝭ]', 'IS'),
'[ĴɈᴊ]', 'J'),
'[ḰǨĶⱩꝂḲƘḴꝀꝄᴋ]', 'K'),
'[ĹȽĽĻḼḶḸⱠꝈḺĿⱢ\u01C8ŁꞀʟᴌ]', 'L'),
'[\u01C7]', 'LJ'),
'[ḾṀṂⱮƜᴍ]', 'M'),
'[ŃŇŅṊṄṆǸƝṈȠ\u01CBÑɴᴎ]', 'N'),
'[\u01CA]', 'NJ'),
'[ÓŎǑÔỐỘỒỔỖÖȪȮȰỌŐȌÒỎƠỚỢỜỞỠȎꝊꝌŌṒṐƟǪǬØǾÕṌṎȬƆᴏᴐ]', 'O'),
'[Œɶ]', 'OE'),
'[Ƣ]', 'OI'),
'[Ꝏ]', 'OO'),
'[Ȣᴕ]', 'OU'),
'[ṔṖꝒƤꝔⱣꝐᴘ]', 'P'),
'[ꝘꝖ]', 'Q'),
'[ꞂŔŘŖṘṚṜȐȒṞɌⱤʁʀᴙᴚ]', 'R'),
'[ꞄŚṤŠṦŞŜȘṠṢṨꜱ]', 'S'),
'[ꞆŤŢṰȚȾṪṬƬṮƮŦᴛ]', 'T'),
'[Ꜩ]', 'TZ'),
'[ÚŬǓÛṶÜǗǙǛǕṲỤŰȔÙỦƯỨỰỪỬỮȖŪṺŲŮŨṸṴᴜ]', 'U'),
'[ɅꝞṾƲṼᴠ]', 'V'),
'[Ꝡ]', 'VY'),
'[ẂŴẄẆẈẀⱲᴡ]', 'W'),
'[ẌẊ]', 'X'),
'[ÝŶŸẎỴỲƳỶỾȲɎỸʏ]', 'Y'),
'[ŹŽẐⱫŻẒȤẔƵᴢ]', 'Z'),
'[áăắặằẳẵǎâấậầẩẫäǟȧǡạȁàảȃāąᶏẚåǻḁⱥãɐₐ]', 'a'),
'[ꜳ]', 'aa'),
'[æǽǣᴂ]', 'ae'),
'[ꜵ]', 'ao'),
'[ꜷ]', 'au'),
'[ꜹꜻ]', 'av'),
'[ꜽ]', 'ay'),
'[ḃḅɓḇᵬᶀƀƃ]', 'b'),
'[ćčçḉĉɕċƈȼↄꜿ]', 'c'),
'[ďḑḓȡḋḍɗᶑḏᵭᶁđɖƌꝺ]', 'd'),
'[\u01F3\u01C6]', 'dz'),
'[éĕěȩḝêếệềểễḙëėẹȅèẻȇēḗḕⱸęᶒɇẽḛɛᶓɘǝₑ]', 'e'),
'[ꝫ]', 'et'),
'[ḟƒᵮᶂꝼ]', 'f'),
'[\uFB00]', 'ff'),
'[\uFB03]', 'ffi'),
'[\uFB04]', 'ffl'),
'[\uFB01]', 'fi'),
'[\uFB02]', 'fl'),
'[ǵğǧģĝġɠḡᶃǥᵹɡᵷ]', 'g'),
'[ḫȟḩĥⱨḧḣḥɦẖħɥʮʯ]', 'h'),
'[ƕ]', 'hv'),
'[ıíĭǐîïḯịȉìỉȋīįᶖɨĩḭᴉᵢ]', 'i'),
'[\u0133]', 'ij'),
'[ꝭ]', 'is'),
'[ȷɟʄǰĵʝɉⱼ]', 'j'),
'[ḱǩķⱪꝃḳƙḵᶄꝁꝅʞ]', 'k'),
'[ĺƚɬľļḽȴḷḹⱡꝉḻŀɫᶅɭłꞁ]', 'l'),
'[\u01C9]', 'lj'),
'[ḿṁṃɱᵯᶆɯɰ]', 'm'),
'[ńňņṋȵṅṇǹɲṉƞᵰᶇɳñ]', 'n'),
'[\u01CC]', 'nj'),
'[ɵóŏǒôốộồổỗöȫȯȱọőȍòỏơớợờởỡȏꝋꝍⱺōṓṑǫǭøǿõṍṏȭɔᶗᴑᴓₒ]', 'o'),
'[ᴔœ]', 'oe'),
'[ƣ]', 'oi'),
'[ꝏ]', 'oo'),
'[ȣ]', 'ou'),
'[ṕṗꝓƥᵱᶈꝕᵽꝑ]', 'p'),
'[ꝙʠɋꝗ]', 'q'),
'[ꞃŕřŗṙṛṝȑɾᵳȓṟɼᵲᶉɍɽɿɹɻɺⱹᵣ]', 'r'),
'[ꞅſẜẛẝśṥšṧşŝșṡṣṩʂᵴᶊȿ]', 's'),
'[\uFB05\uFB06]', 'st'),
'[ꞇťţṱțȶẗⱦṫṭƭṯᵵƫʈŧʇ]', 't'),
'[ᵺ]', 'th'),
'[ꜩ]', 'tz'),
'[ᴝúŭǔûṷüǘǚǜǖṳụűȕùủưứựừửữȗūṻųᶙůũṹṵᵤ]', 'u'),
'[ᵫ]', 'ue'),
'[ꝸ]', 'um'),
'[ʌⱴꝟṿʋᶌⱱṽᵥ]', 'v'),
'[ꝡ]', 'vy'),
'[ʍẃŵẅẇẉẁⱳẘ]', 'w'),
'[ẍẋᶍₓ]', 'x'),
'[ʎýŷÿẏỵỳƴỷỿȳẙɏỹ]', 'y'),
'[źžẑʑⱬżẓȥẕᵶᶎʐƶɀ]', 'z')
我们用一个 Java 类解决了这个问题,用正则表达式模式实现替换。编译该类:
/opt/ibm/db2/V10.5_WSE/java/jdk64/bin/javac GdprNameRegexp.java
将它打包成 jar 文件
/opt/ibm/db2/V10.5_WSE/java/jdk64/bin/jar cvf GdprNameRegexp.jar GdprNameRegexp.class
并将其安装到 db2 中,作为 UDF 的实现来源
-- Install the jar file under the jar id GDPRNAME; the EXTERNAL NAME clauses
-- below refer to that id.
CALL sqlj.install_jar('file:/home/db2inst1/GdprNameRegexp.jar', 'GDPRNAME')
-- GDPRNAME_VERIFY(WORD): fenced Java UDF bound to
-- GdprNameRegexp.nameFieldsPattern; returns an INTEGER.
CREATE FUNCTION GDPRNAME_VERIFY(WORD VARCHAR(255)) RETURNS INTEGER FENCED EXTERNAL NAME 'GDPRNAME:GdprNameRegexp.nameFieldsPattern' NOT VARIANT NO SQL PARAMETER STYLE java LANGUAGE java NO EXTERNAL ACTION
-- GDPRNAME_REPLACE(WORD, REPLACEMENT): fenced Java UDF bound to
-- GdprNameRegexp.replaceNameFieldPattern; returns a VARCHAR(255).
CREATE FUNCTION GDPRNAME_REPLACE(WORD VARCHAR(255), REPLACEMENT VARCHAR(255)) RETURNS VARCHAR(255) FENCED EXTERNAL NAME 'GDPRNAME:GdprNameRegexp.replaceNameFieldPattern' NOT VARIANT NO SQL PARAMETER STYLE java LANGUAGE java NO EXTERNAL ACTION
Java class 示例:
import java.util.regex.Pattern;
import java.text.Normalizer;
/**
 * Static helpers, exposed to Db2 as Java UDFs, that validate and clean
 * personal-name fields with regular expressions.
 *
 * <p>All methods are static and keep no mutable state; the compiled patterns
 * are shared constants.
 */
public class GdprNameRegexp {

    /** A character followed by at least three repetitions of itself (case-insensitive). */
    private static final Pattern CONSECUTIVE_CHAR_PATTERN =
            Pattern.compile("(.)\\1\\1\\1+", Pattern.CASE_INSENSITIVE);

    /**
     * Any character that is not allowed in a name. The class lists letters,
     * apostrophe, hyphen, space, comma and the three ordinal/prime marks as the
     * allowed characters and negates them.
     *
     * <p>NOTE(review): the pattern text is kept as in the original apart from
     * the restored backslash escaping. How the leading '^' combines with the
     * nested bracket class is believed to differ between Java 8 and Java 9+;
     * confirm the intended set on the target JVM.
     */
    private static final Pattern SPECIAL_CHARS_PATTERN =
            Pattern.compile("[^\\p{L}\'\\- - [\u00BA,\u00AA,\u02BA]]", Pattern.CASE_INSENSITIVE);

    /** Text containing at least one digit. */
    private static final Pattern CONTAINS_NUMBERS_PATTERN =
            Pattern.compile(".*\\d+.*", Pattern.CASE_INSENSITIVE);

    /**
     * Checks a name field against the three patterns.
     *
     * @param word the value to check; may be {@code null}
     * @return {@code 0} when {@code word} is {@code null} or blank; {@code 1}
     *         when no pattern matches; otherwise the sum of the penalties of
     *         the matching patterns: {@code -2} for a disallowed character,
     *         {@code -1} for four or more identical consecutive characters,
     *         {@code -4} for a digit
     */
    public static int nameFieldsPattern(String word) {
        if (word == null || "".equals(word.trim())) {
            return 0;
        }
        String normalized = Normalizer.normalize(word, Normalizer.Form.NFC);
        int rsp = 0;
        if (SPECIAL_CHARS_PATTERN.matcher(normalized).find()) {
            rsp += -2;
        }
        if (CONSECUTIVE_CHAR_PATTERN.matcher(normalized).find()) {
            rsp += -1;
        }
        if (CONTAINS_NUMBERS_PATTERN.matcher(normalized).find()) {
            rsp += -4;
        }
        return rsp == 0 ? 1 : rsp;
    }

    /**
     * Replaces every disallowed character in a name field and collapses runs
     * of the replacement text into a single occurrence.
     *
     * @param word        the value to clean; may be {@code null}
     * @param replacement literal text substituted for each disallowed
     *                    character; may be {@code null} or empty
     * @return {@code ""} when {@code word} is {@code null}; the trimmed
     *         {@code word} when it is blank or {@code replacement} is
     *         {@code null}; otherwise the cleaned, trimmed text
     */
    public static String replaceNameFieldPattern(String word, String replacement) {
        if (word == null) {
            return "";
        }
        if ("".equals(word.trim()) || replacement == null) {
            return word.trim();
        }
        String normalized = Normalizer.normalize(word, Normalizer.Form.NFC);
        // quoteReplacement keeps '$' and '\' in the replacement literal instead
        // of letting replaceAll interpret them as group references/escapes.
        String result = SPECIAL_CHARS_PATTERN.matcher(normalized)
                .replaceAll(java.util.regex.Matcher.quoteReplacement(replacement))
                .trim();
        if (!"".equals(replacement)) {
            String doubled = replacement + replacement;
            // Strings are immutable, so the result must be reassigned. Each
            // pass shortens the text (the replacement is non-empty), so the
            // loop always terminates. replace() treats both arguments
            // literally, unlike replaceAll().
            while (result.contains(doubled)) {
                result = result.replace(doubled, replacement);
            }
        }
        return result.trim();
    }
}