在 DB2 中去除重音符号

Remove Accents DB2

有人知道如何在 DB2 中去掉字符串里的重音符号吗?

我有这样一条查询:

-- Upper-cases the concatenation of three string literals, once per row of tbProcTeste.
SELECT UPPER('test' || 'DescricaoDomino' || NVL('SiglaDomino', 'X'))
FROM tbProcTeste;

我得到了那个结果:

 1
 -------------------------------

     TESTDESCRICAODOMINOSIGLADOMINO

但是我可能会收到带有重音符号的字符串,例如“!”:

      -- Upper-cases the concatenated literals (the first one contains '!'),
      -- once per row of tbProcTeste.
      SELECT UPPER('test!' || 'DescricaoDomino' || NVL('SiglaDomino', 'X'))
      FROM tbProcTeste;

我想得到与上面相同的结果。 有人知道我该怎么做吗?

我预期的结果

1


 TESTDESCRICAODOMINOSIGLADOMINO

我没有丰富的 DB2 背景。DB2 版本为 11.1。

此致

我一直在寻找这个问题的解决办法,最后找到了一个方案:

-- ReplaceFunction: returns pe_sTexto with accented Latin letters replaced by
-- their unaccented equivalents and a fixed set of punctuation characters removed.
--
--   pe_sTexto  text to clean (VARCHAR(8000)); a NULL input yields NULL because
--              every step is a plain REPLACE.
--   returns    the cleaned text (VARCHAR(8000)).
--
-- Letter case is preserved ('é' -> 'e', 'É' -> 'E'); wrap the call in UPPER()
-- when an upper-case result is wanted.
-- The ligatures expand to two letters ('œ' -> 'oe'), so the result can contain
-- more characters than the input.
CREATE OR REPLACE FUNCTION ReplaceFunction(
  IN pe_sTexto VARCHAR(8000)
  ) RETURNS VARCHAR(8000)
  LANGUAGE SQL
  CONTAINS SQL
  DETERMINISTIC
  NO EXTERNAL ACTION
   BEGIN
  -- Work on a local copy instead of reassigning the IN parameter.
  DECLARE v_texto VARCHAR(8000);
  SET v_texto = pe_sTexto;

  -- Upper-case letters with diacritics.
  SET v_texto = REPLACE (v_texto,'Á', 'A');
  SET v_texto = REPLACE (v_texto,'À', 'A');
  SET v_texto = REPLACE (v_texto,'Â', 'A');
  SET v_texto = REPLACE (v_texto,'Ã', 'A');
  SET v_texto = REPLACE (v_texto,'Ç', 'C');
  SET v_texto = REPLACE (v_texto,'É', 'E');
  SET v_texto = REPLACE (v_texto,'È', 'E');
  SET v_texto = REPLACE (v_texto,'Ê', 'E');
  SET v_texto = REPLACE (v_texto,'Ë', 'E');
  SET v_texto = REPLACE (v_texto,'Í', 'I');
  SET v_texto = REPLACE (v_texto,'Ì', 'I');
  SET v_texto = REPLACE (v_texto,'Î', 'I');
  SET v_texto = REPLACE (v_texto,'Ó', 'O');
  SET v_texto = REPLACE (v_texto,'Ò', 'O');
  SET v_texto = REPLACE (v_texto,'Ô', 'O');
  SET v_texto = REPLACE (v_texto,'Õ', 'O');
  SET v_texto = REPLACE (v_texto,'Ú', 'U');
  SET v_texto = REPLACE (v_texto,'Ù', 'U');
  SET v_texto = REPLACE (v_texto,'Û', 'U');
  SET v_texto = REPLACE (v_texto,'Ü', 'U');
  SET v_texto = REPLACE (v_texto,'Œ', 'OE');
  SET v_texto = REPLACE (v_texto,'Æ', 'AE');

  -- Lower-case letters with diacritics (same set as above, case preserved).
  SET v_texto = REPLACE (v_texto,'á', 'a');
  SET v_texto = REPLACE (v_texto,'à', 'a');
  SET v_texto = REPLACE (v_texto,'â', 'a');
  SET v_texto = REPLACE (v_texto,'ã', 'a');
  SET v_texto = REPLACE (v_texto,'ç', 'c');
  SET v_texto = REPLACE (v_texto,'é', 'e');
  SET v_texto = REPLACE (v_texto,'è', 'e');
  SET v_texto = REPLACE (v_texto,'ê', 'e');
  SET v_texto = REPLACE (v_texto,'ë', 'e');
  SET v_texto = REPLACE (v_texto,'í', 'i');
  SET v_texto = REPLACE (v_texto,'ì', 'i');
  SET v_texto = REPLACE (v_texto,'î', 'i');
  SET v_texto = REPLACE (v_texto,'ó', 'o');
  SET v_texto = REPLACE (v_texto,'ò', 'o');
  SET v_texto = REPLACE (v_texto,'ô', 'o');
  SET v_texto = REPLACE (v_texto,'õ', 'o');
  SET v_texto = REPLACE (v_texto,'ú', 'u');
  SET v_texto = REPLACE (v_texto,'ù', 'u');
  SET v_texto = REPLACE (v_texto,'û', 'u');
  SET v_texto = REPLACE (v_texto,'ü', 'u');
  SET v_texto = REPLACE (v_texto,'œ', 'oe');
  SET v_texto = REPLACE (v_texto,'æ', 'ae');

  -- Punctuation and symbols that are dropped entirely.
  SET v_texto = REPLACE (v_texto,'!', '');
  SET v_texto = REPLACE (v_texto,'.', '');
  SET v_texto = REPLACE (v_texto,'*', '');
  SET v_texto = REPLACE (v_texto,'@', '');
  SET v_texto = REPLACE (v_texto,'#', '');
  SET v_texto = REPLACE (v_texto,'$', '');
  SET v_texto = REPLACE (v_texto,'&', '');
  SET v_texto = REPLACE (v_texto,'-', '');
  SET v_texto = REPLACE (v_texto,'+', '');
  SET v_texto = REPLACE (v_texto,',', '');
  SET v_texto = REPLACE (v_texto,')', '');
  SET v_texto = REPLACE (v_texto,'(', '');
  SET v_texto = REPLACE (v_texto,':', '');
  SET v_texto = REPLACE (v_texto,'[', '');
  SET v_texto = REPLACE (v_texto,']', '');
  SET v_texto = REPLACE (v_texto,'>', '');
  SET v_texto = REPLACE (v_texto,'<', '');
  SET v_texto = REPLACE (v_texto,'"', '');
  SET v_texto = REPLACE (v_texto,'´', '');
  SET v_texto = REPLACE (v_texto,'~', '');

  RETURN v_texto;
 END  @

我知道它很长,但对我来说管用!

谢谢!

对于 DB2,您可以使用

-- Z_REMOVEACCENTS: returns p_Str with accented Latin letters replaced by their
-- unaccented equivalents, preserving letter case ('é' -> 'e', 'É' -> 'E').
--
--   p_Str    text to clean (nvarchar2)
--   returns  the cleaned text (nvarchar2)
--
-- NOTE(review): the body reassigns the IN parameter. Oracle PL/SQL rejects that
-- (PLS-00363); this relies on the PL/SQL dialect accepted by Db2 - confirm on
-- the target server, or copy p_Str into a local variable first.
CREATE OR REPLACE FUNCTION Z_REMOVEACCENTS( p_Str nvarchar2 )
RETURN nvarchar2
AS
BEGIN
  -- Upper-case letters with diacritics.
  p_Str := REPLACE(p_Str, 'Á', 'A');
  p_Str := REPLACE(p_Str, 'À', 'A');
  p_Str := REPLACE(p_Str, 'Â', 'A');
  p_Str := REPLACE(p_Str, 'Ã', 'A');
  p_Str := REPLACE(p_Str, 'Ç', 'C');
  p_Str := REPLACE(p_Str, 'É', 'E');
  p_Str := REPLACE(p_Str, 'È', 'E');
  p_Str := REPLACE(p_Str, 'Ê', 'E');
  p_Str := REPLACE(p_Str, 'Ë', 'E');
  p_Str := REPLACE(p_Str, 'Í', 'I');
  p_Str := REPLACE(p_Str, 'Ì', 'I');
  p_Str := REPLACE(p_Str, 'Î', 'I');
  p_Str := REPLACE(p_Str, 'Ó', 'O');
  p_Str := REPLACE(p_Str, 'Ò', 'O');
  p_Str := REPLACE(p_Str, 'Ô', 'O');
  p_Str := REPLACE(p_Str, 'Õ', 'O');
  p_Str := REPLACE(p_Str, 'Ú', 'U');
  p_Str := REPLACE(p_Str, 'Ù', 'U');
  p_Str := REPLACE(p_Str, 'Û', 'U');
  p_Str := REPLACE(p_Str, 'Ü', 'U');
  p_Str := REPLACE(p_Str, 'Œ', 'OE');
  p_Str := REPLACE(p_Str, 'Æ', 'AE');

  -- Lower-case letters with diacritics (same set as above, case preserved).
  p_Str := REPLACE(p_Str, 'á', 'a');
  p_Str := REPLACE(p_Str, 'à', 'a');
  p_Str := REPLACE(p_Str, 'â', 'a');
  p_Str := REPLACE(p_Str, 'ã', 'a');
  p_Str := REPLACE(p_Str, 'ç', 'c');
  p_Str := REPLACE(p_Str, 'é', 'e');
  p_Str := REPLACE(p_Str, 'è', 'e');
  p_Str := REPLACE(p_Str, 'ê', 'e');
  p_Str := REPLACE(p_Str, 'ë', 'e');
  p_Str := REPLACE(p_Str, 'í', 'i');
  p_Str := REPLACE(p_Str, 'ì', 'i');
  p_Str := REPLACE(p_Str, 'î', 'i');
  p_Str := REPLACE(p_Str, 'ó', 'o');
  p_Str := REPLACE(p_Str, 'ò', 'o');
  p_Str := REPLACE(p_Str, 'ô', 'o');
  p_Str := REPLACE(p_Str, 'õ', 'o');
  p_Str := REPLACE(p_Str, 'ú', 'u');
  p_Str := REPLACE(p_Str, 'ù', 'u');
  p_Str := REPLACE(p_Str, 'û', 'u');
  p_Str := REPLACE(p_Str, 'ü', 'u');
  p_Str := REPLACE(p_Str, 'œ', 'oe');
  p_Str := REPLACE(p_Str, 'æ', 'ae');

  RETURN p_Str;
END;

类似这样的内容会比其他答案更短(更快)

-- STRIP_ACCENTS: returns S with a small set of accented Latin letters replaced
-- by their unaccented equivalents (case preserved) and the ligatures 'œ'/'æ'
-- expanded to 'oe'/'ae'.
--
-- TRANSLATE maps the n-th character of the third argument (from) to the n-th
-- character of the second argument (to); both strings must stay the same
-- length and in the same order when the list is extended.
--
-- The input is converted with VARGRAPHIC rather than GRAPHIC: GRAPHIC produces
-- a fixed-length value (at most 127 characters), which pads short inputs with
-- blanks and cannot hold longer ones.
-- NOTE(review): VARGRAPHIC has its own maximum length, below the 32000 declared
-- here - confirm very long inputs are not expected.
CREATE OR REPLACE FUNCTION STRIP_ACCENTS(S VARCHAR(32000))
RETURNS VARCHAR(32000)
LANGUAGE SQL CONTAINS SQL DETERMINISTIC NO EXTERNAL ACTION
RETURN
REPLACE(REPLACE(TRANSLATE(VARGRAPHIC(s)
,'ACEIOUAEIOUAEOUAOEUaceiouaeiouaeouaoeu'
,'ÁÇÉÍÓÚÀÈÌÒÙÂÊÔÛÃÕËÜáçéíóúàèìòùâêôûãõëü'),'œ','oe'),'æ','ae')

显然,上面没有涵盖 Unicode 中的所有重音字符(老实说,这甚至不是一个很好的列表),所以请随意扩展它

或者查看我的其他答案以获得更完整的解决方案

基于此 post 中的映射,以下 Db2 函数会把大多数(?)带变音符号的 Unicode 字符替换为对应的简单拉丁字母(替换结果不一定是该语言中实际惯用的写法。例如,在德语中,ü 通常被替换为 ue,而不是 u)

-- DB_STRIP_DIACRITICS: returns the input with letters carrying diacritics, plus
-- assorted Latin letter variants, digraphs and ligatures, replaced by plain
-- Latin letters. Case is preserved.
--
-- The body is a chain of 98 nested REGEXP_REPLACE calls. The innermost call
-- applies the first pattern/replacement pair listed below, the outermost the
-- last. Each pattern is a character class, so every character inside the
-- brackets is matched individually.
--
-- Digraphs and ligatures must therefore appear as their single code points:
--   U+01C4..U+01CC  DZ-caron, LJ and NJ digraphs (upper, title and lower case)
--   U+01F1..U+01F3  DZ digraphs
--   U+0132, U+0133  IJ ligatures
--   U+FB00..U+FB04  ff, fi, fl, ffi, ffl ligatures
--   U+FB06          st ligature
-- If an editor or tool decomposes them into ordinary letters (e.g. '[IJ]'),
-- the class matches plain 'I' and 'J' and unaccented text gets rewritten.
CREATE OR REPLACE FUNCTION DB_STRIP_DIACRITICS(string VARCHAR(32000))
RETURNS VARCHAR(32000)
LANGUAGE SQL CONTAINS SQL DETERMINISTIC NO EXTERNAL ACTION
RETURN
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
 string,
'[ÁĂẮẶẰẲẴǍÂẤẬẦẨẪÄǞȦǠẠȀÀẢȂĀĄÅǺḀȺÃⱯᴀ]',                  'A'), 
'[Ꜳ]',                                                 'AA'), 
'[ÆǼǢᴁ]',                                              'AE'), 
'[Ꜵ]',                                                 'AO'), 
'[Ꜷ]',                                                 'AU'), 
'[ꜸꜺ]',                                                'AV'), 
'[Ꜽ]',                                                 'AY'), 
'[ḂḄƁḆɃƂʙᴃ]',                                          'B'), 
'[ĆČÇḈĈĊƇȻꜾᴄ]',                                        'C'), 
'[ĎḐḒḊḌƊḎǲǅĐƋꝹᴅ]',                                     'D'), 
'[ǱǄ]',                                                'DZ'), 
'[ÉĔĚȨḜÊẾỆỀỂỄḘËĖẸȄÈẺȆĒḖḔĘɆẼḚƐƎᴇⱻ]',                    'E'), 
'[Ꝫ]',                                                 'ET'), 
'[ḞƑꝻꜰ]',                                              'F'), 
'[ǴĞǦĢĜĠƓḠǤꝽɢʛ]',                                      'G'), 
'[ḪȞḨĤⱧḦḢḤĦʜ]',                                        'H'), 
'[ÍĬǏÎÏḮİỊȈÌỈȊĪĮƗĨḬɪ]',                                'I'), 
'[Ĳ]',                                                 'IJ'), 
'[Ꝭ]',                                                 'IS'), 
'[ĴɈᴊ]',                                               'J'), 
'[ḰǨĶⱩꝂḲƘḴꝀꝄᴋ]',                                       'K'), 
'[ĹȽĽĻḼḶḸⱠꝈḺĿⱢǈŁꞀʟᴌ]',                                 'L'), 
'[Ǉ]',                                                 'LJ'), 
'[ḾṀṂⱮƜᴍ]',                                            'M'), 
'[ŃŇŅṊṄṆǸƝṈȠǋÑɴᴎ]',                                    'N'), 
'[Ǌ]',                                                 'NJ'), 
'[ÓŎǑÔỐỘỒỔỖÖȪȮȰỌŐȌÒỎƠỚỢỜỞỠȎꝊꝌŌṒṐƟǪǬØǾÕṌṎȬƆᴏᴐ]',        'O'), 
'[Œɶ]',                                                'OE'), 
'[Ƣ]',                                                 'OI'), 
'[Ꝏ]',                                                 'OO'), 
'[Ȣᴕ]',                                                'OU'), 
'[ṔṖꝒƤꝔⱣꝐᴘ]',                                          'P'), 
'[ꝘꝖ]',                                                'Q'), 
'[ꞂŔŘŖṘṚṜȐȒṞɌⱤʁʀᴙᴚ]',                                  'R'), 
'[ꞄŚṤŠṦŞŜȘṠṢṨꜱ]',                                      'S'), 
'[ꞆŤŢṰȚȾṪṬƬṮƮŦᴛ]',                                     'T'), 
'[Ꜩ]',                                                 'TZ'), 
'[ÚŬǓÛṶÜǗǙǛǕṲỤŰȔÙỦƯỨỰỪỬỮȖŪṺŲŮŨṸṴᴜ]',                   'U'), 
'[ɅꝞṾƲṼᴠ]',                                            'V'), 
'[Ꝡ]',                                                 'VY'), 
'[ẂŴẄẆẈẀⱲᴡ]',                                          'W'), 
'[ẌẊ]',                                                'X'), 
'[ÝŶŸẎỴỲƳỶỾȲɎỸʏ]',                                     'Y'), 
'[ŹŽẐⱫŻẒȤẔƵᴢ]',                                        'Z'), 
'[áăắặằẳẵǎâấậầẩẫäǟȧǡạȁàảȃāąᶏẚåǻḁⱥãɐₐ]',                'a'), 
'[ꜳ]',                                                 'aa'), 
'[æǽǣᴂ]',                                              'ae'), 
'[ꜵ]',                                                 'ao'), 
'[ꜷ]',                                                 'au'), 
'[ꜹꜻ]',                                                'av'), 
'[ꜽ]',                                                 'ay'), 
'[ḃḅɓḇᵬᶀƀƃ]',                                          'b'), 
'[ćčçḉĉɕċƈȼↄꜿ]',                                       'c'), 
'[ďḑḓȡḋḍɗᶑḏᵭᶁđɖƌꝺ]',                                   'd'), 
'[ǳǆ]',                                                'dz'), 
'[éĕěȩḝêếệềểễḙëėẹȅèẻȇēḗḕⱸęᶒɇẽḛɛᶓɘǝₑ]',                 'e'), 
'[ꝫ]',                                                 'et'), 
'[ḟƒᵮᶂꝼ]',                                             'f'), 
'[ﬀ]',                                                 'ff'), 
'[ﬃ]',                                                 'ffi'), 
'[ﬄ]',                                                 'ffl'), 
'[ﬁ]',                                                 'fi'), 
'[ﬂ]',                                                 'fl'), 
'[ǵğǧģĝġɠḡᶃǥᵹɡᵷ]',                                     'g'), 
'[ḫȟḩĥⱨḧḣḥɦẖħɥʮʯ]',                                    'h'), 
'[ƕ]',                                                 'hv'), 
'[ıíĭǐîïḯịȉìỉȋīįᶖɨĩḭᴉᵢ]',                              'i'), 
'[ĳ]',                                                 'ij'), 
'[ꝭ]',                                                 'is'), 
'[ȷɟʄǰĵʝɉⱼ]',                                          'j'), 
'[ḱǩķⱪꝃḳƙḵᶄꝁꝅʞ]',                                      'k'), 
'[ĺƚɬľļḽȴḷḹⱡꝉḻŀɫᶅɭłꞁ]',                                'l'), 
'[ǉ]',                                                 'lj'), 
'[ḿṁṃɱᵯᶆɯɰ]',                                          'm'), 
'[ńňņṋȵṅṇǹɲṉƞᵰᶇɳñ]',                                   'n'), 
'[ǌ]',                                                 'nj'), 
'[ɵóŏǒôốộồổỗöȫȯȱọőȍòỏơớợờởỡȏꝋꝍⱺōṓṑǫǭøǿõṍṏȭɔᶗᴑᴓₒ]',     'o'), 
'[ᴔœ]',                                                'oe'), 
'[ƣ]',                                                 'oi'), 
'[ꝏ]',                                                 'oo'), 
'[ȣ]',                                                 'ou'), 
'[ṕṗꝓƥᵱᶈꝕᵽꝑ]',                                         'p'), 
'[ꝙʠɋꝗ]',                                              'q'), 
'[ꞃŕřŗṙṛṝȑɾᵳȓṟɼᵲᶉɍɽɿɹɻɺⱹᵣ]',                           'r'), 
'[ꞅſẜẛẝśṥšṧşŝșṡṣṩʂᵴᶊȿ]',                               's'), 
'[ﬆ]',                                                 'st'), 
'[ꞇťţṱțȶẗⱦṫṭƭṯᵵƫʈŧʇ]',                                 't'), 
'[ᵺ]',                                                 'th'), 
'[ꜩ]',                                                 'tz'), 
'[ᴝúŭǔûṷüǘǚǜǖṳụűȕùủưứựừửữȗūṻųᶙůũṹṵᵤ]',                 'u'), 
'[ᵫ]',                                                 'ue'), 
'[ꝸ]',                                                 'um'), 
'[ʌⱴꝟṿʋᶌⱱṽᵥ]',                                         'v'), 
'[ꝡ]',                                                 'vy'), 
'[ʍẃŵẅẇẉẁⱳẘ]',                                         'w'), 
'[ẍẋᶍₓ]',                                              'x'), 
'[ʎýŷÿẏỵỳƴỷỿȳẙɏỹ]',                                    'y'), 
'[źžẑʑⱬżẓȥẕᵶᶎʐƶɀ]',                                    'z')

我们用一个 Java class 解决了这个问题,通过正则模式(Pattern)实现替换。

先编译它:
# Compile GdprNameRegexp.java with the javac found under the Db2 installation directory.
/opt/ibm/db2/V10.5_WSE/java/jdk64/bin/javac GdprNameRegexp.java

将它打包成一个 JAR 文件

# Package the compiled class into GdprNameRegexp.jar (c = create, v = verbose, f = output file name).
/opt/ibm/db2/V10.5_WSE/java/jdk64/bin/jar cvf GdprNameRegexp.jar GdprNameRegexp.class

并将其加载到 db2 中作为 UDF 的源代码

-- Install the JAR file into the database under the jar id GDPRNAME.
CALL sqlj.install_jar('file:/home/db2inst1/GdprNameRegexp.jar', 'GDPRNAME')
-- GDPRNAME_VERIFY(WORD): external Java UDF bound to GdprNameRegexp.nameFieldsPattern in jar GDPRNAME; returns an INTEGER.
CREATE FUNCTION GDPRNAME_VERIFY(WORD VARCHAR(255)) RETURNS INTEGER FENCED EXTERNAL NAME 'GDPRNAME:GdprNameRegexp.nameFieldsPattern' NOT VARIANT NO SQL PARAMETER STYLE java LANGUAGE java NO EXTERNAL ACTION
-- GDPRNAME_REPLACE(WORD, REPLACEMENT): external Java UDF bound to GdprNameRegexp.replaceNameFieldPattern in jar GDPRNAME; returns VARCHAR(255).
CREATE FUNCTION GDPRNAME_REPLACE(WORD VARCHAR(255), REPLACEMENT VARCHAR(255)) RETURNS VARCHAR(255) FENCED EXTERNAL NAME 'GDPRNAME:GdprNameRegexp.replaceNameFieldPattern' NOT VARIANT NO SQL PARAMETER STYLE java LANGUAGE java NO EXTERNAL ACTION

Java class 示例:

import java.util.regex.Pattern;
import java.text.Normalizer;

/**
 * Static helpers for validating and cleaning name fields, intended to be
 * registered as Db2 Java UDFs.
 */
public class GdprNameRegexp {
  /** Four or more occurrences of the same character in a row (ASCII case-insensitive). */
  private static final Pattern CONSECUTIVE_CHAR_PATTERN =
      Pattern.compile("(.)\\1\\1\\1+", Pattern.CASE_INSENSITIVE);

  /**
   * A "special" character: anything that is not a letter, apostrophe, hyphen or
   * space, plus the three letter-category code points U+00BA, U+00AA and U+02BA.
   *
   * Written as an alternation rather than a class nested inside a negated class,
   * because the meaning of that nesting differs between Java releases.
   * NOTE(review): this assumes the three listed code points are meant to be
   * rejected even though they are letters; confirm against the intended rule.
   */
  private static final Pattern SPECIAL_CHARS_PATTERN =
      Pattern.compile("[^\\p{L}'\\- ]|[\\u00BA\\u00AA\\u02BA]", Pattern.CASE_INSENSITIVE);

  /** Any decimal digit; used with find(), so one digit anywhere is enough. */
  private static final Pattern CONTAINS_NUMBERS_PATTERN =
      Pattern.compile("\\d", Pattern.CASE_INSENSITIVE);

  /**
   * Checks a name field against the three patterns above.
   *
   * @param word the value to check; may be null
   * @return 0 when word is null or blank; 1 when no pattern matches; otherwise
   *         the sum of -2 (special character found), -1 (four or more identical
   *         consecutive characters) and -4 (digit found)
   */
  public static int nameFieldsPattern(String word) {
    if (word == null || word.trim().isEmpty()) {
      return 0;
    }
    // Compose characters first so that base letter + combining mark counts as one letter.
    String normalized = Normalizer.normalize(word, Normalizer.Form.NFC);
    int rsp = 0;
    if (SPECIAL_CHARS_PATTERN.matcher(normalized).find()) {
      rsp -= 2;
    }
    if (CONSECUTIVE_CHAR_PATTERN.matcher(normalized).find()) {
      rsp -= 1;
    }
    if (CONTAINS_NUMBERS_PATTERN.matcher(normalized).find()) {
      rsp -= 4;
    }
    return rsp == 0 ? 1 : rsp;
  }

  /**
   * Replaces every special character in word with replacement, collapses
   * repeated replacements into one, and trims the result.
   *
   * @param word        the value to clean; may be null
   * @param replacement literal text substituted for each special character;
   *                    may be null or empty
   * @return "" when word is null; the trimmed word unchanged when it is blank
   *         or replacement is null; otherwise the cleaned, trimmed text
   */
  public static String replaceNameFieldPattern(String word, String replacement) {
    if (word == null) {
      return "";
    }
    if (word.trim().isEmpty() || replacement == null) {
      return word.trim();
    }
    String normalized = Normalizer.normalize(word, Normalizer.Form.NFC);
    // quoteReplacement keeps '$' and '\' in the replacement literal instead of
    // being read as group references / escapes.
    String result = SPECIAL_CHARS_PATTERN.matcher(normalized)
        .replaceAll(java.util.regex.Matcher.quoteReplacement(replacement))
        .trim();
    if (!replacement.isEmpty()) {
      String doubled = replacement + replacement;
      // String.replace is literal (no regex) and returns a new string, so the
      // result must be reassigned. Each pass shortens the string, so the loop ends.
      while (result.contains(doubled)) {
        result = result.replace(doubled, replacement);
      }
    }
    return result.trim();
  }
}