如何将此 php slugify 函数重写为 mysql?

How to rewrite this php slugify function to mysql?

我在这里找到了这个 slugify 函数:PHP function to make slug (URL string)

然后我尝试将其重写为 MySQL,我所做的是:

,但在输出中我只得到降低的文本,没有破折号和字母似乎也被音译,所以只剩下破折号。

我的查询:

UPDATE `ad_kategorija` SET `slug_lt`=slugify(`kat_pavlt`), `slug_ru`=slugify(`kat_pavru`), `slug_en`=slugify(`kat_paven`)

函数:

# Regex Replace function    
DELIMITER $$

        CREATE DEFINER=`root`@`localhost` FUNCTION `regex_replace`(pattern VARCHAR(1000),replacement VARCHAR(1000),original VARCHAR(1000)) RETURNS varchar(1000) CHARSET utf8
            DETERMINISTIC
        BEGIN 
         DECLARE temp VARCHAR(1000); 
         DECLARE ch VARCHAR(1); 
         DECLARE i INT;
         SET i = 1;
         SET temp = '';
         IF original REGEXP pattern THEN 
          loop_label: LOOP 
           IF i>CHAR_LENGTH(original) THEN
            LEAVE loop_label;  
           END IF;
           SET ch = SUBSTRING(original,i,1);
           IF NOT ch REGEXP pattern THEN
            SET temp = CONCAT(temp,ch);
           ELSE
            SET temp = CONCAT(temp,replacement);
           END IF;
           SET i=i+1;
          END LOOP;
         ELSE
          SET temp = original;
         END IF;
         RETURN temp;
        END$$

# Transliteration function 
        CREATE DEFINER=`root`@`localhost` FUNCTION `transliterate`(original VARCHAR(512)) RETURNS varchar(512) CHARSET utf8
        BEGIN

          DECLARE translit VARCHAR(512) DEFAULT '';
          DECLARE len      INT(3)       DEFAULT 0;
          DECLARE pos      INT(3)       DEFAULT 1;
          DECLARE letter   CHAR(1);
          DECLARE is_lower BIT;

          SET len = CHAR_LENGTH(original);

          WHILE (pos <= len) DO
            SET letter   = SUBSTRING(original, pos, 1);
            SET is_lower = IF(LCASE(letter) COLLATE utf8_bin = letter COLLATE utf8_bin, 1, 0);

            CASE TRUE
              WHEN letter = 'a' THEN SET letter = IF(is_lower, 'a', 'A');
              WHEN letter = 'b' THEN SET letter = IF(is_lower, 'b', 'B');
              WHEN letter = 'c' THEN SET letter = IF(is_lower, 'c', 'C');
              WHEN letter = 'd' THEN SET letter = IF(is_lower, 'd', 'D');
              WHEN letter = 'e' THEN SET letter = IF(is_lower, 'e', 'E');
              WHEN letter = 'f' THEN SET letter = IF(is_lower, 'f', 'F');
              WHEN letter = 'g' THEN SET letter = IF(is_lower, 'g', 'G');
              WHEN letter = 'h' THEN SET letter = IF(is_lower, 'h', 'H');
              WHEN letter = 'i' THEN SET letter = IF(is_lower, 'i', 'I');
              WHEN letter = 'j' THEN SET letter = IF(is_lower, 'j', 'J');
              WHEN letter = 'k' THEN SET letter = IF(is_lower, 'k', 'K');
              WHEN letter = 'l' THEN SET letter = IF(is_lower, 'l', 'L');
              WHEN letter = 'ł' THEN SET letter = IF(is_lower, 'l', 'L');
              WHEN letter = 'm' THEN SET letter = IF(is_lower, 'm', 'M');
              WHEN letter = 'n' THEN SET letter = IF(is_lower, 'n', 'N');
              WHEN letter = 'o' THEN SET letter = IF(is_lower, 'o', 'O');
              WHEN letter = 'p' THEN SET letter = IF(is_lower, 'p', 'P');
              WHEN letter = 'q' THEN SET letter = IF(is_lower, 'q', 'Q');
              WHEN letter = 'r' THEN SET letter = IF(is_lower, 'r', 'R');
              WHEN letter = 's' THEN SET letter = IF(is_lower, 's', 'S');
              WHEN letter = 't' THEN SET letter = IF(is_lower, 't', 'T');
              WHEN letter = 'u' THEN SET letter = IF(is_lower, 'u', 'U');
              WHEN letter = 'v' THEN SET letter = IF(is_lower, 'v', 'V');
              WHEN letter = 'w' THEN SET letter = IF(is_lower, 'w', 'W');
              WHEN letter = 'x' THEN SET letter = IF(is_lower, 'x', 'X');
              WHEN letter = 'y' THEN SET letter = IF(is_lower, 'y', 'Y');
              WHEN letter = 'z' THEN SET letter = IF(is_lower, 'z', 'Z');
              ELSE
                SET letter = letter;
            END CASE;

            -- CONCAT seems to ignore the whitespace character. As a workaround we use
            -- CONCAT_WS with a whitespace separator when the letter is a whitespace.
            SET translit = CONCAT_WS(IF(letter = ' ', ' ', ''), translit, letter);
            SET pos = pos + 1;
          END WHILE;

          RETURN translit;

        END$$

# slug create function
        CREATE DEFINER=`root`@`localhost` FUNCTION `slugify`(`dirty_string` VARCHAR(255) CHARSET utf8) RETURNS varchar(255) CHARSET utf8
            DETERMINISTIC
        BEGIN
            DECLARE temp_string VarChar(255) DEFAULT '';
            DECLARE output VarChar(255);

            SET temp_string = regex_replace('~[^\pL\d]+~u', '-', dirty_string);

            SET temp_string = TRIM(BOTH '-' FROM temp_string);

            SET temp_string = transliterate(temp_string);

            SET temp_string = LOWER(temp_string);

            SET temp_string = regex_replace('~[^-\w]+~', '', temp_string);

            If temp_string = '' Then
                SET temp_string = '';
            End If;

            SET output = temp_string;

            Return output;

        END$$

        DELIMITER ;

所以有人可以帮我完成它吗,主要问题是我没有在空格上替换破折号,也许正则表达式是错误的。或者正则表达式替换功能需要一些插件来完成。请帮忙。

修复了我的功能,删除了 regex_replace 用法,而是使用了此处的功能:mySQL Stored Function to create a slug

添加了音译调整,最终代码在这里。音译功能不变:

    DELIMITER $$
--
-- Functions
--
CREATE DEFINER=`root`@`localhost` FUNCTION `slugify`(`dirty_string` VARCHAR(255)) RETURNS varchar(255) CHARSET utf8
    DETERMINISTIC
BEGIN
    DECLARE x, y , z Int;
    Declare temp_string, allowed_chars, new_string VarChar(255);
    Declare is_allowed Bool;
    Declare c, check_char VarChar(1);

    set allowed_chars = "abcdefghijklmnopqrstuvwxyz0123456789-";
    set temp_string = transliterate(dirty_string);
    set temp_string = lower(temp_string);

    Select temp_string Regexp('&') Into x;
    If x = 1 Then
        Set temp_string = replace(temp_string, '&', ' and ');
    End If;

    Select temp_string Regexp('[^a-z0-9]+') into x;
    If x = 1 then
        set z = 1;
        While z <= Char_length(temp_string) Do
            Set c = Substring(temp_string, z, 1);
            Set is_allowed = False;
            Set y = 1;
            Inner_Check: While y <= Char_length(allowed_chars) Do
                If (strCmp(ascii(Substring(allowed_chars,y,1)), Ascii(c)) = 0) Then
                    Set is_allowed = True;
                    Leave Inner_Check;
                End If;
                Set y = y + 1;
            End While;
            If is_allowed = False Then
                Set temp_string = Replace(temp_string, c, '-');
            End If;

            set z = z + 1;
        End While;
    End If;

    Select temp_string Regexp("^-|-$|'") into x;
    If x = 1 Then
        Set temp_string = Replace(temp_string, "'", '');
        Set z = Char_length(temp_string);
        Set y = Char_length(temp_string);
        Dash_check: While z > 1 Do
            If Strcmp(SubString(temp_string, -1, 1), '-') = 0 Then
                Set temp_string = Substring(temp_string,1, y-1);
                Set y = y - 1;
            Else
                Leave Dash_check;
            End If;
            Set z = z - 1;
        End While;
    End If;

    Repeat
        Select temp_string Regexp("--") into x;
        If x = 1 Then
            Set temp_string = Replace(temp_string, "--", "-");
        End If;
    Until x <> 1 End Repeat;

    If LOCATE('-', temp_string) = 1 Then
        Set temp_string = SUBSTRING(temp_string, 2);
    End If;

    Return temp_string;
END$$

CREATE DEFINER=`root`@`localhost` FUNCTION `transliterate`(original VARCHAR(512)) RETURNS varchar(512) CHARSET utf8
BEGIN

  DECLARE translit VARCHAR(512) DEFAULT '';
  DECLARE len      INT(3)       DEFAULT 0;
  DECLARE pos      INT(3)       DEFAULT 1;
  DECLARE letter   CHAR(1);
  DECLARE is_lower BIT;

  SET len = CHAR_LENGTH(original);

  WHILE (pos <= len) DO
    SET letter   = SUBSTRING(original, pos, 1);
    SET is_lower = IF(LCASE(letter) COLLATE utf8_bin = letter COLLATE utf8_bin, 1, 0);

    CASE TRUE
      WHEN letter = 'a' THEN SET letter = IF(is_lower, 'a', 'A');
      WHEN letter = 'b' THEN SET letter = IF(is_lower, 'b', 'B');
      WHEN letter = 'c' THEN SET letter = IF(is_lower, 'c', 'C');
      WHEN letter = 'd' THEN SET letter = IF(is_lower, 'd', 'D');
      WHEN letter = 'e' THEN SET letter = IF(is_lower, 'e', 'E');
      WHEN letter = 'f' THEN SET letter = IF(is_lower, 'f', 'F');
      WHEN letter = 'g' THEN SET letter = IF(is_lower, 'g', 'G');
      WHEN letter = 'h' THEN SET letter = IF(is_lower, 'h', 'H');
      WHEN letter = 'i' THEN SET letter = IF(is_lower, 'i', 'I');
      WHEN letter = 'j' THEN SET letter = IF(is_lower, 'j', 'J');
      WHEN letter = 'k' THEN SET letter = IF(is_lower, 'k', 'K');
      WHEN letter = 'l' THEN SET letter = IF(is_lower, 'l', 'L');
      WHEN letter = 'ł' THEN SET letter = IF(is_lower, 'l', 'L');
      WHEN letter = 'm' THEN SET letter = IF(is_lower, 'm', 'M');
      WHEN letter = 'n' THEN SET letter = IF(is_lower, 'n', 'N');
      WHEN letter = 'o' THEN SET letter = IF(is_lower, 'o', 'O');
      WHEN letter = 'p' THEN SET letter = IF(is_lower, 'p', 'P');
      WHEN letter = 'q' THEN SET letter = IF(is_lower, 'q', 'Q');
      WHEN letter = 'r' THEN SET letter = IF(is_lower, 'r', 'R');
      WHEN letter = 's' THEN SET letter = IF(is_lower, 's', 'S');
      WHEN letter = 't' THEN SET letter = IF(is_lower, 't', 'T');
      WHEN letter = 'u' THEN SET letter = IF(is_lower, 'u', 'U');
      WHEN letter = 'v' THEN SET letter = IF(is_lower, 'v', 'V');
      WHEN letter = 'w' THEN SET letter = IF(is_lower, 'w', 'W');
      WHEN letter = 'x' THEN SET letter = IF(is_lower, 'x', 'X');
      WHEN letter = 'y' THEN SET letter = IF(is_lower, 'y', 'Y');
      WHEN letter = 'z' THEN SET letter = IF(is_lower, 'z', 'Z');
      ELSE
        SET letter = letter;
    END CASE;

    -- CONCAT seems to ignore the whitespace character. As a workaround we use
    -- CONCAT_WS with a whitespace separator when the letter is a whitespace.
    SET translit = CONCAT_WS(IF(letter = ' ', ' ', ''), translit, letter);
    SET pos = pos + 1;
  END WHILE;

  RETURN translit;

END$$

DELIMITER ;

P.S。有人在聊天中询问测试场景。您至少需要一个 table 和 2x varchars(255) 列。以及来自任何书籍或文本的句子,带有逗号、点、括号、其他识别符号等。作为结果,只需要保留数字、单词、字母和单破折号。但是当我开始的时候结果是带空格的小写单词。