如何在 postgresql 中创建 n-gram
How to create n-gram in postgresql
我想在我的应用程序中使用搜索功能。
我用 trigram(pg_trgm)来实现它,它工作正常。
问题是:
trigram 会把单词拆分成每 3 个字符一组的序列。
我希望每个分组也能包含 3 个以上的字符。
例如:
select show_trgm('abcpqrs');
这将返回:{"  a"," ab","abc","bcp","cpq","pqr","qrs","rs "}
我想要的结果是:{"  a"," ab","abc","bcp","cpq","pqr","qrs","rs ","abcp","bcpq","cpqr"...}
我怎样才能做到这一点?
试试这个函数。
-- myngram(mystr, n): split mystr into overlapping character n-grams.
--
-- The input is padded with n - 1 spaces on each side, so the first gram ends
-- with the first character of mystr and the last gram starts with its last
-- character. The result therefore has char_length(mystr) + n - 1 elements,
-- each exactly n characters long, in left-to-right order.
--
-- Parameters:
--   mystr  text to split
--   n      gram size in characters; must be >= 1
-- Returns:
--   TEXT[] of n-grams; NULL when either argument is NULL (STRICT).
-- Raises:
--   invalid_parameter_value when n < 1.
--
-- Declared IMMUTABLE because the result depends only on the arguments, which
-- also makes the function usable in expression indexes.
CREATE OR REPLACE FUNCTION myngram(mystr TEXT, n INT) RETURNS TEXT[]
AS $$
DECLARE
    padded TEXT;
BEGIN
    -- lpad/rpad with a target length shorter than the input truncate it, so a
    -- non-positive n would silently produce garbage; reject it up front.
    IF n < 1 THEN
        RAISE EXCEPTION 'myngram: n must be >= 1, got %', n
            USING ERRCODE = 'invalid_parameter_value';
    END IF;

    padded := repeat(' ', n - 1) || mystr || repeat(' ', n - 1);

    -- One gram per start position; ARRAY(subquery) yields '{}' (not NULL)
    -- when there are no positions, e.g. for mystr = '' and n = 1.
    RETURN ARRAY(
        SELECT substring(padded FROM gs.i FOR n)
        FROM generate_series(1, char_length(padded) - n + 1) AS gs(i)
        ORDER BY gs.i
    );
END
$$
LANGUAGE plpgsql
IMMUTABLE
STRICT;
我的测试结果。
testdb=# SELECT myngram('abcpqrs', 4);
myngram
-----------------------------------------------------------------
{"   a","  ab"," abc",abcp,bcpq,cpqr,pqrs,"qrs ","rs  ","s   "}
(1 row)
testdb=# SELECT myngram('abcpqrs', 5);
myngram
-------------------------------------------------------------------------------------
{"    a","   ab","  abc"," abcp",abcpq,bcpqr,cpqrs,"pqrs ","qrs  ","rs   ","s    "}
(1 row)
我想在我的应用程序中使用搜索功能。
我用 trigram(pg_trgm)来实现它,它工作正常。
问题是:
trigram 会把单词拆分成每 3 个字符一组的序列。 我希望每个分组也能包含 3 个以上的字符。 例如:
select show_trgm('abcpqrs');
这将返回:{"  a"," ab","abc","bcp","cpq","pqr","qrs","rs "}
我想要的结果是:{"  a"," ab","abc","bcp","cpq","pqr","qrs","rs ","abcp","bcpq","cpqr"...}
我怎样才能做到这一点?
试试这个函数。
-- myngram(mystr, n): split mystr into overlapping character n-grams.
--
-- The input is padded with n - 1 spaces on each side, so the first gram ends
-- with the first character of mystr and the last gram starts with its last
-- character. The result therefore has char_length(mystr) + n - 1 elements,
-- each exactly n characters long, in left-to-right order.
--
-- Parameters:
--   mystr  text to split
--   n      gram size in characters; must be >= 1
-- Returns:
--   TEXT[] of n-grams; NULL when either argument is NULL (STRICT).
-- Raises:
--   invalid_parameter_value when n < 1.
--
-- Declared IMMUTABLE because the result depends only on the arguments, which
-- also makes the function usable in expression indexes.
CREATE OR REPLACE FUNCTION myngram(mystr TEXT, n INT) RETURNS TEXT[]
AS $$
DECLARE
    padded TEXT;
BEGIN
    -- lpad/rpad with a target length shorter than the input truncate it, so a
    -- non-positive n would silently produce garbage; reject it up front.
    IF n < 1 THEN
        RAISE EXCEPTION 'myngram: n must be >= 1, got %', n
            USING ERRCODE = 'invalid_parameter_value';
    END IF;

    padded := repeat(' ', n - 1) || mystr || repeat(' ', n - 1);

    -- One gram per start position; ARRAY(subquery) yields '{}' (not NULL)
    -- when there are no positions, e.g. for mystr = '' and n = 1.
    RETURN ARRAY(
        SELECT substring(padded FROM gs.i FOR n)
        FROM generate_series(1, char_length(padded) - n + 1) AS gs(i)
        ORDER BY gs.i
    );
END
$$
LANGUAGE plpgsql
IMMUTABLE
STRICT;
我的测试结果。
testdb=# SELECT myngram('abcpqrs', 4);
myngram
-----------------------------------------------------------------
{"   a","  ab"," abc",abcp,bcpq,cpqr,pqrs,"qrs ","rs  ","s   "}
(1 row)
testdb=# SELECT myngram('abcpqrs', 5);
myngram
-------------------------------------------------------------------------------------
{"    a","   ab","  abc"," abcp",abcpq,bcpqr,cpqrs,"pqrs ","qrs  ","rs   ","s    "}
(1 row)