PostgreSQL
 sql >> Datenbank >> RDS >> PostgreSQL

So erstellen Sie N-Gramme in PostgreSQL

Probieren Sie diese Funktion aus.

-- myngram(mystr, n): split a string into its character n-grams.
--
-- The input is padded with (n - 1) blanks on each side before slicing, so the
-- first and last characters each appear in n different grams, e.g.
--   myngram('ab', 2) -> {" a", "ab", "b "}
--
-- Parameters:
--   mystr  text to split; lengths and positions are counted in characters.
--   n      gram size, must be >= 1.
--
-- Returns:
--   TEXT[] with char_length(mystr) + n - 1 grams in left-to-right order.
--   An empty string therefore yields n - 1 all-blank grams (an empty array
--   for n = 1).
--   NULL when either argument is NULL (the function is STRICT).
--
-- Raises:
--   invalid_parameter_value when n < 1.
CREATE OR REPLACE FUNCTION myngram(mystr TEXT, n INT) RETURNS TEXT[]
AS $$
DECLARE
  padded TEXT;
BEGIN
  -- A gram size below 1 has no meaning: lpad/rpad would silently truncate
  -- the input and substring would either return empty strings or fail with
  -- an unrelated error, so reject it up front with a clear message.
  IF n < 1 THEN
    RAISE EXCEPTION 'myngram: n must be >= 1, got %', n
      USING ERRCODE = 'invalid_parameter_value';
  END IF;

  -- Pad both ends with n - 1 blanks so that edge characters are covered by
  -- as many grams as inner characters.
  padded := repeat(' ', n - 1) || mystr || repeat(' ', n - 1);

  -- One gram per start position; ARRAY(subquery) yields an empty array
  -- (not NULL) when there are no start positions.
  RETURN ARRAY(
    SELECT substring(padded FROM g.i FOR n)
    FROM generate_series(1, char_length(padded) - n + 1) AS g(i)
    ORDER BY g.i
  );
END
$$
LANGUAGE plpgsql
IMMUTABLE   -- result depends only on the arguments
STRICT;     -- NULL in -> NULL out, instead of a NULL loop-bound error

Ergebnisse meiner Tests.

testdb=# SELECT myngram('abcpqrs', 4);
                             myngram                             
-----------------------------------------------------------------
 {"   a","  ab"," abc",abcp,bcpq,cpqr,pqrs,"qrs ","rs  ","s   "}
(1 row)

testdb=# SELECT myngram('abcpqrs', 5);
                                       myngram                                       
-------------------------------------------------------------------------------------
 {"    a","   ab","  abc"," abcp",abcpq,bcpqr,cpqrs,"pqrs ","qrs  ","rs   ","s    "}
(1 row)