OraQA

Oracle Question and Answer

  • Do you have a solution to a problem? Do you have an unanswered question? Login and share it with the Oracle community. More...

Oracle News


Entries RSS feed

Comments RSS feed

How to find the longest common substring pattern in different group of strings in SQL

August 14th, 2007 By Frank Zhou

The following SQL pattern can be used to find the longest common substring pattern in different group of strings.

create table t8 as
Select 1 as id, 10 as grp_id,  'afpOnef1' as in_str from dual
union all
select 2 as id, 10 as grp_id,  '1Onez' as in_str from dual
union all
select 3 as id, 10 as grp_id,  '91Onez' as in_str from dual
union all
select 4 as id, 20 as grp_id,  'bptwoj' as in_str from dual
union all
select 5 as id ,20 as grp_id,  '9patwof' as in_str from dual
union all
select 6 as id ,20 as grp_id,  '+patwo3' as in_str from dual

COLUMN common_pattern FORMAT A28
COLUMN IN_STR FORMAT A18

SELECT DISTINCT id, grp_id, IN_STR, new_str common_pattern
FROM (SELECT id, grp_id,IN_STR, new_str, ct_str, cnt, length(new_str) len,
             MAX(LENGTH(new_str)) OVER (PARTITION BY grp_id) max_len
       FROM (SELECT id, grp_id, IN_STR, new_str, cnt,
                   COUNT(DISTINCT id) OVER (PARTITION BY grp_id, new_str) ct_str
              FROM(SELECT id, grp_id, IN_STR, rn, cnt,
      REPLACE(SYS_CONNECT_BY_PATH (ch,','),',') new_str
                     FROM (SELECT id,grp_id, IN_STR, ch, cnt,
                                  row_number( ) over (PARTITION BY
                                  grp_id,id ORDER BY rn) rn
                             FROM (SELECT id, grp_id,IN_STR, rownum rn,
                                          cnt,SUBSTR(IN_STR, LEVEL,1) ch
                                   FROM(SELECT id, grp_id, in_str,
        COUNT(id)OVER (PARTITION BY grp_id) cnt
     FROM  t8)
                                   CONNECT BY PRIOR id = id
                                   AND LEVEL <= LENGTH(IN_STR)
                                   AND PRIOR dbms_random.string('P',10)
                                       IS NOT NULL
                                  )
                          )
                      CONNECT BY PRIOR id = id
       AND PRIOR rn = rn -1
                  )
            )
           WHERE ct_str = cnt
     )
WHERE max_len = len
ORDER BY GRP_ID, ID;

        ID     GRP_ID IN_STR             COMMON_PATTERN
---------- ---------- ------------------ ----------------------------
         1         10 afpOnef1           One
         2         10 1Onez              One
         3         10 91Onez             One
         4         20 bptwoj             two
         5         20 9patwof            two
         6         20 +patwo3            two

6 rows selected.

Leave a Reply

You must be logged in to post a comment.

RSS feed for comments on this question