OraQA

Oracle Question and Answer

  • Do you have a solution to a problem? Do you have an unanswered question? Login and share it with the Oracle community. More...

Oracle News


Entries RSS feed

Comments RSS feed

How to find the longest common substring pattern in different group of strings in SQL

August 14th, 2007 By Frank Zhou

The following SQL pattern can be used to find the longest common substring pattern in different group of strings.

create table t8 as
Select 1 as id, 10 as grp_id,  'afpOnef1' as in_str from dual
union all
select 2 as id, 10 as grp_id,  '1Onez' as in_str from dual
union all
select 3 as id, 10 as grp_id,  '91Onez' as in_str from dual
union all
select 4 as id, 20 as grp_id,  'bptwoj' as in_str from dual
union all
select 5 as id ,20 as grp_id,  '9patwof' as in_str from dual
union all
select 6 as id ,20 as grp_id,  '+patwo3' as in_str from dual

COLUMN common_pattern FORMAT A28
COLUMN IN_STR FORMAT A18
-----------------------------------------------------10G SQL Solution----------------------------------------------------

SELECT DISTINCT id, grp_id, IN_STR, new_str common_pattern
FROM (SELECT id, grp_id,IN_STR, new_str, ct_str, cnt, length(new_str) len,
             MAX(LENGTH(new_str)) OVER (PARTITION BY grp_id) max_len
       FROM (SELECT id, grp_id, IN_STR, new_str, cnt,
                   COUNT(DISTINCT id) OVER (PARTITION BY grp_id, new_str) ct_str
              FROM(SELECT id, grp_id, IN_STR, rn, cnt,
      REPLACE(SYS_CONNECT_BY_PATH (ch,','),',') new_str
                     FROM (SELECT id,grp_id, IN_STR, ch, cnt,
                                  row_number( ) over (PARTITION BY
                                  grp_id,id ORDER BY rn) rn
                             FROM (SELECT id, grp_id,IN_STR, rownum rn,
                                          cnt,SUBSTR(IN_STR, LEVEL,1) ch
                                   FROM(SELECT id, grp_id, in_str,
        COUNT(id)OVER (PARTITION BY grp_id) cnt
     FROM  t8)
                                   CONNECT BY PRIOR id = id
                                   AND LEVEL <= LENGTH(IN_STR)
                                   AND PRIOR dbms_random.string('P',10)
                                       IS NOT NULL
                                  )
                          )
                      CONNECT BY PRIOR id = id
       AND PRIOR rn = rn -1
                  )
            )
           WHERE ct_str = cnt
     )
WHERE max_len = len
ORDER BY GRP_ID, ID;

        ID     GRP_ID IN_STR             COMMON_PATTERN
---------- ---------- ------------------ ----------------------------
         1         10 afpOnef1           One
         2         10 1Onez              One
         3         10 91Onez             One
         4         20 bptwoj             two
         5         20 9patwof            two
         6         20 +patwo3            two

6 rows selected.

One Response to “How to find the longest common substring pattern in different group of strings in SQL”

  1. newkid Says:

    The difference is, first I find all common characters, then use them to build common sub strings.

    SELECT id,grp_id,in_str,w
      FROM (SELECT t.*,rank() OVER(PARTITION BY grp_id ORDER BY LENGTH(w) DESC) rnk
              FROM (SELECT t.*,COUNT(DISTINCT id) OVER(PARTITION BY grp_id,w) cnt_w
                      FROM (SELECT t.*,REPLACE(SYS_CONNECT_BY_PATH(c,’/'),’/') w
                             FROM (SELECT *
                                     FROM (SELECT t.*,COUNT(DISTINCT id) OVER(PARTITION BY grp_id,c) cnt_c
                                             FROM (SELECT t.*,rn,SUBSTR(in_str,rn,1) c
                                                    FROM (SELECT t8.*,COUNT(*) OVER (PARTITION BY grp_id) cnt FROM t8) t
                                                        ,(SELECT ROWNUM rn FROM DUAL CONNECT BY ROWNUM<=100)
                                                  ) t
                                           WHERE c IS NOT NULL
                                           )
                                   WHERE cnt=cnt_c
                                   ) t
                           CONNECT BY id=PRIOR id AND rn = PRIOR rn +1
                           ) t
                    ) t
            WHERE cnt = cnt_w
            )
    WHERE rnk=1;
    

Leave a Reply

You must be logged in to post a comment.

RSS feed for comments on this question