OraQA

Oracle Question and Answer

  • Do you have a solution to a problem? Do you have an unanswered question? Login and share it with the Oracle community. More...

Oracle News


Entries RSS feed

Comments RSS feed

How to solve the Overlapping and Non-Overlapping Substrings problem in SQL

January 16th, 2008 By Frank Zhou

The following is an interesting problem posted by Jsoftware:

Find all occurrences (Starting positions) of the input string as a substring in the data.

create table t9 as
select 1 id, '1HIHIHI8HIHIHI' as str from dual
union all
select 2 id, 'HIHIHI7HIHIHI' as str from dual;

COLUMN str FORMAT A30
COLUMN start_position FORMAT A30

variable input varchar2(100);
exec :input := 'HIHI';

---------------------------10G  Overlapping Substrings SQL Solution -------------

SELECT max(str) str, count(indexs) overlapp_cnt,
trim(BOTH ','FROM regexp_replace(XMLAgg(XMLElement(X, indexs)
     ORDER BY indexs),'<X>|</X><X>|</X>',',')) AS start_position
FROM
(SELECT str, instr(str, :input, 1 ,LEVEL) AS indexs, id
  FROM t9
  CONNECT BY PRIOR str = str
  AND PRIOR id = id
  AND instr (str, :input, 1, LEVEL) > 0
  AND PRIOR dbms_random.string ('p',20) IS NOT NULL
 )
GROUP BY id;

STR                               OVERLAPP_CNT    START_POSITION
---------------              ------------              ------------------
1HIHIHI8HIHIHI         4                                    2,4,9,11
HIHIHI7HIHIHI           4                                    1,3,8,10
------------------------ Non-Overlapping Substrings --------------------

SELECT max(str) str, count(indexs) no_overlapp_cnt,
trim(BOTH ','FROM regexp_replace(XMLAgg(XMLElement(X, indexs)
ORDER BY indexs),'<X>|</X><X>|</X>', ',')) AS start_position
FROM
(SELECT str, instr(new_str, chr(0),1,LEVEL) - LEVEL + 1 AS indexs, id
 FROM
 (SELECT str, id, REGEXP_REPLACE(str,:input, chr(0)||:input) as new_str
    FROM t9)
   CONNECT BY PRIOR str = str
   AND PRIOR id = id
   AND instr (new_str, chr(0), 1, LEVEL) > 0
   AND PRIOR dbms_random.string ('p',20) IS NOT NULL
 )
GROUP BY id;

STR                        NO_OVERLAPP_CNT    START_POSITION
------------------    ---------------         -----------------
1HIHIHI8HIHIHI          2                            2,9
HIHIHI7HIHIHI            2                            1,8

Leave a Reply

You must be logged in to post a comment.

RSS feed for comments on this question