How to solve the Overlapping and Non-Overlapping Substrings problem in SQL
January 16th, 2008 By Frank Zhou
The following is an interesting problem posted by Jsoftware:
Find all occurrences (Starting positions) of the input string as a substring in the data.
create table t9 as
select 1 id, '1HIHIHI8HIHIHI' as str from dual
union all
select 2 id, 'HIHIHI7HIHIHI' as str from dual;
COLUMN str FORMAT A30
COLUMN start_position FORMAT A30
variable input varchar2(100);
exec :input := 'HIHI';
---------------------------10G Overlapping Substrings SQL Solution -------------
SELECT max(str) str, count(indexs) overlapp_cnt,
trim(BOTH ','FROM regexp_replace(XMLAgg(XMLElement(X, indexs)
ORDER BY indexs),'<X>|</X><X>|</X>',',')) AS start_position
FROM
(SELECT str, instr(str, :input, 1 ,LEVEL) AS indexs, id
FROM t9
CONNECT BY PRIOR str = str
AND PRIOR id = id
AND instr (str, :input, 1, LEVEL) > 0
AND PRIOR dbms_random.string ('p',20) IS NOT NULL
)
GROUP BY id;
STR OVERLAPP_CNT START_POSITION
--------------- ------------ ------------------
1HIHIHI8HIHIHI 4 2,4,9,11
HIHIHI7HIHIHI 4 1,3,8,10
------------------------ Non-Overlapping Substrings --------------------
SELECT max(str) str, count(indexs) no_overlapp_cnt,
trim(BOTH ','FROM regexp_replace(XMLAgg(XMLElement(X, indexs)
ORDER BY indexs),'<X>|</X><X>|</X>', ',')) AS start_position
FROM
(SELECT str, instr(new_str, chr(0),1,LEVEL) - LEVEL + 1 AS indexs, id
FROM
(SELECT str, id, REGEXP_REPLACE(str,:input, chr(0)||:input) as new_str
FROM t9)
CONNECT BY PRIOR str = str
AND PRIOR id = id
AND instr (new_str, chr(0), 1, LEVEL) > 0
AND PRIOR dbms_random.string ('p',20) IS NOT NULL
)
GROUP BY id;
STR NO_OVERLAPP_CNT START_POSITION
------------------ --------------- -----------------
1HIHIHI8HIHIHI 2 2,9
HIHIHI7HIHIHI 2 1,8
