How to find the largest rectangle of words from a word list in a SQL statement
July 4th, 2007 By Frank Zhou
This SQL pattern can be used to find the largest possible rectangle of letters such that every row forms a word (reading left to right) and every column forms a word (reading top to bottom). A new optimized version of the SQL is located at the bottom of the page, its performance is orders of magnitude faster than the original version of the SQL
CREATE TABLE IOT3
(len, str,
PRIMARY KEY (len, str )
) ORGANIZATION INDEX AS
SELECT length('SUND') as len, 'SUND' as str FROM dual
union all
SELECT length('MOND') as len, 'MOND' as str FROM dual
union all
SELECT length('FRID') as len, 'FRID' as str FROM dual
union all
SELECT length('WESD') as len, 'WESD' as str FROM dual
union all
SELECT length('TUED') as len, 'TUED' as str FROM dual
UNION ALL
SELECT length('MWFST') as len, 'MWFST' as str FROM dual
union all
SELECT length('OERUU') as len, 'OERUU' as str FROM dual
union all
SELECT length('NSINE') as len, 'NSINE' as str FROM dual
union all
SELECT length('DDDDD') as len, 'DDDDD' as str FROM dual;
SQL> select * from iot3;
LEN STR
---------- -----
4 FRID
4 MOND
4 SUND
4 TUED
4 WESD
5 DDDDD
5 MWFST
5 NSINE
5 OERUU
9 rows selected.
COLUMN PATH FORMAT A30
COLUMN WORD FORMAT A10
-----------------------------------------------------------------10G SQL Solution---------------------------------------------
SELECT word, rtrim(ltrim(path,','), ',') path, rect_size
FROM (SELECT path, word, ch_level, word_len * cnt as rect_size,
max(word_len * cnt) OVER ( ) max_rect
FROM (SELECT path, word,len, word_len,
count(*) OVER(partition by path) cnt, ch_level
FROM (SELECT path, word, len,
length(word) word_len, ch_level,
length(path)-length(REPLACE(path,',', '')) -1 as num_chars
FROM (SELECT path,len, str_level, ch_level,
REPLACE(sys_connect_by_path(ch,','),',') word
FROM (SELECT path, len, str_level,SUBSTR(st,LEVEL,1) ch, LEVEL ch_level
FROM (
WITH DATA AS
(SELECT path, len, LEVEL str_level,
SUBSTR(path,
INSTR(path, ',', 1, LEVEL ) + 1,
INSTR(path, ',', 1, LEVEL+1) -
INSTR(path, ',', 1, LEVEL) -1 ) st
FROM( SELECT sys_connect_by_path(str,',')|| ',' path, len
FROM IOT3
CONNECT BY NOCYCLE PRIOR len = len
AND PRIOR str != str
AND LEVEL <= (SELECT max(length(str)) from IOT3)
)
CONNECT BY PRIOR path = path
AND PRIOR len = len
AND INSTR (path, ',', 1, LEVEL+1) > 0
AND PRIOR dbms_random.string ('p', 10) IS NOT NULL
)
SELECT path, len, ST, str_level FROM DATA
)
CONNECT BY PRIOR path = path
AND PRIOR len = len
AND PRIOR st = st
AND level <= len
AND PRIOR dbms_random.string ('p', 10) IS NOT NULL
)
START WITH str_level = 1
CONNECT BY PRIOR path = path
AND PRIOR len = len
AND PRIOR ch_level = ch_level
AND PRIOR str_level + 1 = str_level
AND PRIOR dbms_random.string ('p', 10) IS NOT NULL
)
WHERE word IN (SELECT str FROM IOT3)
)
WHERE num_chars = word_len
)
WHERE cnt = len
)
WHERE max_rect = rect_size
ORDER BY path, ch_level;
WORD PATH RECT_SIZE
---------- ------------------------------ ----------
MWFST MOND,WESD,FRID,SUND,TUED 20
OERUU MOND,WESD,FRID,SUND,TUED 20
NSINE MOND,WESD,FRID,SUND,TUED 20
DDDDD MOND,WESD,FRID,SUND,TUED 20
MOND MWFST,OERUU,NSINE,DDDDD 20
WESD MWFST,OERUU,NSINE,DDDDD 20
FRID MWFST,OERUU,NSINE,DDDDD 20
SUND MWFST,OERUU,NSINE,DDDDD 20
TUED MWFST,OERUU,NSINE,DDDDD 20
9 rows selected.
----------------------An optimized new SQL-----------------------------------
SELECT word, rtrim(ltrim(path,','), ',') path, rect_size
FROM(SELECT path, word,ch_level, word_len * cnt as rect_size,
max(word_len * cnt) OVER ( ) max_rect
FROM (SELECT path, word,len, word_len, count(*) OVER(partition by path) cnt, ch_level
FROM (SELECT path,word,len, length(word) word_len,ch_level,
length(path)-length(REPLACE(path,',', '')) -1 as num_chars
FROM (SELECT path,len, str_level, ch_level,
REPLACE(sys_connect_by_path(ch,','),',') word
FROM (SELECT path, len, str_level,substr(st,LEVEL,1) ch, LEVEL ch_level
FROM (
WITH DATA AS
(SELECT path,len, LEVEL str_level,
substr(path,
instr(path, ',', 1, LEVEL ) + 1,
instr(path, ',', 1, LEVEL+1) -
instr(path, ',', 1, LEVEL) -1) st
FROM
(SELECT sys_connect_by_path(str,',')|| ',' path, len
FROM
(SELECT str,ch_3,len,xml_123,xml_145, xml_167
FROM
(SELECT str as str, substr(str,1,3) as ch_3, len from IOT3),
(SELECT xmlserialize(content xmlagg(xmlelement(x,substr(str,1,3)) ORDER BY str)) xml_123,
xmlserialize(content xmlagg(xmlelement(x,substr(str,1,1)||substr(str,4,2)) ORDER BY str)) xml_145,
xmlserialize(content xmlagg(xmlelement(x,substr(str,1,1)||substr(str,6,2)) ORDER BY str)) xml_167
FROM IOT3)
)
WHERE LEVEL >=2
AND CONNECT_BY_ISLEAF =1
CONNECT BY NOCYCLE PRIOR len = len
AND PRIOR str != str
AND LEVEL <= (SELECT max(length(str)) from IOT3)
AND CASE WHEN LEVEL =3
THEN CASE WHEN instr(xml_123,CONNECT_BY_ROOT substr(str,1,1)||PRIOR substr(str,1,1)||substr(str,1,1))>0
AND instr(xml_123,CONNECT_BY_ROOT substr(str,2,1)||PRIOR substr(str,2,1)||substr(str,2,1))>0
AND instr(xml_123,CONNECT_BY_ROOT substr(str,3,1)||PRIOR substr(str,3,1)||substr(str,3,1))>0
THEN 1
END
ELSE 1 END = 1
--------------------------------------------------------------------------------------------------------
AND CASE WHEN LEVEL = 3
THEN CASE len
WHEN 4
THEN CASE WHEN instr(xml_123,CONNECT_BY_ROOT substr(str,4,1)||PRIOR substr(str,4,1)||substr(str,4,1))>0
THEN 1
END
WHEN 5
THEN CASE WHEN instr(xml_123,CONNECT_BY_ROOT substr(str,5,1)||PRIOR substr(str,5,1)||substr(str,5,1))>0
THEN 1
END
WHEN 6
THEN CASE WHEN instr(xml_123,CONNECT_BY_ROOT substr(str,6,1)||PRIOR substr(str,6,1)||substr(str,6,1))>0
THEN 1
END
WHEN 7
THEN CASE WHEN instr(xml_123,CONNECT_BY_ROOT substr(str,7,1)||PRIOR substr(str,7,1)||substr(str,7,1))>0
THEN 1
END
WHEN 8
THEN CASE WHEN instr(xml_123,CONNECT_BY_ROOT substr(str,8,1)||PRIOR substr(str,8,1)||substr(str,8,1))>0
THEN 1
END
ELSE 1
END
ELSE 1 END = 1
------------------------------------------------------------------------------------------------------
AND CASE WHEN LEVEL = 5
THEN CASE WHEN instr(xml_145,CONNECT_BY_ROOT substr(str,1,1)||PRIOR substr(str,1,1)||substr(str,1,1))>0
AND instr(xml_145,CONNECT_BY_ROOT substr(str,2,1)||PRIOR substr(str,2,1)||substr(str,2,1))>0
AND instr(xml_145,CONNECT_BY_ROOT substr(str,3,1)||PRIOR substr(str,3,1)||substr(str,3,1))>0
THEN 1
END
ELSE 1 END = 1
---------------------------------------------------------------------------------------------------------
AND CASE WHEN LEVEL = 5
THEN CASE len
WHEN 4
THEN CASE WHEN instr(xml_145,CONNECT_BY_ROOT substr(str,4,1)||PRIOR substr(str,4,1)||substr(str,4,1))>0
THEN 1
END
WHEN 5
THEN CASE WHEN instr(xml_145,CONNECT_BY_ROOT substr(str,5,1)||PRIOR substr(str,5,1)||substr(str,5,1))>0
THEN 1
END
WHEN 6
THEN CASE WHEN instr(xml_145,CONNECT_BY_ROOT substr(str,6,1)||PRIOR substr(str,6,1)||substr(str,6,1))>0
THEN 1
END
WHEN 7
THEN CASE WHEN instr(xml_145,CONNECT_BY_ROOT substr(str,7,1)||PRIOR substr(str,7,1)||substr(str,7,1))>0
THEN 1
END
WHEN 8
THEN CASE WHEN instr(xml_145,CONNECT_BY_ROOT substr(str,8,1)||PRIOR substr(str,8,1)||substr(str,8,1))>0
THEN 1
END
ELSE 1
END
ELSE 1 END = 1
--------------------------------------------------------------------------------------------------------
AND CASE WHEN LEVEL = 7
THEN CASE WHEN instr(xml_167,CONNECT_BY_ROOT substr(str,1,1)||PRIOR substr(str,1,1)||substr(str,1,1))>0
AND instr(xml_167,CONNECT_BY_ROOT substr(str,2,1)||PRIOR substr(str,2,1)||substr(str,2,1))>0
AND instr(xml_167,CONNECT_BY_ROOT substr(str,3,1)||PRIOR substr(str,3,1)||substr(str,3,1))>0
THEN 1
END
ELSE 1 END = 1
---------------------------------------------------------------------------------------------------------
AND CASE WHEN LEVEL = 7
THEN CASE len
WHEN 4
THEN CASE WHEN instr(xml_167,CONNECT_BY_ROOT substr(str,4,1)||PRIOR substr(str,4,1)||substr(str,4,1))>0
THEN 1
END
WHEN 5
THEN CASE WHEN instr(xml_167,CONNECT_BY_ROOT substr(str,5,1)||PRIOR substr(str,5,1)||substr(str,5,1))>0
THEN 1
END
WHEN 6
THEN CASE WHEN instr(xml_167,CONNECT_BY_ROOT substr(str,6,1)||PRIOR substr(str,6,1)||substr(str,6,1))>0
THEN 1
END
WHEN 7
THEN CASE WHEN instr(xml_167,CONNECT_BY_ROOT substr(str,7,1)||PRIOR substr(str,7,1)||substr(str,7,1))>0
THEN 1
END
WHEN 8
THEN CASE WHEN instr(xml_167,CONNECT_BY_ROOT substr(str,8,1)||PRIOR substr(str,8,1)||substr(str,8,1))>0
THEN 1
END
ELSE 1
END
ELSE 1 END = 1
------------------------------------------------------------------------------
)
CONNECT BY PRIOR path = path
AND PRIOR len = len
AND INSTR (path, ',', 1, LEVEL+1) > 0
AND PRIOR dbms_random.string ('p', 20) IS NOT NULL
)
SELECT path, len, ST, str_level FROM DATA
)
CONNECT BY PRIOR path = path
AND PRIOR len = len
AND PRIOR st = st
AND lEVEL <= len
AND PRIOR dbms_random.string ('p', 20) IS NOT NULL
)
WHERE CONNECT_BY_ISLEAF =1
CONNECT BY PRIOR path = path
AND CONNECT_BY_ROOT str_level =1
AND PRIOR len = len
AND PRIOR ch_level = ch_level
AND PRIOR str_level + 1 = str_level
AND PRIOR dbms_random.string ('p', 20) IS NOT NULL
)
WHERE word IN (SELECT str FROM IOT3)
)
WHERE num_chars = word_len
)
WHERE cnt = len
)
WHERE max_rect = rect_size
ORDER BY path, ch_level;
WORD PATH RECT_SIZE
---------- ------------------------------ ----------
MWFST MOND,WESD,FRID,SUND,TUED 20
OERUU MOND,WESD,FRID,SUND,TUED 20
NSINE MOND,WESD,FRID,SUND,TUED 20
DDDDD MOND,WESD,FRID,SUND,TUED 20
MOND MWFST,OERUU,NSINE,DDDDD 20
WESD MWFST,OERUU,NSINE,DDDDD 20
FRID MWFST,OERUU,NSINE,DDDDD 20
SUND MWFST,OERUU,NSINE,DDDDD 20
TUED MWFST,OERUU,NSINE,DDDDD 20
9 rows selected.
Elapsed: 00:00:01.00

July 4th, 2007 at 11:19 pm
Sorry but I just do not get what you’re trying to do here.