How to assign the same number identifier to different groups that contain the identical set of data in a SQL statement
May 24th, 2007 By Frank Zhou
The following three SQL patterns can be used to assign the same number to different groups that contain the identical set of data.
create table test (date_time date, validity number);
alter table test add constraint u_const unique(date_time, validity)
insert into test (date_time, validity)
values (to_date('08-01-2007', 'dd-mm-yyyy'), 27334);
insert into test (date_time, validity)
values (to_date('08-01-2007', 'dd-mm-yyyy'), 27335);
insert into test (date_time, validity)
values (to_date('08-01-2007', 'dd-mm-yyyy'), 27336);
insert into test (date_time, validity)
values (to_date('09-01-2007', 'dd-mm-yyyy'), 27334);
insert into test (date_time, validity)
values (to_date('09-01-2007', 'dd-mm-yyyy'), 27335);
insert into test (date_time, validity)
values (to_date('09-01-2007', 'dd-mm-yyyy'), 27340);
insert into test (date_time, validity)
values (to_date('10-01-2007', 'dd-mm-yyyy'), 27334);
insert into test (date_time, validity)
values (to_date('10-01-2007', 'dd-mm-yyyy'), 27335);
insert into test (date_time, validity)
values (to_date('10-01-2007', 'dd-mm-yyyy'), 27336);
insert into test (date_time, validity)
values (to_date('11-01-2007', 'dd-mm-yyyy'), 27334);
insert into test (date_time, validity)
values (to_date('11-01-2007', 'dd-mm-yyyy'), 27335);
SQL> select * from test;
DATE_TIME VALIDITY
--------- ----------
08-JAN-07 27334 08-JAN-07 contains(2734, 27335, 27336)
08-JAN-07 27335
08-JAN-07 27336
09-JAN-07 27334 09-JAN-07 contains(27334, 27335, 27340)
09-JAN-07 27335
09-JAN-07 27340
10-JAN-07 27334 10-JAN-07 contains(2734, 27335, 27336)
10-JAN-07 27335
10-JAN-07 27336
11-JAN-07 27334 11-JAN-07 contains(27334, 27335)
11-JAN-07 27335
11 rows selected.
10-JAN-07 and 08-JAN-07 contain identical set of data, so the same number should be assigned to them.
————————————10G Solution one ————————–
SELECT date_time,
SUBSTR(fin_str,
INSTR(fin_str, ',', 1, LEVEL ) + 1,
INSTR(fin_str, ',', 1, LEVEL+1) -
INSTR(fin_str, ',', 1, LEVEL) -1 ) validity,
grp
FROM
( SELECT date_time, ','||fin_str||',' as fin_str,
DENSE_RANK( ) OVER (ORDER BY fin_str) grp
FROM ( SELECT date_time, validity, validity_str, fin_str
FROM test
MODEL
PARTITION BY (date_time)
DIMENSION BY (row_number() OVER (PARTITION BY date_time ORDER
BY validity) rn)
MEASURES (CAST(NULL AS VARCHAR2(3999)) validity_str,
validity, CAST(NULL AS VARCHAR2(3999)) fin_str
)
RULES
(validity_str[ANY] ORDER BY rn =
CASE WHEN validity[cv() - 1] IS NULL
THEN TO_CHAR(validity[cv()])
ELSE CASE WHEN instr(validity_str[CV()-1],
TO_CHAR(validity[cv()]))<1
THEN validity_str[cv()-1]
||','||TO_CHAR(validity[cv()])
ELSE validity_str[cv()-1]
END
END,
fin_str[ANY] ORDER BY rn =
CASE WHEN validity[cv() + 1 ] IS NULL
THEN validity_str[CV()]
END
)
)
WHERE fin_str IS NOT NULL
)
CONNECT BY PRIOR date_time = date_time
AND INSTR (fin_str, ',', 1, LEVEL+1) > 0
AND PRIOR dbms_random.string ('p', 10) IS NOT NULL
ORDER BY date_time, validity;
DATE_TIME VALIDITY GRP
--------- ------------------ ----------
08-JAN-07 27334 2
08-JAN-07 27335 2
08-JAN-07 27336 2
09-JAN-07 27334 3
09-JAN-07 27335 3
09-JAN-07 27340 3
10-JAN-07 27334 2
10-JAN-07 27335 2
10-JAN-07 27336 2
11-JAN-07 27334 1
11-JAN-07 27335 1
11 rows selected.
——————-Solution two ————————–
SELECT date_time ,
SUBSTR(fin_str,
INSTR(fin_str, ',', 1, LEVEL ) + 1,
INSTR(fin_str, ',', 1, LEVEL+1) -
INSTR(fin_str, ',', 1, LEVEL) -1 ) validity,
grp
FROM (SELECT date_time,fin_str||',' as fin_str,
DENSE_RANK( ) OVER (ORDER BY fin_str) grp
FROM (SELECT date_time, validity, fin_str,
MAX(LENGTH(fin_str)-LENGTH(REPLACE(fin_str,',','')))
OVER (PARTITION BY date_time) max_path,
LENGTH(fin_str)-LENGTH(REPLACE(fin_str,',','')) str_len
FROM (SELECT date_time, validity,
sys_connect_by_path(validity,',') fin_str
FROM (SELECT DISTINCT date_time, validity
FROM test)
CONNECT BY PRIOR date_time = date_time
AND PRIOR validity <validity
)
)
WHERE max_path = str_len
)
CONNECT BY PRIOR date_time = date_time
AND INSTR (fin_str, ',', 1, LEVEL+1) > 0
AND PRIOR dbms_random.string ('p', 10) IS NOT NULL
ORDER BY date_time, validity;
DATE_TIME VALIDITY GRP
--------- ------------------ ----------
08-JAN-07 27334 2
08-JAN-07 27335 2
08-JAN-07 27336 2
09-JAN-07 27334 3
09-JAN-07 27335 3
09-JAN-07 27340 3
10-JAN-07 27334 2
10-JAN-07 27335 2
10-JAN-07 27336 2
11-JAN-07 27334 1
11-JAN-07 27335 1
11 rows selected.
——————-Solution Three ————————–
SELECT t.date_time, extractvalue(column_value,'/X') validity, t.grp
FROM
(SELECT len,XML, date_time,
DENSE_RANK( ) OVER (ORDER BY
CASE WHEN len <= 4000
THEN CAST(VALIDITYs AS VARCHAR2(4000))
ELSE CAST(len||dbms_lob.substr(VALIDITYs,995,1)
||dbms_lob.substr(VALIDITYs,-995,len)
||dbms_lob.substr(VALIDITYs,995,trunc(len/2))
||dbms_lob.substr(VALIDITYs, -995,trunc(len/2))
AS VARCHAR2(4000))
END ) grp
FROM (SELECT date_time, XMLSERIALIZE(CONTENT XML) VALIDITYs,
dbms_lob.getlength(XMLSERIALIZE(CONTENT XML)) len, XML
FROM (SELECT date_time, XMLAgg(XMLElement(x,VALIDITY) ORDER BY VALIDITY) XML
FROM test GROUP BY date_time
)
)
) t, TABLE(xmlsequence(t.XML ))
ORDER BY date_time, validity;
DATE_TIME VALIDITY GRP
--------- ------------------ ----------
08-JAN-07 27334 2
08-JAN-07 27335 2
08-JAN-07 27336 2
09-JAN-07 27334 3
09-JAN-07 27335 3
09-JAN-07 27340 3
10-JAN-07 27334 2
10-JAN-07 27335 2
10-JAN-07 27336 2
11-JAN-07 27334 1
11-JAN-07 27335 1
11 rows selected.

July 1st, 2010 at 1:11 pm
SELECT date_time, validity, DENSE_RANK() OVER(ORDER BY elements) AS id FROM (SELECT date_time, validity, MAX(path) OVER(PARTITION BY date_time) elements FROM (SELECT t.*,SYS_CONNECT_BY_PATH(validity,’/') path FROM (SELECT test.*,ROW_NUMBER() OVER(PARTITION BY date_time ORDER BY validity) rn FROM test ) t START WITH rn=1 CONNECT BY date_time = PRIOR date_time AND rn = PRIOR rn+1 ) ) 11GR2: SELECT date_time, validity, DENSE_RANK() OVER(ORDER BY elements) AS id FROM (SELECT test.*,LISTAGG(validity,’,') WITHIN GROUP (ORDER BY validity) OVER(PARTITION BY date_time) elements FROM test );