OraQA

Oracle Question and Answer

  • Do you have a solution to a problem? Do you have an unanswered question? Login and share it with the Oracle community. More...

Oracle News


Entries RSS feed

Comments RSS feed

How to assign the same number identifier to different groups that contain the identical set of data in a SQL statement

May 24th, 2007 By Frank Zhou

The following three SQL patterns can be used to assign the same number to different groups that contain the identical set of data.

create table test (date_time date, validity number);

alter table test add constraint u_const unique(date_time, validity)

insert into test (date_time, validity)
values (to_date('08-01-2007', 'dd-mm-yyyy'), 27334);
insert into test (date_time, validity)
values (to_date('08-01-2007', 'dd-mm-yyyy'), 27335);
insert into test (date_time, validity)
values (to_date('08-01-2007', 'dd-mm-yyyy'), 27336);
insert into test (date_time, validity)
values (to_date('09-01-2007', 'dd-mm-yyyy'), 27334);
insert into test (date_time, validity)
values (to_date('09-01-2007', 'dd-mm-yyyy'), 27335);
insert into test (date_time, validity)
values (to_date('09-01-2007', 'dd-mm-yyyy'), 27340);
insert into test (date_time, validity)
values (to_date('10-01-2007', 'dd-mm-yyyy'), 27334);
insert into test (date_time, validity)
values (to_date('10-01-2007', 'dd-mm-yyyy'), 27335);
insert into test (date_time, validity)
values (to_date('10-01-2007', 'dd-mm-yyyy'), 27336);
insert into test (date_time, validity)
values (to_date('11-01-2007', 'dd-mm-yyyy'), 27334);
insert into test (date_time, validity)
values (to_date('11-01-2007', 'dd-mm-yyyy'), 27335);

SQL> select * from test;

DATE_TIME   VALIDITY
--------- ----------
08-JAN-07  27334 08-JAN-07 contains(2734, 27335, 27336)
08-JAN-07  27335
08-JAN-07  27336
09-JAN-07  27334 09-JAN-07 contains(27334, 27335, 27340)
09-JAN-07  27335
09-JAN-07  27340
10-JAN-07  27334 10-JAN-07 contains(2734, 27335, 27336)
10-JAN-07  27335
10-JAN-07  27336
11-JAN-07  27334 11-JAN-07 contains(27334, 27335)
11-JAN-07  27335                                                           

11 rows selected.

10-JAN-07 and 08-JAN-07 contain identical set of data, so the same number should be assigned to them.

————————————10G Solution one ————————–

SELECT date_time,
       SUBSTR(fin_str,
              INSTR(fin_str, ',', 1, LEVEL  ) + 1,
              INSTR(fin_str, ',', 1, LEVEL+1) -
              INSTR(fin_str, ',', 1, LEVEL) -1 ) validity,
    grp
FROM
( SELECT date_time, ','||fin_str||',' as fin_str,
         DENSE_RANK( ) OVER (ORDER BY fin_str) grp
   FROM ( SELECT date_time, validity, validity_str, fin_str
            FROM test
          MODEL
          PARTITION BY (date_time)
          DIMENSION BY (row_number() OVER (PARTITION BY date_time ORDER
                                           BY validity) rn)
          MEASURES (CAST(NULL AS VARCHAR2(3999)) validity_str,
          validity, CAST(NULL AS VARCHAR2(3999)) fin_str
                   )
         RULES
        (validity_str[ANY] ORDER BY  rn  =
         CASE WHEN validity[cv() - 1] IS NULL
              THEN TO_CHAR(validity[cv()])
              ELSE CASE WHEN instr(validity_str[CV()-1],
         TO_CHAR(validity[cv()]))<1
                        THEN validity_str[cv()-1]
                             ||','||TO_CHAR(validity[cv()])
    ELSE validity_str[cv()-1]
    END
     END,
    fin_str[ANY] ORDER BY  rn  =
         CASE WHEN validity[cv() + 1 ] IS NULL
              THEN validity_str[CV()]
         END
        )
      )
   WHERE fin_str IS NOT NULL
  )
 CONNECT BY PRIOR date_time = date_time
 AND INSTR (fin_str, ',', 1, LEVEL+1) > 0
 AND PRIOR dbms_random.string ('p', 10) IS NOT NULL
 ORDER BY date_time, validity;

DATE_TIME VALIDITY                  GRP
--------- ------------------ ----------
08-JAN-07 27334                       2
08-JAN-07 27335                       2
08-JAN-07 27336                       2
09-JAN-07 27334                       3
09-JAN-07 27335                       3
09-JAN-07 27340                       3
10-JAN-07 27334                       2
10-JAN-07 27335                       2
10-JAN-07 27336                       2
11-JAN-07 27334                       1
11-JAN-07 27335                       1

11 rows selected.

——————-Solution two ————————–

SELECT date_time  ,
       SUBSTR(fin_str,
              INSTR(fin_str, ',', 1, LEVEL  ) + 1,
              INSTR(fin_str, ',', 1, LEVEL+1) -
              INSTR(fin_str, ',', 1, LEVEL) -1 ) validity,
         grp
FROM (SELECT date_time,fin_str||',' as fin_str,
             DENSE_RANK( ) OVER (ORDER BY fin_str) grp
       FROM (SELECT date_time, validity, fin_str,
                   MAX(LENGTH(fin_str)-LENGTH(REPLACE(fin_str,',','')))
                    OVER (PARTITION BY date_time) max_path,
           LENGTH(fin_str)-LENGTH(REPLACE(fin_str,',','')) str_len
             FROM (SELECT date_time, validity,
                    sys_connect_by_path(validity,',') fin_str
               FROM (SELECT DISTINCT date_time, validity
                               FROM test)
               CONNECT BY PRIOR date_time = date_time
               AND PRIOR validity <validity
             )
      )
  WHERE max_path = str_len
     )
CONNECT BY PRIOR date_time = date_time
AND INSTR (fin_str, ',', 1, LEVEL+1) > 0
AND PRIOR dbms_random.string ('p', 10) IS NOT NULL
ORDER BY date_time, validity;  

DATE_TIME VALIDITY                  GRP
--------- ------------------ ----------
08-JAN-07 27334                       2
08-JAN-07 27335                       2
08-JAN-07 27336                       2
09-JAN-07 27334                       3
09-JAN-07 27335                       3
09-JAN-07 27340                       3
10-JAN-07 27334                       2
10-JAN-07 27335                       2
10-JAN-07 27336                       2
11-JAN-07 27334                       1
11-JAN-07 27335                       1

11 rows selected.

——————-Solution Three ————————–

SELECT t.date_time, extractvalue(column_value,'/X') validity, t.grp
FROM
(SELECT len,XML, date_time,
 DENSE_RANK( ) OVER (ORDER BY
                     CASE WHEN len <=  4000
                            THEN CAST(VALIDITYs AS VARCHAR2(4000))
		   ELSE CAST(len||dbms_lob.substr(VALIDITYs,995,1)
			          ||dbms_lob.substr(VALIDITYs,-995,len)
			          ||dbms_lob.substr(VALIDITYs,995,trunc(len/2))
		                       ||dbms_lob.substr(VALIDITYs, -995,trunc(len/2))
                                     AS VARCHAR2(4000))
                      END ) grp
FROM (SELECT date_time, XMLSERIALIZE(CONTENT XML) VALIDITYs,
             dbms_lob.getlength(XMLSERIALIZE(CONTENT XML)) len, XML
        FROM (SELECT date_time, XMLAgg(XMLElement(x,VALIDITY) ORDER BY VALIDITY) XML
                FROM test GROUP BY  date_time
             )
     )
) t, TABLE(xmlsequence(t.XML ))
ORDER BY date_time, validity; 

DATE_TIME VALIDITY                  GRP
---------  ------------------      ----------
08-JAN-07 27334                       2
08-JAN-07 27335                       2
08-JAN-07 27336                       2
09-JAN-07 27334                       3
09-JAN-07 27335                       3
09-JAN-07 27340                       3
10-JAN-07 27334                       2
10-JAN-07 27335                       2
10-JAN-07 27336                       2
11-JAN-07 27334                       1
11-JAN-07 27335                       1

11 rows selected.

One Response to “How to assign the same number identifier to different groups that contain the identical set of data in a SQL statement”

  1. newkid Says:
    SELECT date_time, validity, DENSE_RANK() OVER(ORDER BY elements) AS id
      FROM (SELECT date_time, validity, MAX(path) OVER(PARTITION BY date_time) elements
              FROM (SELECT t.*,SYS_CONNECT_BY_PATH(validity,’/') path
                      FROM (SELECT test.*,ROW_NUMBER() OVER(PARTITION BY date_time ORDER BY validity) rn
                              FROM test
                              ) t
                    START WITH rn=1 CONNECT BY date_time = PRIOR date_time AND rn = PRIOR rn+1
                    )
    )
    
    11GR2:
    SELECT date_time, validity, DENSE_RANK() OVER(ORDER BY elements) AS id
      FROM (SELECT test.*,LISTAGG(validity,’,') WITHIN GROUP (ORDER BY validity) OVER(PARTITION BY date_time) elements
              FROM test
            );
    

Leave a Reply

You must be logged in to post a comment.

RSS feed for comments on this question