How to find the earliest start date and the latest end date for consecutive transactions in SQL
October 29th, 2007 By Frank Zhou
The following two SQL patterns can be used to find the earliest start date and latest end date for consecutive transactions within a group in a SQL Statement.
Here are the requirements for consecutive transaction:
1) A previous row’s end date + 1 = current row’s start date OR a current row’s end date + 1 = next row’s start date
2) A row will be filtered out from the result set if there is no consecutive transactions for that row within the same group (ID)
CREATE TABLE transaction
(id NUMBER ,
start_date DATE,
end_date DATE,
PRIMARY KEY (id, start_date, end_date));
ALTER TABLE transaction ADD (CONSTRAINT start_dt UNIQUE (id, start_date));
ALTER TABLE transaction ADD (CONSTRAINT end_dt UNIQUE (id, end_date));
INSERT INTO transaction(id, start_date, end_date) VALUES
(1, TO_DATE ('01/28/2006', 'MM/DD/YYYY'),TO_DATE ('02/07/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(1, TO_DATE ('02/08/2006', 'MM/DD/YYYY'),TO_DATE ('02/22/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(1, TO_DATE ('02/23/2006', 'MM/DD/YYYY'),TO_DATE ('02/27/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(1, TO_DATE ('02/18/2006', 'MM/DD/YYYY'),TO_DATE ('03/29/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(1, TO_DATE ('04/08/2006', 'MM/DD/YYYY'), TO_DATE ('04/09/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(1, TO_DATE ('04/10/2006', 'MM/DD/YYYY'),TO_DATE ('04/13/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(1, TO_DATE ('04/14/2006', 'MM/DD/YYYY'), TO_DATE ('04/17/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(1, TO_DATE ('04/28/2006', 'MM/DD/YYYY'), TO_DATE ('05/18/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(1, TO_DATE ('01/18/2006', 'MM/DD/YYYY'), TO_DATE ('01/27/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(2, TO_DATE ('01/28/2006', 'MM/DD/YYYY'), TO_DATE ('02/07/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(2, TO_DATE ('02/08/2006', 'MM/DD/YYYY'),TO_DATE ('02/22/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(2, TO_DATE ('02/23/2006', 'MM/DD/YYYY'), TO_DATE ('02/27/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(2, TO_DATE ('04/08/2006', 'MM/DD/YYYY'), TO_DATE ('04/09/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(2, TO_DATE ('04/10/2006', 'MM/DD/YYYY'), TO_DATE ('04/13/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(2, TO_DATE ('04/14/2006', 'MM/DD/YYYY'), TO_DATE ('04/17/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(2, TO_DATE ('04/18/2006', 'MM/DD/YYYY'), TO_DATE ('05/18/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(2, TO_DATE ('01/18/2006', 'MM/DD/YYYY'),TO_DATE ('01/27/2006', 'MM/DD/YYYY'));
INSERT INTO transaction(id, start_date, end_date) VALUES
(3, TO_DATE ('01/03/2006', 'MM/DD/YYYY'), TO_DATE ('05/16/2006', 'MM/DD/YYYY'));
SQL> select * from transaction order by id , start_date;
ID START_DAT END_DATE
---------- --------- ---------
1 18-JAN-06 27-JAN-06 (Earliest start_date : 18-JAN-06 )
1 28-JAN-06 07-FEB-06
1 08-FEB-06 22-FEB-06
1 18-FEB-06 29-MAR-06 (No consecutive trans , filter out )
1 23-FEB-06 27-FEB-06 (Latest end_date : 27-FEB-06 )
1 08-APR-06 09-APR-06 (Earliest start_date : 08-APR-06 )
1 10-APR-06 13-APR-06
1 14-APR-06 17-APR-06 (Latest end_date : 17-APR-06 )
1 28-APR-06 18-MAY-06 (No trans with the group, filter out)
2 18-JAN-06 27-JAN-06 (Earliest start_date : 18-JAN-06 )
2 28-JAN-06 07-FEB-06
2 08-FEB-06 22-FEB-06
2 23-FEB-06 27-FEB-06 (Latest end_date : 27-FEB-06 )
2 08-APR-06 09-APR-06 (Earliest start_date : 08-APR-06 )
2 10-APR-06 13-APR-06
2 14-APR-06 17-APR-06
2 18-APR-06 18-MAY-06 (Latest end_date : 18-MAY-06 )
3 03-JAN-06 16-MAY-06 (No trans with the group, filter out)
18 rows selected.
——————————–SQL Solution One —————————————
SELECT id, MAX(earliest_start) earliest_start_date,
MAX(latest_end) latest_end_date
FROM
(SELECT id, start_date, end_date, earliest_start,latest_end,
row_number( ) OVER (PARTITION BY id,lev ORDER BY rn) AS rn
FROM (SELECT id, start_date,end_date, earliest_start,latest_end, rn
FROM (SELECT id, start_date, end_date
FROM( SELECT id, start_date, end_date,
LAG (end_date) OVER (PARTITION BY id
ORDER BY end_date) +1 lag,
LEAD (start_date) OVER (PARTITION BY id
ORDER BY start_date)-1 lead
FROM transaction
)
WHERE start_date = lag OR end_date = lead
)
MODEL
PARTITION BY ( id )
DIMENSION BY(row_number() OVER(PARTITION BY id ORDER BY start_date) rn)
MEASURES(start_date, end_date, CAST(NULL AS DATE) earliest_start,
CAST(NULL AS DATE) latest_end)
RULES
(
earliest_start[ANY] ORDER BY rn =
CASE WHEN start_date[CV()-1] IS NULL OR start_date[CV()]> end_date[CV()-1]+1
THEN start_date[CV( )] END,
latest_end[ANY] ORDER BY rn =
CASE WHEN end_date[CV()+1] IS NULL OR end_date[CV()]<start_date[CV()+1]-1
THEN end_date[CV( )] END
)
), (SELECT LEVEL lev FROM dual CONNECT BY LEVEL <= 2)
WHERE CASE WHEN Lev = 1 THEN earliest_start
WHEN Lev = 2 THEN latest_end
END IS NOT NULL
)
GROUP BY id, rn;
ID EARLIEST_ LATEST_END
---------- --------- ----------
1 18-JAN-06 27-FEB-06
1 08-APR-06 17-APR-06
2 18-JAN-06 27-FEB-06
2 08-APR-06 18-MAY-06
——————————–SQL Solution Two —————————————————
SELECT id, min(to_date(substr(start_date,1,
(instr(start_date,',', 1)-1)),'DD-MM-YYYY')) earliest_start_date,
min(to_date(substr(end_date,
(instr(end_date,',', -1)+1)),'DD-MM-YYYY')) latest_end_date
FROM (SELECT id,
LTRIM(sys_connect_by_path(to_char
(start_date,'DD-MM-YYYY'),','),',') start_date,
LTRIM(sys_connect_by_path(to_char
(end_date,'DD-MM-YYYY'),','),',') end_date
FROM (SELECT id, start_date , end_date
FROM (SELECT id, start_date, end_date,
LAG(end_date) OVER (PARTITION BY id
ORDER BY end_date) + 1 lag,
LEAD(start_date) OVER (PARTITION BY id
ORDER BY start_date) - 1 lead
FROM transaction
)
WHERE start_date = lag OR end_date = lead
)
WHERE CONNECT_BY_ISLEAF =1
CONNECT BY PRIOR end_date + 1 = start_date
AND prior id = id
)
GROUP BY id, to_date(substr(end_date,(instr(end_date,',',-1)+1)),'DD-MM-YYYY');
ID EARLIEST_ LATEST_EN
---------- --------- ---------
1 18-JAN-06 27-FEB-06
1 08-APR-06 17-APR-06
2 18-JAN-06 27-FEB-06
2 08-APR-06 18-MAY-06

November 1st, 2007 at 9:03 pm
I like the puzzle. I had to tackle something similar to this about a year ago. I had to find the “Maximum Contiguous Minimum Effective Date,” or start date for a member.
I think I had to kludge mine a bit (I’d still like to learn the MODEL clause). Anyway, here it goes:
SELECT id, min_date, max_date FROM ( SELECT id, start_date, end_date, connect_by_isleaf isleaf, level levelof, TO_DATE( SUBSTR( SYS_CONNECT_BY_PATH( TO_CHAR( start_date, 'MMDDYYYY' ), '-' ), 2, 8 ), 'MMDDYYYY' ) min_date, ( CASE WHEN CONNECT_BY_ISLEAF = 1 THEN end_date END ) max_date FROM ( SELECT other_id, id, start_date, end_date, ( CASE WHEN start_date = LAG( end_date + 1 ) OVER ( PARTITION BY id ORDER BY end_date ) THEN LAG( other_id ) OVER ( PARTITION BY id ORDER BY end_date ) END ) contiguous FROM ( SELECT id, rownum other_id, start_date, end_date FROM transaction ) ) START WITH contiguous IS NULL CONNECT BY PRIOR other_id = contiguous ) WHERE isleaf = 1 AND levelof > 1; ID MIN_DATE MAX_DATE ---------- --------- --------- 1 18-JAN-06 27-FEB-06 1 08-APR-06 17-APR-06 2 18-JAN-06 27-FEB-06 2 08-APR-06 18-MAY-06