How to divide consecutive rows into groups based on the value of the smallest running total in SQL
August 2nd, 2007 By Frank Zhou
The following SQL Model Clause pattern can be used to divide consecutive rows into groups based on the smallest running total that are larger than a predetermined value.
create table x (id number, value number);
insert into x
select rownum, trunc(dbms_random.value(1, 10))
from dba_users
where rownum < 30
variable input number
exec :input := 20;
SELECT id,value, Running_tot, group_id
FROM (SELECT id, value, Running_tot, group_id
FROM (SELECT id, value, count(*) OVER( ) counter,
ROW_NUMBER() OVER (ORDER BY id) rn FROM x)
MODEL
DIMENSION BY (rn)
MEASURES (id,value, counter,CAST(NULL AS NUMBER) Running_tot,
value tmp, CAST(NULL AS NUMBER) group_id, CAST(NULL AS NUMBER) rown )
RULES ITERATE(10000)
UNTIL (ITERATION_NUMBER>= counter[1])
(
Running_tot[ITERATION_NUMBER]
= CASE WHEN tmp[ITERATION_NUMBER-1] IS NULL
THEN tmp[ITERATION_NUMBER]
ELSE tmp[ITERATION_NUMBER] +
Running_tot[cv()-1]
END,
tmp[ITERATION_NUMBER] = CASE WHEN Running_tot[ITERATION_NUMBER] < :input
THEN 0
END,
rown[ITERATION_NUMBER] = CASE WHEN Running_tot[CV()-1] IS NULL
THEN 1
WHEN tmp[ITERATION_NUMBER-1] IS NULL
THEN ITERATION_NUMBER
END,
group_id[ITERATION_NUMBER] = MAX(rown)[ANY]
)
)
WHERE id IS NOT NULL
ORDER BY id;
ID VALUE RUNNING_TOT GROUP_ID
---------- ---------- ----------- ----------
1 3 3 1
2 7 10 1
3 9 19 1
4 8 27 1
5 7 7 5
6 9 16 5
7 2 18 5
8 4 22 5
9 6 6 9
10 3 9 9
11 7 16 9
12 5 21 9
13 8 8 13
14 3 11 13
15 2 13 13
16 8 21 13
17 6 6 17
18 6 12 17
19 3 15 17
20 3 18 17
21 6 24 17
22 1 1 22
23 5 6 22
24 9 15 22
25 1 16 22
26 2 18 22
27 1 19 22
28 9 28 22
29 3 3 29
29 rows selected.

August 3rd, 2007 at 8:13 am
Ironically I was working on this very problem last night… then I saw your solution. I was trying with analytic SQL and CONNECT BY and wasn’t having a good go of it. So thanks for saving me a bunch of time!
This sometimes fails if the input column has values larger than your threshold. I fixed it thus (I’m backporting my change to your original version since I rewrote it some, so double-check it):
August 3rd, 2007 at 8:15 am
…except I forgot to backport, it should look like:
You were using “tmp” both as the source of numbers to add, and as a flag of when to cut to the next group. I split that by using “value” directly, and “tmp” became “flag”.
August 3rd, 2007 at 10:31 am
Thanks for pointing out this special case !
The query has fixed.
August 6th, 2007 at 5:12 am
The same (I hope) using Analytics:
SELECT
id,value,
SUM(value) OVER(PARTITION BY group_id ORDER BY id) running_tot,
group_id
FROM(SELECT id,value,sum_cum,
NVL(TRUNC(( sum_cum-value)/:input),0) group_id
FROM(SELECT id,value,
SUM(value) over(ORDER BY id) sum_cum
FROM x))
August 6th, 2007 at 7:27 am
Alas, that’s not the same since you don’t restart the count when you create a new group. If you have a running total of 18 and your group size is 20, and the next row adds 5, 18 + 5 = 23 > 20 so you start a new group, but the running total is now 3, when it should be 0–you don’t want to cut the subsequent group when there are only 17 more items.
August 6th, 2007 at 7:44 am
And now?
SELECT
id,value,
SUM(value) OVER(PARTITION BY grp ORDER BY id) running_tot,
FIRST_VALUE (cnt)OVER(PARTITION BY grp ORDER BY id) group_id
FROM(SELECT id,value,sum_cum,
NVL(TRUNC(( sum_cum-value)/20),0) grp,
count(*) over(ORDER BY id) cnt
FROM(SELECT id,value,
SUM(value) over(ORDER BY id) sum_cum
FROM x))
August 6th, 2007 at 8:21 am
That doesn’t change the fact that you’re not resetting the cumulative sum when you need to.
SQL> set echo on SQL> @go1 SQL> WITH x AS ( 2 SELECT 1 id, 10 value FROM dual UNION ALL 3 SELECT 2 id, 8 value FROM dual UNION ALL 4 SELECT 3 id, 5 value FROM dual UNION ALL 5 SELECT 4 id, 10 value FROM dual UNION ALL 6 SELECT 5 id, 7 value FROM dual UNION ALL 7 SELECT 6 id, 8 value FROM dual UNION ALL 8 SELECT 7 id, 4 value FROM dual 9 ) 10 SELECT 11 id, 12 value, 13 SUM(value) OVER (PARTITION BY grp ORDER BY id) running_tot, 14 FIRST_VALUE(cnt) OVER (PARTITION BY grp ORDER BY id) group_id 15 FROM ( 16 SELECT 17 id, 18 value, 19 sum_cum, 20 NVL(TRUNC((sum_cum-value)/20),0) grp, 21 count(*) over(ORDER BY id) cnt 22 FROM ( 23 SELECT 24 id, 25 value, 26 SUM(value) over (ORDER BY id) sum_cum 27 FROM x 28 ) 29 ) 30 / ID VALUE RUNNING_TOT GROUP_ID ---------- ---------- ----------- ---------- 1 10 10 1 2 8 18 1 3 5 23 1 4 10 10 4 5 7 17 4 6 8 8 6 7 4 12 6 7 rows selected. Elapsed: 00:00:00.01 SQL> SQL> WITH x AS ( 2 SELECT 1 id, 10 value FROM dual UNION ALL 3 SELECT 2 id, 8 value FROM dual UNION ALL 4 SELECT 3 id, 5 value FROM dual UNION ALL 5 SELECT 4 id, 10 value FROM dual UNION ALL 6 SELECT 5 id, 7 value FROM dual UNION ALL 7 SELECT 6 id, 8 value FROM dual UNION ALL 8 SELECT 7 id, 4 value FROM dual 9 ) 10 SELECT id,value, Running_tot, group_id 11 FROM (SELECT id, value, Running_tot, group_id 12 FROM (SELECT id, value, count(*) OVER( ) counter, 13 ROW_NUMBER() OVER (ORDER BY id) rn FROM x) 14 MODEL 15 DIMENSION BY (rn) 16 MEASURES (id,value, counter,CAST(NULL AS NUMBER) Running_tot, 17 value tmp, CAST(NULL AS NUMBER) group_id, CAST(NULL AS NUMBER) rown ) 18 RULES ITERATE(10000) 19 UNTIL (ITERATION_NUMBER>= counter[1]) 20 ( 21 Running_tot[ITERATION_NUMBER] 22 = CASE WHEN tmp[ITERATION_NUMBER-1] IS NULL 23 THEN tmp[ITERATION_NUMBER] 24 ELSE tmp[ITERATION_NUMBER] + 25 Running_tot[cv()-1] 26 END, 27 tmp[ITERATION_NUMBER] = CASE WHEN Running_tot[ITERATION_NUMBER]August 6th, 2007 at 8:22 am
Looks like my entry got lopped off somehow…
August 6th, 2007 at 8:23 am
OK, I needed to escape that less-than sign. Once more, with feeling…
SQL> WITH x AS ( 2 SELECT 1 id, 10 value FROM dual UNION ALL 3 SELECT 2 id, 8 value FROM dual UNION ALL 4 SELECT 3 id, 5 value FROM dual UNION ALL 5 SELECT 4 id, 10 value FROM dual UNION ALL 6 SELECT 5 id, 7 value FROM dual UNION ALL 7 SELECT 6 id, 8 value FROM dual UNION ALL 8 SELECT 7 id, 4 value FROM dual 9 ) 10 SELECT id,value, Running_tot, group_id 11 FROM (SELECT id, value, Running_tot, group_id 12 FROM (SELECT id, value, count(*) OVER( ) counter, 13 ROW_NUMBER() OVER (ORDER BY id) rn FROM x) 14 MODEL 15 DIMENSION BY (rn) 16 MEASURES (id,value, counter,CAST(NULL AS NUMBER) Running_tot, 17 value tmp, CAST(NULL AS NUMBER) group_id, CAST(NULL AS NUMBER) rown ) 18 RULES ITERATE(10000) 19 UNTIL (ITERATION_NUMBER>= counter[1]) 20 ( 21 Running_tot[ITERATION_NUMBER] 22 = CASE WHEN tmp[ITERATION_NUMBER-1] IS NULL 23 THEN tmp[ITERATION_NUMBER] 24 ELSE tmp[ITERATION_NUMBER] + 25 Running_tot[cv()-1] 26 END, 27 tmp[ITERATION_NUMBER] = CASE WHEN Running_tot[ITERATION_NUMBER] < 20 /* input */ 28 THEN 0 29 END, 30 rown[ITERATION_NUMBER] = CASE WHEN Running_tot[CV()-1] IS NULL 31 THEN 1 32 WHEN tmp[ITERATION_NUMBER-1] IS NULL 33 THEN ITERATION_NUMBER 34 END, 35 group_id[ITERATION_NUMBER] = MAX(rown)[ANY] 36 ) 37 ) 38 WHERE id IS NOT NULL 39 ORDER BY id 40 / ID VALUE RUNNING_TOT GROUP_ID ---------- ---------- ----------- ---------- 1 10 10 1 2 8 18 1 3 5 23 1 4 10 10 4 5 7 17 4 6 8 25 4 7 4 4 7 7 rows selected. Elapsed: 00:00:00.01August 9th, 2007 at 5:11 am
OK, I found the difference. This is OK now?
WITH x AS (
SELECT 1 id, 10 value FROM dual UNION ALL
SELECT 2 id, 8 value FROM dual UNION ALL
SELECT 3 id, 5 value FROM dual UNION ALL
SELECT 4 id, 10 value FROM dual UNION ALL
SELECT 5 id, 7 value FROM dual UNION ALL
SELECT 6 id, 8 value FROM dual UNION ALL
SELECT 7 id, 4 value FROM dual
)
SELECT
id,value,
SUM(value) OVER(PARTITION BY grp ORDER BY id) running_tot,
FIRST_VALUE (cnt)OVER(PARTITION BY grp ORDER BY id) group_id
FROM(SELECT
id,value,sum_cum,
NVL(TRUNC(( sum_cum-value-1)/20),0) grp,
COUNT(*) over(ORDER BY id) cnt
FROM(SELECT
id,value,
SUM(value) over(ORDER BY id) sum_cum
FROM x))
August 9th, 2007 at 8:36 am
Change the (5,7) in the input data to (5,8) and they’re different again.