How to divide a data set into buckets with an equal interval size
February 2nd, 2007 By Claudiu Ariton
The purpose of these scripts is to construct equiwidth histograms, in which the histogram range is divided into intervals that have identical size.
Install script:
DROP TABLE intervals
/
CREATE TABLE intervals(NAME VARCHAR2(100), sal NUMBER(10,2))
/
INSERT INTO intervals
(NAME, sal)
SELECT object_name, DBMS_RANDOM.VALUE (0, 100)
FROM all_objects
/
COMMIT
/
Case scenario 1 - using on-the-fly pivot table
set pagesize 200
set linesize 200
WITH x AS
(SELECT MIN (sal) mn, MAX (sal) mx
FROM intervals)
SELECT (l - 1) * (mx - mn) / 10 start_value,
(l) * (mx - mn) / 10 end_value, COUNT (1) no_of_values
FROM intervals i,
(SELECT mn, mx
FROM x),
(SELECT LEVEL l
FROM DUAL
CONNECT BY LEVEL <= 10)
WHERE sal > (CASE
WHEN l = 1
THEN mn - 0.001
ELSE (l - 1) * (mx - mn) / 10
END)
AND sal <= (l) * (mx - mn) / 10
GROUP BY (l - 1) * (mx - mn) / 10, (l) * (mx - mn) / 10
Case scenario 2 - using width_bucket analytical function
set pagesize 200
set linesize 200
SELECT (int_no - 1) * (end_int - start_int) / 10 start_value,
(int_no) * (end_int - start_int) / 10 end_value,
COUNT (1) no_of_values
FROM (SELECT NAME,
sal,
WIDTH_BUCKET (sal, 0, 100.001, 10) int_no,
MIN (sal) OVER (ORDER BY sal ASC) start_int,
MAX (sal) OVER (ORDER BY sal DESC) end_int
FROM intervals)
GROUP BY int_no, start_int, end_int
/
Output 1:
START_VALUE END_VALUE NO_OF_VALUES ----------- ---------- ------------ 0 10 5061 10 20 5126 20 30 5195 30 40 5136 40 50 5058 50 60 5173 60 70 5221 70 80 5028 80 90 5053 90 100 5018
Output 2:
START_VALUE END_VALUE NO_OF_VALUES ----------- ---------- ------------ 0 10 5061 10 20 5126 20 30 5195 30 40 5136 40 50 5058 50 60 5173 60 70 5221 70 80 5028 80 90 5053 90 100 5018
