OraQA

Oracle Question and Answer

  • Do you have a solution to a problem? Do you have an unanswered question? Login and share it with the Oracle community. More...

Oracle News


Entries RSS feed

Comments RSS feed

How to convert an Internet HTML Table to CSV format in SQL

March 19th, 2012 By Frank Zhou

The following SQL pattern can be used to transform an HTML Table on the web to CSV format.


set long 10000;
variable url_input varchar2(500)
exec :url_input:='http://oraqa.com/2012/02/27/how-to-transform-a-csv-file-into-html-table-in-sql/'


-------------------------------------------------------------------SQL QUERY---------------------------------------------------------------------


WITH HTML_TAB AS
(SELECT CAST(REGEXP_REPLACE(SUBSTR(STR, 0, REGEXP_INSTR(str,'</table>', 1,1,1, 'i') ), '( ){2,}', ' ') 
         AS VARCHAR2(4000)) AS html_str
 FROM 
 (SELECT SUBSTR(str, REGEXP_INSTR(str,'<table', 1,1,0,'i')) as str
  FROM
 (SELECT HTTPURITYPE(:url_input).GETCLOB( ) AS str
  FROM DUAL )
 )
),
HEADER AS 
(SELECT col_name, num, 
        XMLCAST(XMLQUERY('fn:count(/table/tr/th)' 
                        PASSING XMLPARSE(DOCUMENT html_str) 
                        RETURNING CONTENT) AS NUMBER) AS Col_NUM 
 FROM HTML_TAB, XMLTABLE('/table/tr/th' 
                         PASSING XMLPARSE(DOCUMENT html_str) 
                         COLUMNS col_name VARCHAR2 (30) PATH '.' , 
                         num FOR ORDINALITY) 
), 
H_DATA AS 
(SELECT LISTAGG(col_name, ',') WITHIN GROUP(ORDER BY num) AS COL_NAMES, 
        MAX(Col_NUM) AS Col_NUM 
  FROM HEADER 
), 
COLUM_DATA AS 
(SELECT rn, col_data||',' AS col_data 
 FROM HTML_TAB, XMLTABLE('/table/tr/td' 
                         PASSING XMLPARSE(DOCUMENT html_str) 
                         COLUMNS  col_data VARCHAR2 (30) PATH '.' , 
                         rn FOR ORDINALITY ) 
 ), 
CSV AS 
(SELECT LISTAGG(col_data, '') WITHIN GROUP (ORDER BY rn) AS str, MAX(rn) AS RANK 
 FROM COLUM_DATA D, H_DATA H 
 GROUP BY CEIL(D.rn/H.Col_NUM) 
)
SELECT CSV_STR 
FROM (SELECT COL_NAMES AS CSV_STR, 0 AS pos, TO_NUMBER(NULL) AS RANK 
      FROM H_DATA 
      UNION ALL 
      SELECT SUBSTR(str, 0, LENGTH (str) - 1), 1, RANK FROM CSV) 
ORDER BY pos, RANK;


CSV_STR
——————————————————————————–

ID,FIRSTNAME,MIDDLENAME,LASTNAME
1,Frank,,Zhou
2,Peter,B,Lee
3,John,C,Adam
4,Dave,,Washington

Leave a Reply

You must be logged in to post a comment.

RSS feed for comments on this question