OraQA

Oracle Question and Answer

  • Do you have a solution to a problem? Do you have an unanswered question? Login and share it with the Oracle community. More...

Oracle News


Entries RSS feed

Comments RSS feed

How to find matched rows between 2 tables based on required matching criteria in a SQL statement

June 24th, 2007 By Frank Zhou

The following SQL pattern can be used to find matched rows between 2 tables based on required matching criteria in a SQL statement.

Here are the requirements.

1) matching rows in t1 to t2
(t2_compare BETWEEN t1_compare-2 AND t1_compare+2 )
2) match to a t2 row at most once, once a row is matched, it can’t be used again.
3) The goal is to match as many pairs as possible

create table t1 ( pk integer primary key, t1_compare integer, t1_data  varchar2(10));
create table t2 ( pk integer primary key, t2_compare integer, t2_data varchar2(10));
insert into t1 values ( 1, 10, 'T1 ROW1' );
insert into t1 values ( 2, 12, 'T1 ROW2' );
insert into t1 values ( 3, 14, 'T1 ROW3' );
insert into t1 values ( 4, 16, 'T1 ROW4' );
insert into t1 values ( 5, 18, 'T1 ROW5' );
insert into t1 values ( 6, 19, 'T1 ROW6' );
insert into t1 values ( 7, 128, 'T1 ROW8');
insert into t2 values ( 1, 11, 'T2 ROW1' );
insert into t2 values ( 2, 12, 'T2 ROW2' );
insert into t2 values ( 3, 14, 'T2 ROW3' );
insert into t2 values ( 4, 15, 'T2 ROW4' );
insert into t2 values ( 5, 19, 'T2 ROW5' );
insert into t2 values ( 6, 19, 'T2 ROW6' );
insert into t2 values ( 8, 28, 'T2 ROW18');
insert into t2 values ( 9, 68, 'T2 ROW28');
SQL> select * from t1;

        PK T1_COMPARE T1_DATA
---------- ---------- ----------
         1         10 T1 ROW1
         2         12 T1 ROW2
         3         14 T1 ROW3
         4         16 T1 ROW4
         5         18 T1 ROW5
         6         19 T1 ROW6
         7        128 T1 ROW8                                                  

7 rows selected.

SQL> select * from t2;

        PK T2_COMPARE T2_DATA
---------- ---------- ----------
         1         11 T2 ROW1
         2         12 T2 ROW2
         3         14 T2 ROW3
         4         15 T2 ROW4
         5         19 T2 ROW5
         6         19 T2 ROW6
         8         28 T2 ROW18
         9         68 T2 ROW28                                                 

8 rows selected.

SQL> COLUMN t2_pk FORMAT   A8
SQL> COLUMN unmatched_t2 FORMAT A8
SQL> COLUMN STATUS FORMAT  A10

SELECT t1_pk, match as t2_pk, tmp_str as unmatched_t2,
       CASE WHEN t1_pk IS NULL
     THEN 'NO T1'
     WHEN match IS NULL
     THEN 'NO T2'
     ELSE 'MATCHED'
       END AS STATUS
FROM
(SELECT t1.pk t1_pk, row_number() OVER (ORDER BY t1.pk) position,
        ltrim(regexp_replace(XMLAgg(XMLElement(x,t2.pk)
        order by t2.pk),'<X>|</X><X>|</X>',','),',') str,
        CASE WHEN count(t2.pk) != 0 AND t1.pk IS NOT NULL
             THEN count(t2.pk)
      ELSE NULL
        END ct ,
        count(t1.pk) OVER ( ) counter
  FROM t1 FULL OUTER JOIN t2
       ON ( t2_compare BETWEEN t1_compare-2 AND t1_compare+2 )
 GROUP BY t1.pk
)
MODEL
DIMENSION BY (position)
MEASURES (t1_pk, str, ct, str as tmp_str, CAST(NULL AS NUMBER) min_tmp,
          counter, ct ct_tmp, CAST(NULL AS VARCHAR2(4000)) match,
          CAST(NULL AS NUMBER) dup, CAST(NULL AS NUMBER) min_dup,
          CAST(NULL AS VARCHAR2(4000)) tmp_ch ,
          CAST(NULL AS VARCHAR2(4000)) tmp_cp
  )
RULES
ITERATE (1000000) UNTIL (ITERATION_NUMBER>= counter[1])
(
 min_tmp[ANY] = min(ct_tmp)[ANY],
 dup[ANY] = CASE WHEN ct_tmp[CV()] = min_tmp[1]
                 THEN t1_pk[CV()]
   ELSE NULL
     END,
-----------------------------------------------------------------------
min_dup[ANY] = min(dup)[ANY],
tmp_ch[ANY] = CASE WHEN t1_pk[CV()] = min_dup[1]
            THEN substr(tmp_str[CV()],0,
                               instr(tmp_str[CV()],',',1)-1)
       END,
-----------------------------------------------------------------------
tmp_cp[ANY] = min(tmp_ch)[ANY],
-----------------------------------------------------------------------
match[ANY] = CASE WHEN t1_pk[CV()] = min_dup[1] THEN tmp_cp[CV()]
                  ELSE match[CV()]
             END,
-----------------------------------------------------------------------
tmp_str[ANY]=CASE WHEN CT[CV()] IS NOT NULL
                  THEN REGEXP_REPLACE(ltrim(REPLACE(
                       tmp_str[CV()],tmp_cp[CV()] ),','),'[,]+',',')
           ELSE tmp_str[CV()]
             END,
-----------------------------------------------------------------------
ct_tmp[ANY] = CASE WHEN match[CV()] IS NULL AND t1_pk[CV()] IS NOT NULL
         THEN LENGTH(tmp_str[CV()])-
                       LENGTH(REPLACE(tmp_str[CV()],','))
         ELSE NULL
      END
);
     T1_PK T2_PK   UNMATCHE  STATUS
---------- -------- -------- ----------
         1 1                 MATCHED
         2 2                 MATCHED
         3 3                 MATCHED
         4 4                 MATCHED
         5 5                 MATCHED
         6 6                 MATCHED
         7                   NO T2
                    8,9,     NO T1                                             

8 rows selected.

One Response to “How to find matched rows between 2 tables based on required matching criteria in a SQL statement”

  1. newkid Says:
    WITH  d AS (SELECT t1.pk pk1,t2.pk pk2,DENSE_RANK() OVER (ORDER BY t1.pk) rn,COUNT(DISTINCT t1.pk) OVER() cnt
                 FROM t1,t2
                WHERE t2_compare BETWEEN t1_compare-2 AND t1_compare+2
                )
        , m AS (SELECT pk1,pk2,rn,cnt FROM d
                 UNION ALL
                SELECT DISTINCT pk1,NULL,rn,cnt FROM d
               )
    ,t(rn,match_path,match_cnt,cnt) AS (
    SELECT rn,TO_CHAR(pk1)||'-'||TO_CHAR(pk2),(CASE WHEN m.pk2 IS NULL THEN 0 ELSE 1 END),cnt
      FROM m
     WHERE rn=1
    UNION ALL
    SELECT m.rn,t.match_path||','||m.pk1||'-'||m.pk2,t.match_cnt + (CASE WHEN m.pk2 IS NULL THEN 0 ELSE 1 END),t.cnt
      FROM t,m
     WHERE t.rn<t.cnt
           AND t.rn+1=m.rn
           AND INSTR(match_path||',','-'||m.pk2||',')=0
    )
    SELECT match_path FROM (SELECT t.*,RANK() OVER(ORDER BY match_cnt DESC) rnk FROM T ) WHERE rnk=1;
    
    MATCH_PATH
    --------------------------------
    1-2,2-1,3-4,4-3,5-6,6-5
    1-1,2-3,3-2,4-4,5-5,6-6
    1-1,2-2,3-3,4-4,5-5,6-6
    1-2,2-1,3-3,4-4,5-5,6-6
    1-1,2-2,3-4,4-3,5-5,6-6
    1-2,2-1,3-4,4-3,5-5,6-6
    1-1,2-3,3-2,4-4,5-6,6-5
    1-1,2-2,3-3,4-4,5-6,6-5
    1-2,2-1,3-3,4-4,5-6,6-5
    1-1,2-2,3-4,4-3,5-6,6-5
    
    10 rows selected.
    

Leave a Reply

You must be logged in to post a comment.

RSS feed for comments on this question