Please help me with the efficient JOIN query to bring the below result :
create table pk1(col1 int)
create table pk2(col1 int)
create table pk3(col1 int)
create table fk(col1 int, col2 int NOT NULL, col3 int, col4 int)
insert into pk1 values(1)
insert into pk1 values(2)
insert into pk1 values(3)
insert into pk2 values(1)
insert into pk2 values(2)
insert into pk2 values(3)
insert into pk3 values(1)
insert into pk3 values(2)
insert into pk3 values(3)
insert into fk values(1, 1, null, 10)
insert into fk values(null, 1, 1, 20)
insert into fk values(1, 1,null, 30)
insert into fk values(1, 1, null, 40)
insert into fk values(1, 1, 1, 70)
insert into fk values(2, 3, 1, 60)
insert into fk values(1, 1, 1, 100)
insert into fk values(2, 2, 3, 80)
insert into fk values(null, 1, 2, 50)
insert into fk values(null, 1, 4, 150)
insert into fk values(5, 1, 2, 250)
insert into fk values(6, 7, 8, 350)
insert into fk values(10, 1, null, 450)
Below query will give the result :
select fk.* from fk inner join pk1 on pk1.col1 = fk.col1 inner join pk2 on pk2.col1 = fk.col2 inner join pk3 on pk3.col1 = fk.col3
Result :
+--+--+--+--+
| col1 | col2 | col3 | col4 |
+--+--+--+--+
| 1 | 1 | 1 | 70 |
| 2 | 3 | 1 | 60 |
| 1 | 1 | 1 | 100 |
| 2 | 2 | 3 | 80 |
+--+--+--+--+
But I require also the NULL values in col1 and col3
Hence doing the below :
select distinct fk.* from fk inner join pk1 on pk1.col1 = fk.col1 or fk.col1 is null inner join pk2 on pk2.col1 = fk.col2 inner join pk3 on pk3.col1 = fk.col3 or fk.col3 is null
+--+--+--+--+
| col1 | col2 | col3 | col4 |
+--+--+--+--+
| null | 1 | 1 | 20 |
| null | 1 | 2 | 50 |
| 1 | 1 | null | 10 |
| 1 | 1 | null | 30 |
| 1 | 1 | null | 40 |
| 1 | 1 | 1 | 70 |
| 1 | 1 | 1 | 100 |
| 2 | 2 | 3 | 80 |
| 2 | 3 | 1 | 60 |
+--+--+--+--+
The above is the reqd output, but the query will be very slow if there are more NULL valued rows in col1 and col3, since I need to also use distinct if I use 'IS NULL' check in JOIN.
Please let me know if there is an aliternative to this query which can return the same result set in an efficient manner.Hi
I've just written some SQL and looked back at your previous post and seen I've pretty well written what BM & Mallier suggested.
Indexes (major effect):
CREATE UNIQUE INDEX pk1_idx1 ON pk1(col1)
CREATE UNIQUE INDEX pk2_idx1 ON pk2(col1)
CREATE UNIQUE INDEX pk3_idx1 ON pk3(col1)
CREATE INDEX fk_idx1 ON fk(col1)
CREATE INDEX fk_idx2 ON fk(col2)
CREATE INDEX fk_idx3 ON fk(col3)
UNION (minor effect):
select DISTINCT
fk.*
from fk inner join
pk1 on
pk1.col1 = fk.col1
or fk.col1 is null
inner join pk2
on pk2.col1 = fk.col2
inner join pk3 on
pk3.col1 = fk.col3
or fk.col3 is null
select fk.*
from fk inner join
pk1 on
pk1.col1 = fk.col1
inner join pk2
on pk2.col1 = fk.col2
inner join pk3 on
pk3.col1 = fk.col3
UNION ALL
SELECT fk.*
FROM fk
inner join pk2 on
pk2.col1 = fk.col2
WHERE fk.col1 IS NULL
AND fk.col3 IS NULL
UNION ALL
SELECT fk.*
FROM fk
inner join pk2 on
pk2.col1 = fk.col2
INNER JOIN pk1 ON
pk1.col1 = fk.col1
WHERE fk.col3 IS NULL
UNION ALL
SELECT fk.*
FROM fk
inner join pk2 on
pk2.col1 = fk.col2
INNER JOIN pk3 ON
pk3.col1 = fk.col3
WHERE fk.col1 IS NULL
Once the indexes are added the UNION comes out slightly better on the plan but probably not worth the hassle of such an inflexible query.
Also - only selecting the columns you actually need would be good practice(when it comes to the real query).
HTH|||with less data ,its difficult say which query is best,though below below code is more efficient than query u posted. this query can avoid distinct key word.Consider index which mentioned by pootle
--selection query--
select
fk.*
from
fk
where
exists(select
null
from
pk1
where pk1.col1 = fk.col1 or fk.col1 is null)
and exists( select
null
from
pk2
where pk2.col1 = fk.col2 or fk.col1 is null)
and exists( select
null
from
pk3
where pk3.col1 = fk.col3 or fk.col3 is null)
Comparison
set statistics profile on
set statistics io on
--ur query without any index---
select distinct fk.*
from fk inner join pk1 on pk1.col1 = fk.col1 or fk.col1 is null
inner join pk2 on pk2.col1 = fk.col2
inner join pk3 on pk3.col1 = fk.col3 or fk.col3 is null
/*
Table 'pk3'. Scan count 16, logical reads 16, physical reads 0, read-ahead reads 0.
Table 'pk1'. Scan count 12, logical reads 12, physical reads 0, read-ahead reads 0.
Table 'pk2'. Scan count 13, logical reads 13, physical reads 0, read-ahead reads 0.
Table 'fk'. Scan count 1, logical reads 1, physical reads 0, read-ahead reads 0.
*/
-- improved query without any index--
select
fk.*
from
fk
where
exists(select
null
from
pk1
where pk1.col1 = fk.col1 or fk.col1 is null)
and exists( select
null
from
pk2
where pk2.col1 = fk.col2 or fk.col1 is null)
and exists( select
null
from
pk3
where pk3.col1 = fk.col3 or fk.col3 is null)
/*
Table 'pk3'. Scan count 10, logical reads 10, physical reads 0, read-ahead reads 0.
Table 'pk2'. Scan count 10, logical reads 10, physical reads 0, read-ahead reads 0.
Table 'pk1'. Scan count 13, logical reads 13, physical reads 0, read-ahead reads 0.
Table 'fk'. Scan count 1, logical reads 1, physical reads 0, read-ahead reads 0.
It execute 1 step less than previous query
*/
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment