Index Usage

Predicates difference

SELECT cust_first_name, cust_last_name FROM customers 
WHERE cust_id = 1030; --"equal" can use index

SELECT cust_first_name, cust_last_name FROM customers
WHERE cust_id <> 1030; --"<>" can't use index

SELECT cust_first_name, cust_Last_Name FROM customers
WHERE cust_id < 10; --small range can use index

SELECT cust_first_name, cust_last_name FROM customers
WHERE cust_id between 70 AND 80;

SELECT cust_first_name, cust_last_name FROM customers
WHERE cust_id < 10000; --too wide range can't index

Transformed Index

-- CREATE INDEX cust_cust_credit_limit_idx ON CUSTOMERS(cust_credit_limit);
SELECT cust_id
FROM customers
WHERE cust_credit_limit*1.10 = 11000; --won't use index

Data Type mismatching

-- CREATE INDEX cust_postal_code_idx ON CUSTOMERS(cust_postal_code);
SELECT cust_street_address 
FROM customers 
WHERE cust_postal_code =  68054; --won't use index

Func(idx_col) won't work

SELECT p.town_name, c.cust_last_name
FROM customers c, postal_codes p
WHERE p.code1 = substr(c.cust_postal_code,1,2)
AND p.code2 = substr(c.cust_postal_code,3,3)
AND p.code1 = '67'
AND c.country_id = 52790;

max/min

--only need access index, without table
SELECT max(cust_credit_limit) --low cost
FROM customers;

--only need access index, without table, for +/-
SELECT max(cust_credit_limit+1000) --high cost
FROM customers;

--cannot use index, need access the table, for *//
SELECT max(cust_credit_limit*2) --high cost
FROM customers;

--create unique index sales_pk on sales(time_id, prod_id, cust_id, channel_id);
--wrong logic
SELECT *
FROM sales
WHERE time_id = (SELECT max(time_id)
    FROM sales
    WHERE prod_id = 13
    AND cust_id = 524);
    
--correct logic
select *
from sales s1
where time_id = (select max(time_id)
    from sales s2
    where s1.prod_id = s2.prod_id
    and s1.cust_id = s2.cust_id)
and prod_id = 13
and cust_id = 524;

select /*+ index_desc(sales sales_pk) */ *
from sales
where prod_id = 13
and cust_id = 524
and rownum = 1;

Between ranage

--create index cust_country_state_city_ix on customers(country_id,cust_state_province,cust_city)
SELECT cust_id, cust_first_name, cust_last_name, 
cust_state_province 
FROM customers
WHERE country_id between 52788 and 52790
AND cust_state_province like 'W%'; --too wide, won't use index

SELECT cust_id, cust_first_name, cust_last_name, 
cust_state_province 
FROM customers
WHERE country_id between 52788 and 52790
AND cust_state_province like 'WA%'; --more narrow, can use index

Correlated subquery

SELECT department_id, last_name, salary 
FROM employees e1 
WHERE salary > (SELECT AVG(salary) 
    FROM employees e2
    WHERE e1.department_id = e2.department_id) 
ORDER BY department_id;

Avoid Sorting

indexing the filter key

SELECT cust_first_name, cust_last_name, cust_city
FROM customers
WHERE cust_city = 'Paris' --sort is needed, as sorting key is not the filter key
ORDER BY cust_id;

SELECT cust_first_name, cust_last_name, cust_city
FROM customers
WHERE cust_id < 200 --sort is avoid, as sorting key is already an indexed (and sorted) key
ORDER BY cust_id;

union, union all

SELECT cust_last_name 
FROM customers
WHERE cust_city = 'Paris'
UNION -- cause sort
--UNION ALL -- won't cause sort
SELECT cust_last_name FROM customers
WHERE cust_credit_limit < 10000;

avoid use HAVING

--create index cust_cust_city_idx on customers(cust_city);
SELECT cust_city, avg(cust_credit_limit)
FROM customers
GROUP BY cust_city
HAVING cust_city = 'Paris'; --HAVING take effect at last step, BAD

SELECT cust_city, avg(cust_credit_limit)
FROM customers
WHERE cust_city = 'Paris' --filter data early, GOOD
GROUP BY cust_city

join order

--wrong order, from big opset to small opset in the FROM clause
select /*+ ordered */ co.country_name,cu.cust_first_name,cu.cust_last_name,s.quantity_sold,s.amount_sold
from sales s, customers cu, countries co
where s.cust_id=cu.cust_id and cu.country_id=co.country_id
  and cu.cust_credit_limit>=1500
  and co.country_id=52771;

--correct order, from small opset to big opset in the FROM clause
select /*+ ordered */ co.country_name,cu.cust_first_name,cu.cust_last_name,s.quantity_sold,s.amount_sold
from countries co,customers cu,sales s
where s.cust_id=cu.cust_id and cu.country_id=co.country_id
  and cu.cust_credit_limit>=1500
  and co.country_id=52771; --driving table should be the one with least data to join further

Exists

--normal join is more expensive
SELECT s.*
FROM sales s, customers c
WHERE s.cust_id = c.cust_id
AND prod_id = 13
AND c.cust_credit_limit > 10000;

--use EXISTS to decrease scan range.
SELECT *
FROM sales s
WHERE s.prod_id = 13
  AND EXISTS (
    SELECT 1 --for each item of outsize sales record, if a satisfied customer is found, the round is returned
    FROM customers c
    WHERE c.cust_id = s.cust_id
      AND c.cust_credit_limit > 10000
    );

Reversing Index

--create index cust_last_name_ix on customers(cust_last_name);
SELECT cust_first_name, cust_last_name
FROM customers
WHERE cust_last_name like '%ing'; --cannot use index even have

--create index cust_last_name_rix on customers(REVERSE(cust_last_name));
select cust_first_name, cust_last_name
from customers
where reverse(cust_last_name) like 'gni%'; --can use the REVERSE'ed index, but syntax is odd

Use Staging Table

SELECT sum(amount_sold)
FROM sales s, times t, customers c
WHERE s.time_id  = t.time_id
  AND s.cust_id  = c.cust_id
  AND t.day_name = 'Friday'
  AND country_id = 52772;

SELECT sum(amount_sold)
FROM sales s, times t, products p
WHERE s.time_id  = t.time_id
  AND s.prod_id  = p.prod_id
  AND t.day_name = 'Friday'
  AND prod_category = 'Electronics';

SELECT sum(amount_sold)
FROM sales s, times t, promotions p
WHERE s.time_id  = t.time_id
  AND s.promo_id = p.promo_id
  AND t.day_name = 'Friday'
  AND promo_category= 'TV';

--create staging table
CREATE TABLE SALES_FRI AS 
SELECT cust_id,prod_id,promo_id,amount_sold
FROM sales s, times t
WHERE s.time_id  = t.time_id
  AND t.day_name = 'Friday';

--use staging table instead
SELECT sum(amount_sold)
FROM SALES_FRI s, customers c
WHERE s.cust_id  = c.cust_id
  AND country_id = 52772;

SELECT sum(amount_sold)
FROM SALES_FRI s, products p
WHERE s.prod_id  = p.prod_id
  AND prod_category = 'Electronics';

SELECT sum(amount_sold)
FROM SALES_FRI s, promotions p
WHERE s.promo_id = p.promo_id
  AND promo_category= 'TV';

use WITH

-- WITH is like GTT(Global Temp Table)
-- can use with hints MATERIALIZE | INLINE for fine tune
WITH dept_costs
AS ( SELECT dname ,SUM(sal) dept_total
  FROM emp e ,dept d
  WHERE e.deptno = d.deptno
  GROUP BY dname )
  ,avg_cost
AS (
  SELECT SUM(dept_total) / COUNT(*) avg
  FROM dept_costs
  )
SELECT *
FROM dept_costs
WHERE dept_total > ( SELECT avg FROM avg_cost )
ORDER BY dname;
------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                                | Name                        | Rows  | Bytes | Cost (%CPU)| Time     |
------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                         |                             |     4 |    88 |    12  (25)| 00:00:01 |
|   1 |  TEMP TABLE TRANSFORMATION               |                             |       |       |            |          |
|   2 |   LOAD AS SELECT (CURSOR DURATION MEMORY)| SYS_TEMP_0FD9D8A43_9EACAE31 |       |       |            |          |
|   3 |    HASH GROUP BY                         |                             |     4 |    80 |     7  (29)| 00:00:01 |
|   4 |     MERGE JOIN                           |                             |    14 |   280 |     6  (17)| 00:00:01 |
|   5 |      TABLE ACCESS BY INDEX ROWID         | DEPT                        |     4 |    52 |     2   (0)| 00:00:01 |
|   6 |       INDEX FULL SCAN                    | PK_DEPT                     |     4 |       |     1   (0)| 00:00:01 |
|*  7 |      SORT JOIN                           |                             |    14 |    98 |     4  (25)| 00:00:01 |
|   8 |       TABLE ACCESS FULL                  | EMP                         |    14 |    98 |     3   (0)| 00:00:01 |
|   9 |   SORT ORDER BY                          |                             |     4 |    88 |     5  (20)| 00:00:01 |
|* 10 |    VIEW                                  |                             |     4 |    88 |     2   (0)| 00:00:01 |
|  11 |     TABLE ACCESS FULL                    | SYS_TEMP_0FD9D8A43_9EACAE31 |     4 |    80 |     2   (0)| 00:00:01 |
|  12 |     VIEW                                 |                             |     1 |    13 |     2   (0)| 00:00:01 |
|  13 |      SORT AGGREGATE                      |                             |     1 |    13 |            |          |
|  14 |       VIEW                               |                             |     4 |    52 |     2   (0)| 00:00:01 |
|  15 |        TABLE ACCESS FULL                 | SYS_TEMP_0FD9D8A43_9EACAE31 |     4 |    80 |     2   (0)| 00:00:01 |
------------------------------------------------------------------------------------------------------------------------

Partition Pruning

select * from sales s
where time_id > to_date('2001-03-01','yyyy-mm-dd') 
  and time_id < to_date('2001-04-01','yyyy-mm-dd');

------------------------------------------------------------------------------------------------
| Id  | Operation              | Name  | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |
------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT       |       | 19861 |   581K|    37   (3)| 00:00:01 |       |       |
|   1 |  PARTITION RANGE SINGLE|       | 19861 |   581K|    37   (3)| 00:00:01 |    17 |    17 |
|*  2 |   TABLE ACCESS FULL    | SALES | 19861 |   581K|    37   (3)| 00:00:01 |    17 |    17 |
------------------------------------------------------------------------------------------------

Use Bind Variable

connection.execute(
  "INSERT INTO countries VALUES (:country_id, :country_name)",
  [90, "Tonga"], --bind variables
  function(err, result)
  {
    if (err)
      console.error(err.message);
    else
      console.log("Rows inserted " + result.rowsAffected);
  });

Index for Null

--B*tree index don't store NULL value
--Bitmap index store NULL value, can be used for low cardinality clolumn

Ordering Keys for Composite Index

SELECT count(*) 
FROM customers
WHERE country_id > 52772
AND cust_state_province = 'CA'
AND cust_city = 'Belmont';

create index cust_country_state_city_ix1 on customers(country_id, cust_state_province, cust_city)
-----------------------------------------------------------------------------------------------
| Id  | Operation        | Name                       | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT |                            |     1 |    26 |    18   (0)| 00:00:01 |
|   1 |  SORT AGGREGATE  |                            |     1 |    26 |            |          |
|*  2 |   INDEX SKIP SCAN| CUST_COUNTRY_STATE_CITY_IX |     3 |    78 |    18   (0)| 00:00:01 |
-----------------------------------------------------------------------------------------------

-- put selective column ahead
create index cust_country_state_city_ix2 on customers(cust_city, country_id, cust_state_province); 
-------------------------------------------------------------------------------------------------
| Id  | Operation         | Name                        | Rows  | Bytes | Cost (%CPU)| Time     |
-------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |                             |     1 |    26 |     2   (0)| 00:00:01 |
|   1 |  SORT AGGREGATE   |                             |     1 |    26 |            |          |
|*  2 |   INDEX RANGE SCAN| CUST_COUNTRY_STATE_CITY_IX2 |     3 |    78 |     2   (0)| 00:00:01 |
-------------------------------------------------------------------------------------------------

Use Materialized View

--TBD

Star Transformation

SELECT s.amount_sold,p.prod_name,ch.channel_desc
FROM sales s, products p, channels ch, customers c
WHERE s.prod_id=p.prod_id
AND s.channel_id=ch.channel_id
AND s.cust_id=c.cust_id
AND ch.channel_id in (3, 4)
AND c.cust_city='Asten'
AND p.prod_id>100;

----------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                             | Name               | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |
----------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                      |                    |  4599 |   336K|   657   (3)| 00:00:01 |       |       |
|*  1 |  HASH JOIN                            |                    |  4599 |   336K|   657   (3)| 00:00:01 |       |       |
|   2 |   INLIST ITERATOR                     |                    |       |       |            |          |       |       |
|   3 |    TABLE ACCESS BY INDEX ROWID        | CHANNELS           |     2 |    26 |     2   (0)| 00:00:01 |       |       |
|*  4 |     INDEX UNIQUE SCAN                 | CHANNELS_PK        |     2 |       |     1   (0)| 00:00:01 |       |       |
|*  5 |   HASH JOIN                           |                    |  4599 |   278K|   655   (3)| 00:00:01 |       |       |
|   6 |    TABLE ACCESS BY INDEX ROWID BATCHED| PRODUCTS           |    26 |   780 |     2   (0)| 00:00:01 |       |       |
|*  7 |     INDEX RANGE SCAN                  | PRODUCTS_PK        |    26 |       |     1   (0)| 00:00:01 |       |       |
|*  8 |    HASH JOIN                          |                    | 12934 |   404K|   652   (3)| 00:00:01 |       |       |
|*  9 |     VIEW                              | index$_join$_004   |   416 |  6240 |   119   (1)| 00:00:01 |       |       |
|* 10 |      HASH JOIN                        |                    |       |       |            |          |       |       |
|* 11 |       INDEX RANGE SCAN                | CUST_CUST_CITY_IDX |   416 |  6240 |     2   (0)| 00:00:01 |       |       |
|  12 |       INDEX FAST FULL SCAN            | CUSTOMERS_PK       |   416 |  6240 |   146   (1)| 00:00:01 |       |       |
|  13 |     PARTITION RANGE ALL               |                    |   219K|  3646K|   532   (3)| 00:00:01 |     1 |    28 |
|* 14 |      TABLE ACCESS FULL                | SALES              |   219K|  3646K|   532   (3)| 00:00:01 |     1 |    28 |
----------------------------------------------------------------------------------------------------------------------------

--A bitmap index must be built on each of the foreign key columns of the fact tables.
--The STAR_TRANSFORMATION_ENABLED initialization parameter should be set to TRUE.
-----------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                                    | Name               | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |
-----------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                             |                    |  4603 |   269K|   557   (1)| 00:00:01 |       |       |
|*  1 |  HASH JOIN                                   |                    |  4603 |   269K|   437   (1)| 00:00:01 |       |       |
|   2 |   INLIST ITERATOR                            |                    |       |       |            |          |       |       |
|   3 |    TABLE ACCESS BY INDEX ROWID               | CHANNELS           |     2 |    26 |     2   (0)| 00:00:01 |       |       |
|*  4 |     INDEX UNIQUE SCAN                        | CHANNELS_PK        |     2 |       |     1   (0)| 00:00:01 |       |       |
|*  5 |   HASH JOIN                                  |                    |  4603 |   211K|   435   (1)| 00:00:01 |       |       |
|   6 |    TABLE ACCESS BY INDEX ROWID BATCHED       | PRODUCTS           |    26 |   780 |     2   (0)| 00:00:01 |       |       |
|*  7 |     INDEX RANGE SCAN                         | PRODUCTS_PK        |    26 |       |     1   (0)| 00:00:01 |       |       |
|   8 |    PARTITION RANGE ALL                       |                    |  4675 | 79475 |   433   (1)| 00:00:01 |     1 |    28 |
|   9 |     TABLE ACCESS BY LOCAL INDEX ROWID BATCHED| SALES              |  4675 | 79475 |   433   (1)| 00:00:01 |     1 |    28 |
|  10 |      BITMAP CONVERSION TO ROWIDS             |                    |       |       |            |          |       |       |
|  11 |       BITMAP AND                             |                    |       |       |            |          |       |       |
|  12 |        BITMAP MERGE                          |                    |       |       |            |          |       |       |
|  13 |         BITMAP KEY ITERATION                 |                    |       |       |            |          |       |       |
|  14 |          BUFFER SORT                         |                    |       |       |            |          |       |       |
|* 15 |           INDEX RANGE SCAN                   | PRODUCTS_PK        |    26 |   104 |     1   (0)| 00:00:01 |       |       |
|* 16 |          BITMAP INDEX RANGE SCAN             | SALES_PROD_BIX     |       |       |            |          |     1 |    28 |
|  17 |        BITMAP OR                             |                    |       |       |            |          |       |       |
|* 18 |         BITMAP INDEX SINGLE VALUE            | SALES_CHANNEL_BIX  |       |       |            |          |     1 |    28 |
|* 19 |         BITMAP INDEX SINGLE VALUE            | SALES_CHANNEL_BIX  |       |       |            |          |     1 |    28 |
|  20 |        BITMAP MERGE                          |                    |       |       |            |          |       |       |
|  21 |         BITMAP KEY ITERATION                 |                    |       |       |            |          |       |       |
|  22 |          BUFFER SORT                         |                    |       |       |            |          |       |       |
|* 23 |           VIEW                               | index$_join$_060   |   416 |  6240 |   119   (1)| 00:00:01 |       |       |
|* 24 |            HASH JOIN                         |                    |       |       |            |          |       |       |
|* 25 |             INDEX RANGE SCAN                 | CUST_CUST_CITY_IDX |   416 |  6240 |     2   (0)| 00:00:01 |       |       |
|  26 |             INDEX FAST FULL SCAN             | CUSTOMERS_PK       |   416 |  6240 |   146   (1)| 00:00:01 |       |       |
|* 27 |          BITMAP INDEX RANGE SCAN             | SALES_CUST_BIX     |       |       |            |          |     1 |    28 |
-----------------------------------------------------------------------------------------------------------------------------------