PostgreSQL
 sql >> Base de Dados >> RDS >> PostgreSQL

Consulta SELECT DISTINCT lenta no PostgreSQL

-- Test fixture: a 5,000,000-row table holding a single low-cardinality
-- integer column, used to time SELECT DISTINCT against it.
BEGIN;

CREATE TABLE dist (
    x INTEGER NOT NULL
);

-- random() * 50 is double precision; the cast to INTEGER rounds it, so x
-- takes whole values from 0 to 50. The target column is named explicitly so
-- the statement keeps working if columns are ever added to dist.
INSERT INTO dist (x)
SELECT CAST(random() * 50 AS INTEGER)
FROM generate_series(1, 5000000);

COMMIT;

-- Created after the bulk load, so the index is built once over the loaded
-- rows rather than maintained row by row during the insert.
CREATE INDEX dist_x ON dist (x);


-- Reclaim dead tuples and refresh planner statistics before timing anything.
VACUUM ANALYZE dist;

-- Baseline measurement: the plain DISTINCT over the whole table.
EXPLAIN ANALYZE
SELECT DISTINCT x
FROM dist;

HashAggregate  (cost=84624.00..84624.51 rows=51 width=4) (actual time=1840.141..1840.153 rows=51 loops=1)
   ->  Seq Scan on dist  (cost=0.00..72124.00 rows=5000000 width=4) (actual time=0.003..573.819 rows=5000000 loops=1)
 Total runtime: 1848.060 ms

O PostgreSQL (ainda) não consegue usar um índice para resolver um DISTINCT (saltando os valores idênticos), mas é possível contornar essa limitação com uma função como esta:
-- Emulates SELECT DISTINCT x FROM dist by hopping from one distinct value to
-- the next: each step asks only for the smallest x greater than the value
-- just emitted, a lookup that an index on dist(x) can serve.
--
-- Returns: every distinct value of dist.x, in ascending order; no rows when
-- dist is empty.
CREATE OR REPLACE FUNCTION distinct_skip_foo()
RETURNS SETOF INTEGER
LANGUAGE plpgsql STABLE
AS $$
DECLARE
    _current INTEGER;
BEGIN
    -- Seed with the smallest value; min() yields NULL when dist is empty.
    SELECT min(x) INTO _current FROM dist;

    LOOP
        -- NULL means no (further) value exists, so the result set is complete.
        EXIT WHEN _current IS NULL;

        RETURN NEXT _current;

        -- Jump to the next strictly larger value, skipping all duplicates.
        SELECT min(x) INTO _current FROM dist WHERE x > _current;
    END LOOP;
END;
$$ ;

-- Time the function-based emulation of DISTINCT for comparison.
EXPLAIN ANALYZE
SELECT *
FROM distinct_skip_foo();
Function Scan on distinct_skip_foo  (cost=0.00..260.00 rows=1000 width=4) (actual time=1.629..1.635 rows=51 loops=1)
 Total runtime: 1.652 ms