Query column with range types

From PostgreSQL wiki

Jump to: navigation, search

Contents

Rationale

The main use case for range types is to store ranges in PostgreSQL tables, and then find rows whose range includes a certain literal. These can already be indexed using GIN and GiST index types.

db=# EXPLAIN ANALYZE SELECT * FROM rng WHERE rng @> 10.0;
Bitmap Heap Scan on rng  (cost=48.40..2709.45 rows=1000 width=32) (actual time=0.103..0.103 rows=10 loops=1)
  Recheck Cond: (rng @> 10.0)
  ->  Bitmap Index Scan on rng_rng_idx  (cost=0.00..48.15 rows=1000 width=0) (actual time=0.096..0.096 rows=10 loops=1)
        Index Cond: (rng @> 10.0)
Total runtime: 0.136 ms

However, you cannot index inverse queries - if you want to find single values in a certain range:

db=# EXPLAIN ANALYZE SELECT * FROM num WHERE i <@ '[0, 10)';
Seq Scan on num  (cost=0.00..16925.00 rows=1000 width=6) (actual time=0.014..188.761 rows=9 loops=1)
  Filter: (i <@ '[0,10]'::numrange)
  Rows Removed by Filter: 999991
Total runtime: 188.820 ms

Example

Of course the above query is equivalent to the indexable query WHERE num >= 0 AND num < 10. But what if you want to use range types? Well, you can create a custom operator with a function that automatically gets inlined:

db=# EXPLAIN ANALYZE SELECT * FROM num WHERE i <@ '[0, 10)';
Index Only Scan using num_i_idx on num  (cost=0.00..4.56 rows=10 width=6) (actual time=0.014..0.016 rows=9 loops=1)
  Index Cond: ((i >= 0::numeric) AND (i < 10::numeric))
  Heap Fetches: 0
Total runtime: 0.039 ms

Infinite terms get optimized out:

db=# EXPLAIN ANALYZE SELECT * FROM num WHERE i <@ '[,10]';
Index Only Scan using num_i_idx on num  (cost=0.00..4.53 rows=10 width=6) (actual time=0.007..0.009 rows=10 loops=1)
  Index Cond: (i <= 10::numeric)
  Heap Fetches: 0
Total runtime: 0.032 ms

Beware!

These custom operators are deliberately defined to be non-commative. If you create these operators, do not write range queries in the form 10.0 <@ range_col because it will blow up:

db=# EXPLAIN ANALYZE SELECT * FROM rng WHERE 10.0 <@ rng;
Seq Scan on rng  (cost=0.00..46358.16 rows=444424 width=32) (actual time=0.013..240.054 rows=10 loops=1)
  Filter: ((lower_inf(rng) OR CASE WHEN lower_inc(rng) THEN (10.0 >= lower(rng)) ELSE (10.0 > lower(rng)) END)
       AND (upper_inf(rng) OR CASE WHEN upper_inc(rng) THEN (10.0 <= upper(rng)) ELSE (10.0 < upper(rng)) END))
  Rows Removed by Filter: 999990
Total runtime: 240.125 ms

Code

You can adapt this code to any range type you need, just by changing the types of the function and the operator.

-- int4range (integer)
CREATE OR REPLACE FUNCTION range_contains(el integer, rng int4range) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=integer, rightarg=int4range);
 
-- int8range (bigint)
CREATE OR REPLACE FUNCTION range_contains(el bigint, rng int8range) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=bigint, rightarg=int8range);
 
-- numrange (numeric)
CREATE OR REPLACE FUNCTION range_contains(el numeric, rng numrange) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=numeric, rightarg=numrange);
 
-- tsrange (timestamp without time zone)
CREATE OR REPLACE FUNCTION range_contains(el timestamp, rng tsrange) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=timestamp, rightarg=tsrange);
 
-- tstzrange (timestamp with time zone)
CREATE OR REPLACE FUNCTION range_contains(el timestamptz, rng tstzrange) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=timestamptz, rightarg=tstzrange);
 
-- daterange (date)
CREATE OR REPLACE FUNCTION range_contains(el date, rng daterange) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=date, rightarg=daterange);



Idea from Andres Freund, implemented by Marti Raudsepp.

Personal tools