Query column with range types

From PostgreSQL wiki
Jump to navigationJump to search

Rationale

The main use case for range types is to store ranges in PostgreSQL tables, and then find rows whose range includes a certain literal. These can already be indexed using GIN and GiST index types.

db=# EXPLAIN ANALYZE SELECT * FROM rng WHERE rng @> 10.0;
Bitmap Heap Scan on rng  (cost=48.40..2709.45 rows=1000 width=32) (actual time=0.103..0.103 rows=10 loops=1)
  Recheck Cond: (rng @> 10.0)
  ->  Bitmap Index Scan on rng_rng_idx  (cost=0.00..48.15 rows=1000 width=0) (actual time=0.096..0.096 rows=10 loops=1)
        Index Cond: (rng @> 10.0)
Total runtime: 0.136 ms

However, you cannot index inverse queries - if you want to find single values in a certain range:

db=# EXPLAIN ANALYZE SELECT * FROM num WHERE i <@ '[0, 10)';
Seq Scan on num  (cost=0.00..16925.00 rows=1000 width=6) (actual time=0.014..188.761 rows=9 loops=1)
  Filter: (i <@ '[0,10]'::numrange)
  Rows Removed by Filter: 999991
Total runtime: 188.820 ms

Example

Of course the above query is equivalent to the indexable query WHERE num >= 0 AND num < 10. But what if you want to use range types? Well, you can create a custom operator with a function that automatically gets inlined:

db=# EXPLAIN ANALYZE SELECT * FROM num WHERE i <@ '[0, 10)';
Index Only Scan using num_i_idx on num  (cost=0.00..4.56 rows=10 width=6) (actual time=0.014..0.016 rows=9 loops=1)
  Index Cond: ((i >= 0::numeric) AND (i < 10::numeric))
  Heap Fetches: 0
Total runtime: 0.039 ms

Infinite terms get optimized out:

db=# EXPLAIN ANALYZE SELECT * FROM num WHERE i <@ '[,10]';
Index Only Scan using num_i_idx on num  (cost=0.00..4.53 rows=10 width=6) (actual time=0.007..0.009 rows=10 loops=1)
  Index Cond: (i <= 10::numeric)
  Heap Fetches: 0
Total runtime: 0.032 ms

Beware!

These custom operators are deliberately defined to be non-commative. If you create these operators, do not write range queries in the form 10.0 <@ range_col because it will blow up:

db=# EXPLAIN ANALYZE SELECT * FROM rng WHERE 10.0 <@ rng;
Seq Scan on rng  (cost=0.00..46358.16 rows=444424 width=32) (actual time=0.013..240.054 rows=10 loops=1)
  Filter: ((lower_inf(rng) OR CASE WHEN lower_inc(rng) THEN (10.0 >= lower(rng)) ELSE (10.0 > lower(rng)) END)
       AND (upper_inf(rng) OR CASE WHEN upper_inc(rng) THEN (10.0 <= upper(rng)) ELSE (10.0 < upper(rng)) END))
  Rows Removed by Filter: 999990
Total runtime: 240.125 ms

Code

You can adapt this code to any range type you need, just by changing the types of the function and the operator.

-- int4range (integer)
CREATE OR REPLACE FUNCTION range_contains(el integer, rng int4range) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=integer, rightarg=int4range);

-- int8range (bigint)
CREATE OR REPLACE FUNCTION range_contains(el bigint, rng int8range) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=bigint, rightarg=int8range);

-- numrange (numeric)
CREATE OR REPLACE FUNCTION range_contains(el numeric, rng numrange) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=numeric, rightarg=numrange);

-- tsrange (timestamp without time zone)
CREATE OR REPLACE FUNCTION range_contains(el timestamp, rng tsrange) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=timestamp, rightarg=tsrange);

-- tstzrange (timestamp with time zone)
CREATE OR REPLACE FUNCTION range_contains(el timestamptz, rng tstzrange) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=timestamptz, rightarg=tstzrange);

-- daterange (date)
CREATE OR REPLACE FUNCTION range_contains(el date, rng daterange) RETURNS bool IMMUTABLE LANGUAGE sql AS $$
SELECT (lower_inf(rng) OR (CASE WHEN lower_inc(rng) THEN el >= lower(rng) ELSE el > lower(rng) END))
   AND (upper_inf(rng) OR (CASE WHEN upper_inc(rng) THEN el <= upper(rng) ELSE el < upper(rng) END))
$$;
CREATE OPERATOR <@ (procedure=range_contains, leftarg=date, rightarg=daterange);



Idea from Andres Freund, implemented by Marti Raudsepp.