User-specified ordering with fractions

From PostgreSQL wiki
Jump to navigationJump to search

Requirement: given that things can be in categories, allow the position of a thing in the display order to be specified by the user or application (i.e. arbitrarily, rather than according to some stable attribute of the data).

For example, something that displays a list and allows the user to shuffle the items around.

There are a number of possible approaches. Using integers is simple but tends to require frequent renumberings. Using floats and picking the midpoints between adjacent values also runs out of space rapidly (you only need 50-odd inserts at the wrong spot to start hitting problems). So this approach uses integer fractions, choosing the values (from the Stern–Brocot tree) such that they can be sorted using (p::float8/q) but renumbering values is only rarely required.

create table categories (id integer primary key, name text);
create table things (id integer primary key, name text);

create table category_member (
  category_id integer not null references categories,
  thing_id integer not null references things on delete cascade,
  primary key (category_id,thing_id),
  p integer not null, q integer not null
);

create unique index on category_member (category_id, (p::float8/q));


-- find an intermediate fraction between p1/q1 and p2/q2.
--
-- The fraction chosen is the highest fraction in the Stern-Brocot
-- tree which falls strictly between the specified values. This is
-- intended to avoid going deeper in the tree unnecessarily when the
-- list is already sparse due to deletion or moving of items, but in
-- fact the case when the two items are already adjacent in the tree
-- is common so we shortcut it. As a bonus, this method always
-- generates fractions in lowest terms, so there is no need for GCD
-- calculations anywhere.
--
-- Inputs must not be null (caller's responsibility), but the value
-- p2=1 q2=0 is allowed for the upper bound without causing any
-- division by zero errors.

create or replace function find_intermediate(p1 integer, q1 integer,
                                             p2 integer, q2 integer,
                                             OUT p integer, OUT q integer)
  language plpgsql immutable strict
as $f$
  declare
    pl integer := 0;
    ql integer := 1;
    ph integer := 1;
    qh integer := 0;
  begin
    if (p1::bigint*q2 + 1) <> (p2::bigint*q1) then
      loop
        p := pl + ph;
        q := ql + qh;
        if (p::bigint*q1 <= q::bigint*p1) then
          pl := p; ql := q;
        elsif (p2::bigint*q <= q2::bigint*p) then
          ph := p; qh := q;
        else
          exit;
        end if;
      end loop;
    else
      p := p1 + p2;
      q := q1 + q2;
    end if;
  end;
$f$;

-- insert or move item ACT_ID in category CAT_ID next to REL_ID,
-- before it if IS_BEFORE is true, otherwise after. REL_ID may
-- be null to indicate a position off the end of the list.

create or replace function cat_place_item(cat_id integer,
                                          act_id integer,
                                          rel_id integer,
                                          is_before boolean)
  returns void
  language plpgsql
  volatile called on null input
as $f$
  declare
    p1 integer; q1 integer;   -- fraction below insert position
    p2 integer; q2 integer;   -- fraction above insert position
    r_rel double precision;   -- p/q of the rel_id row
    np integer; nq integer;   -- new insert position fraction
  begin
    -- lock the category
    perform 1 from categories c where c.id=cat_id for update;

    -- moving a record to its own position is a no-op
    if rel_id=act_id then return; end if;

    -- if we're positioning next to a specified row, it must exist
    if rel_id is not null then
      select cm.p, cm.q into strict p1, q1
        from category_member cm
       where cm.category_id=cat_id and cm.thing_id=rel_id;
      r_rel := p1::float8 / q1;
    end if;

    -- find the next adjacent row in the desired direction
    -- (might not exist).
    if is_before then
      p2 := p1; q2 := q1;
      select cm2.p, cm2.q into p1, q1
        from category_member cm2
       where cm2.category_id=cat_id and cm2.thing_id <> act_id
         and (p::float8/q) < coalesce(r_rel,'infinity')
       order by (p::float8/q) desc limit 1;
    else
      select cm2.p, cm2.q into p2, q2
        from category_member cm2
       where cm2.category_id=cat_id and cm2.thing_id <> act_id
         and (p::float8/q) > coalesce(r_rel,0)
       order by (p::float8/q) asc limit 1;
    end if;

    -- compute insert fraction
    select * into np,nq from find_intermediate(coalesce(p1,0),coalesce(q1,1),
                                               coalesce(p2,1),coalesce(q2,0));

    -- move or insert the specified row
    update category_member
       set (p,q) = (np,nq) where category_id=cat_id and thing_id=act_id;
    if not found then
      insert into category_member values (cat_id, act_id, np, nq);
    end if;

    -- want to renormalize both to avoid possibility of integer overflow
    -- and to ensure that distinct fraction values map to distinct float8
    -- values. Bounding to 10 million gives us reasonable headroom while
    -- not requiring frequent normalization.

    if (np > 10000000) or (nq > 10000000) then
      perform cat_renormalize(cat_id);
    end if;
  end;
$f$;

-- Renormalize the fractions of items in CAT_ID, preserving the
-- existing order. The new fractions are not strictly optimal, but
-- doing better would require much more complex calculations.
--
-- the purpose of the complex update is as follows: we want to assign
-- a new series of values 1/2, 3/2, 5/2, ... to the existing rows,
-- maintaining the existing order, but because the unique expression
-- index is not deferrable, we want to avoid assigning any new value
-- that collides with an existing one.
--
-- We do this by calculating, for each existing row with an x/2 value,
-- which position in the new sequence it would appear at. This is done
-- by adjusting the value of p downwards according to the number of
-- earlier values in sequence. To see why, consider:
--
--   existing values:    3, 9,13,15,23
--   new simple values:  1, 3, 5, 7, 9,11,13,15,17,19,21
--                          *     *  *        *
--   adjusted values:    1, 5, 7,11,17,19,21,25,27,29,31
--
--   points of adjustment: 3, 7 (9-2), 9 (13-4, 15-6), 15 (23-8)
--
-- The * mark the places where an adjustment has to be applied.
--
-- Having calculated the adjustment points, the adjusted value is
-- simply the simple value adjusted upwards according to the number of
-- points passed (counting multiplicity).

create or replace function cat_renormalize(cat_id integer)
  returns void
  language plpgsql
  volatile strict
as $f$
  begin
    perform 1 from categories c where c.id=cat_id for update;
    update category_member cm set p=s2.new_rnum, q=2
      from (select thing_id,
                   is_existing = 0 as is_new,
                   -- increase the current value according to the
                   -- number of adjustment points passed
                   rnum + 2*(sum(is_existing)
                               over (order by rnum))
                     as new_rnum
              from (
                    -- assign the initial simple values to every item
		    -- in order
                    select thing_id,
                           2*(row_number() over (order by p::float8/q)) - 1
                             as rnum,
                           0 as is_existing
                      from category_member cm2
                     where cm2.category_id=cat_id
                    union all
                    -- and merge in the adjustment points required to
                    -- skip over existing x/2 values
                    select thing_id,
                           p + 2 - 2*(count(*) over (order by p))
                             as rnum,
                           1 as is_existing
                      from category_member cm3
                     where cm3.category_id=cat_id
                       and cm3.q=2
                   ) s1
           ) s2
     where s2.thing_id=cm.thing_id
       and s2.is_new
       and cm.category_id=cat_id;
  end;
$f$;


--
-- example
--

insert into categories values (1,'Animals'),(2,'Plants');

insert into things values (1,'foo'),(2,'bar'),(3,'baz'),(4,'fred'),(5,'jim');

select cat_place_item(1,1,null,false);  -- insert 'foo' at start
select cat_place_item(1,2,1,false);     -- insert 'bar' after 'foo'
select cat_place_item(1,3,2,false);     -- insert 'baz' after 'bar'
select cat_place_item(1,4,3,true);      -- insert 'fred' before 'baz'
select cat_place_item(1,5,null,true);   -- insert 'jim' at end

select * from category_member order by (p::float8/q);