Morus Walter
2013-02-04 09:34:54 UTC
Hallo,
I have a question regarding a selection.
I'd like to group and merge certain records having the same values in
some columns, but only if they are contiguous with regard to some sort
order.
So for a table
create table foo (
id int,
user_id int,
key varchar,
sort int );
and values e.g.
insert into foo
values ( 1, 1, 'foo', 1),
( 2, 1, 'foo', 2),
( 3, 1, 'bar', 3),
( 4, 1, 'foo', 4),
( 5, 1, 'foo', 5),
( 6, 1, 'foo', 6),
( 7, 1, 'bla', 7),
( 8, 2, 'bar', 1),
( 9, 2, 'foo', 2),
(10, 2, 'foo', 3),
(11, 2, 'bla', 4);
I'd like to merge all consecutive records (ordered by sort, user_id)
having the same value in user_id and key and keep the first/last
value of sort of the merged records (and probably some more values
from the first or last merged record).
So the result should be something like
user_id, key, sort_first, sort_last
1, 'foo', 1, 2
1, 'bar', 3, 3
1, 'foo', 4, 6
1, 'bla', 7, 7
2, 'bar', 1, 1
2, 'foo', 2, 3
2, 'bla', 4, 4
I was trying to do that using window functions, which works great -
except it merges non consecutive occurences (key foo for user_id 1 in
my sample) as well.
select user_id, key, sort_first, sort_last
from (
select user_id,
key,
first_value(sort) over w as sort_first,
last_value(sort) over w as sort_last,
lead(key) over w as next_key
from foo
window w as (partition by user_id, key order by sort
range between unbounded preceding and unbounded following)
) as foo
where next_key is null
order by user_id, sort_first;
user_id | key | sort_first | sort_last
---------+-----+------------+-----------
1 | foo | 1 | 6 <-- would like to have two records
1/2 and 4/6 here
1 | bar | 3 | 3
1 | bla | 7 | 7
2 | bar | 1 | 1
2 | foo | 2 | 3
2 | bla | 4 | 4
Introducing another window on user_id only allows me to keep two records for
1/foo but I still cannot determine the intended sort_first/sort_last.
select user_id, key, sort_first, sort_last
from (
select user_id,
key,
first_value(sort) over w as sort_first,
last_value(sort) over w as sort_last,
lead(key) over u as next_key
from foo
window u as (partition by user_id order by sort),
w as (partition by user_id, key order by sort
range between unbounded preceding and unbounded following)
) as foo
where next_key is null or key != next_key
order by user_id, sort_first;
user_id | key | sort_first | sort_last
---------+-----+------------+-----------
1 | foo | 1 | 6
1 | foo | 1 | 6
1 | bar | 3 | 3
1 | bla | 7 | 7
2 | bar | 1 | 1
2 | foo | 2 | 3
2 | bla | 4 | 4
So the question is: is this doable with a selection?
Can I use window functions for this type of grouping?
Are there other options?
I do have an alternative plan to select records into a temporary table first,
and then do updates merging two consecutive records and repeat that until
all groups are completely merged, but I'd still like to know if I miss
something regarding selection options.
best
Morus
PS: the alternative plan is something like
select id, user_id,
key,
sort,
sort as sort_last,
lead(key) over u as next_key,
lead(id) over u as next_id,
lag(key) over u as prev_key
into temp table footmp
from foo
window u as (partition by user_id order by sort);
update footmp set sort = f2.sort, prev_key = f2.prev_key
from footmp f2
where footmp.id = f2.next_id and
footmp.key = f2.key and
f2.key = f2.next_key and
( f2.prev_key is null or f2.prev_key != f2.key );
delete from footmp
where id in (
select id from (
select first_value(id) over w as id,
count(*) over w as cnt
from footmp
window w as ( partition by user_id, sort )
) as foo where cnt > 1
);
(repeat update/delete until no row is affected)
select user_id,
key,
sort as sort_first,
sort_last
from footmp
order by user_id, sort_first;
pretty ugly and complicated but at least gives me what I want...
I have a question regarding a selection.
I'd like to group and merge certain records having the same values in
some columns, but only if they are contiguous with regard to some sort
order.
So for a table
create table foo (
id int,
user_id int,
key varchar,
sort int );
and values e.g.
insert into foo
values ( 1, 1, 'foo', 1),
( 2, 1, 'foo', 2),
( 3, 1, 'bar', 3),
( 4, 1, 'foo', 4),
( 5, 1, 'foo', 5),
( 6, 1, 'foo', 6),
( 7, 1, 'bla', 7),
( 8, 2, 'bar', 1),
( 9, 2, 'foo', 2),
(10, 2, 'foo', 3),
(11, 2, 'bla', 4);
I'd like to merge all consecutive records (ordered by sort, user_id)
having the same value in user_id and key and keep the first/last
value of sort of the merged records (and probably some more values
from the first or last merged record).
So the result should be something like
user_id, key, sort_first, sort_last
1, 'foo', 1, 2
1, 'bar', 3, 3
1, 'foo', 4, 6
1, 'bla', 7, 7
2, 'bar', 1, 1
2, 'foo', 2, 3
2, 'bla', 4, 4
I was trying to do that using window functions, which works great -
except it merges non consecutive occurences (key foo for user_id 1 in
my sample) as well.
select user_id, key, sort_first, sort_last
from (
select user_id,
key,
first_value(sort) over w as sort_first,
last_value(sort) over w as sort_last,
lead(key) over w as next_key
from foo
window w as (partition by user_id, key order by sort
range between unbounded preceding and unbounded following)
) as foo
where next_key is null
order by user_id, sort_first;
user_id | key | sort_first | sort_last
---------+-----+------------+-----------
1 | foo | 1 | 6 <-- would like to have two records
1/2 and 4/6 here
1 | bar | 3 | 3
1 | bla | 7 | 7
2 | bar | 1 | 1
2 | foo | 2 | 3
2 | bla | 4 | 4
Introducing another window on user_id only allows me to keep two records for
1/foo but I still cannot determine the intended sort_first/sort_last.
select user_id, key, sort_first, sort_last
from (
select user_id,
key,
first_value(sort) over w as sort_first,
last_value(sort) over w as sort_last,
lead(key) over u as next_key
from foo
window u as (partition by user_id order by sort),
w as (partition by user_id, key order by sort
range between unbounded preceding and unbounded following)
) as foo
where next_key is null or key != next_key
order by user_id, sort_first;
user_id | key | sort_first | sort_last
---------+-----+------------+-----------
1 | foo | 1 | 6
1 | foo | 1 | 6
1 | bar | 3 | 3
1 | bla | 7 | 7
2 | bar | 1 | 1
2 | foo | 2 | 3
2 | bla | 4 | 4
So the question is: is this doable with a selection?
Can I use window functions for this type of grouping?
Are there other options?
I do have an alternative plan to select records into a temporary table first,
and then do updates merging two consecutive records and repeat that until
all groups are completely merged, but I'd still like to know if I miss
something regarding selection options.
best
Morus
PS: the alternative plan is something like
select id, user_id,
key,
sort,
sort as sort_last,
lead(key) over u as next_key,
lead(id) over u as next_id,
lag(key) over u as prev_key
into temp table footmp
from foo
window u as (partition by user_id order by sort);
update footmp set sort = f2.sort, prev_key = f2.prev_key
from footmp f2
where footmp.id = f2.next_id and
footmp.key = f2.key and
f2.key = f2.next_key and
( f2.prev_key is null or f2.prev_key != f2.key );
delete from footmp
where id in (
select id from (
select first_value(id) over w as id,
count(*) over w as cnt
from footmp
window w as ( partition by user_id, sort )
) as foo where cnt > 1
);
(repeat update/delete until no row is affected)
select user_id,
key,
sort as sort_first,
sort_last
from footmp
order by user_id, sort_first;
pretty ugly and complicated but at least gives me what I want...
--
Sent via pgsql-general mailing list (pgsql-***@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general
Sent via pgsql-general mailing list (pgsql-***@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general