Home All Groups Group Topic Archive Search About

how to select only rows where 2 fields are duplicates?

Author
12 Aug 2005 11:00 PM
Rich
create table tbl1(
fld1 varchar(10),
fld2 varchar(20),
fld3 int)

insert into tbl1 values ('abc', 'joe shmo', 1)
insert into tbl1 values ('aaa', 'joe smith', 2)
insert into tbl1 values ('aaa', 'joe smith', 3)
insert into tbl1 values ('bbb', 'tom jones', 4)
insert into tbl1 values ('bbb', 'sue moo', 5)
insert into tbl1 values ('ccc', 'tom jones', 6)
insert into tbl1 values ('ccc', 'sue moo', 7)
insert into tbl1 values ('ddd', 'bill will', 8)
insert into tbl1 values ('ddd', 'bill will', 9)
insert into tbl1 values ('def', 'sha ma', 10)
insert into tbl1 values ('eee', 'ted ned', 11)
insert into tbl1 values ('eee', 'ted ned', 12)
insert into tbl1 values ('fgh', 'chris xing', 13)

select distinct * from tbl1
where fld1 in (select fld1 from tbl1 t group by fld1 having count(fld1) > 1)

--this gives the following:
---------------------------------------
aaa    joe smith    2
aaa    joe smith    3
bbb    sue moo    5
bbb    tom jones    4
ccc    sue moo    7
ccc    tom jones    6
ddd    bill will    8
ddd    bill will    9
eee    ted ned    11
eee    ted ned    12
-------------------------------------

I want to remove  the rows containing

bbb , sue moo
bbb, tom jones
ccc, sue moo
ccc, tom jones

from this list because fld1 and fld2 are not duplicates for these rows. 
What kind of sql statement can select only rows where both fld1 and fld2 are
exact duplicates in the same row(s)?

Thanks,
Rich

Author
12 Aug 2005 11:07 PM
Adam Machanic
select *
from tbl1
where exists
(
    select fld1, fld2
    from tbl1 t2
    where t2.fld1 = tbl1.fld1
    and t2.fld2 = tbl1.fld2
    group by fld1, fld2
    having count(*) > 1
)


--
Adam Machanic
SQL Server MVP
http://www.datamanipulation.net
--


Show quote
"Rich" <R***@discussions.microsoft.com> wrote in message
news:21ED455E-E0F1-450D-BABD-889841A5E76F@microsoft.com...
>
> create table tbl1(
> fld1 varchar(10),
> fld2 varchar(20),
> fld3 int)
>
> insert into tbl1 values ('abc', 'joe shmo', 1)
> insert into tbl1 values ('aaa', 'joe smith', 2)
> insert into tbl1 values ('aaa', 'joe smith', 3)
> insert into tbl1 values ('bbb', 'tom jones', 4)
> insert into tbl1 values ('bbb', 'sue moo', 5)
> insert into tbl1 values ('ccc', 'tom jones', 6)
> insert into tbl1 values ('ccc', 'sue moo', 7)
> insert into tbl1 values ('ddd', 'bill will', 8)
> insert into tbl1 values ('ddd', 'bill will', 9)
> insert into tbl1 values ('def', 'sha ma', 10)
> insert into tbl1 values ('eee', 'ted ned', 11)
> insert into tbl1 values ('eee', 'ted ned', 12)
> insert into tbl1 values ('fgh', 'chris xing', 13)
>
> select distinct * from tbl1
> where fld1 in (select fld1 from tbl1 t group by fld1 having count(fld1) >
1)
>
> --this gives the following:
> ---------------------------------------
> aaa joe smith 2
> aaa joe smith 3
> bbb sue moo 5
> bbb tom jones 4
> ccc sue moo 7
> ccc tom jones 6
> ddd bill will 8
> ddd bill will 9
> eee ted ned 11
> eee ted ned 12
> -------------------------------------
>
> I want to remove  the rows containing
>
> bbb , sue moo
> bbb, tom jones
> ccc, sue moo
> ccc, tom jones
>
> from this list because fld1 and fld2 are not duplicates for these rows.
> What kind of sql statement can select only rows where both fld1 and fld2
are
> exact duplicates in the same row(s)?
>
> Thanks,
> Rich
Author
12 Aug 2005 11:17 PM
Rich
Thanks very much.  Sorry for being such a lamo (I should know this by now). 
I tried the Exists thing but I left out the

>     where t2.fld1 = tbl1.fld1
>     and t2.fld2 = tbl1.fld2

part

Thanks.

Show quote
"Adam Machanic" wrote:

> select *
> from tbl1
> where exists
> (
>     select fld1, fld2
>     from tbl1 t2
>     where t2.fld1 = tbl1.fld1
>     and t2.fld2 = tbl1.fld2
>     group by fld1, fld2
>     having count(*) > 1
> )
>
>
> --
> Adam Machanic
> SQL Server MVP
> http://www.datamanipulation.net
> --
>
>
> "Rich" <R***@discussions.microsoft.com> wrote in message
> news:21ED455E-E0F1-450D-BABD-889841A5E76F@microsoft.com...
> >
> > create table tbl1(
> > fld1 varchar(10),
> > fld2 varchar(20),
> > fld3 int)
> >
> > insert into tbl1 values ('abc', 'joe shmo', 1)
> > insert into tbl1 values ('aaa', 'joe smith', 2)
> > insert into tbl1 values ('aaa', 'joe smith', 3)
> > insert into tbl1 values ('bbb', 'tom jones', 4)
> > insert into tbl1 values ('bbb', 'sue moo', 5)
> > insert into tbl1 values ('ccc', 'tom jones', 6)
> > insert into tbl1 values ('ccc', 'sue moo', 7)
> > insert into tbl1 values ('ddd', 'bill will', 8)
> > insert into tbl1 values ('ddd', 'bill will', 9)
> > insert into tbl1 values ('def', 'sha ma', 10)
> > insert into tbl1 values ('eee', 'ted ned', 11)
> > insert into tbl1 values ('eee', 'ted ned', 12)
> > insert into tbl1 values ('fgh', 'chris xing', 13)
> >
> > select distinct * from tbl1
> > where fld1 in (select fld1 from tbl1 t group by fld1 having count(fld1) >
> 1)
> >
> > --this gives the following:
> > ---------------------------------------
> > aaa joe smith 2
> > aaa joe smith 3
> > bbb sue moo 5
> > bbb tom jones 4
> > ccc sue moo 7
> > ccc tom jones 6
> > ddd bill will 8
> > ddd bill will 9
> > eee ted ned 11
> > eee ted ned 12
> > -------------------------------------
> >
> > I want to remove  the rows containing
> >
> > bbb , sue moo
> > bbb, tom jones
> > ccc, sue moo
> > ccc, tom jones
> >
> > from this list because fld1 and fld2 are not duplicates for these rows.
> > What kind of sql statement can select only rows where both fld1 and fld2
> are
> > exact duplicates in the same row(s)?
> >
> > Thanks,
> > Rich
>
>
>

AddThis Social Bookmark Button