I would like to know the most efficient way to create a group for objects related to one another. The data is in the form:
ObjectId1 ObjectId2
1 2
2 1
1 1
3 2
2 3
3 3
So 1 is related to 2 (and all objects are related to themselves) and 3 is related to 2. As both 1 and 3 are related to 2, I would like to know they are also related to each other. So the final output would be:
Group ObjectId
A 1
A 2
A 3
The group doesn't have to be called a letter, I've just used this as an example to show it's not an object id.
The solution I have come up with is below, but it doesn't seem as efficient as it could be and over complicated:
--sample data
DECLARE @Links TABLE(
ObjectId1 int,
ObjectId2 int,
Primary Key(ObjectId1,ObjectId2)
)
INSERT INTO @Links
SELECT 1,2 UNION ALL SELECT 1,1 UNION ALL SELECT 2,3 UNION ALL SELECT 2,1 UNION ALL SELECT 2,2 UNION ALL
SELECT 3,2 UNION ALL SELECT 3,3 UNION ALL SELECT 3,4 UNION ALL SELECT 4,3 UNION ALL SELECT 4,4 UNION ALL
SELECT 4,5 UNION ALL SELECT 5,5 UNION ALL SELECT 5,4 UNION ALL SELECT 1,6 UNION ALL SELECT 6,1 UNION ALL
SELECT 6,2 UNION ALL SELECT 2,6 UNION ALL SELECT 6,6 UNION ALL SELECT 5,7 UNION ALL SELECT 7,5 UNION ALL
SELECT 7,7 UNION ALL SELECT 8,8 UNION ALL SELECT 9,9 UNION ALL SELECT 10,9 UNION ALL SELECT 9,10 UNION ALL
SELECT 10,3 UNION ALL SELECT 3,10 UNION ALL SELECT 15,16 UNION ALL SELECT 16,15 UNION ALL SELECT 1,5 UNION ALL
SELECT 4,1 UNION ALL SELECT 10,10 UNION ALL SELECT 15,15 UNION ALL SELECT 16,16
--get rid of duplicate information, only take one link between two objects
DECLARE @TempGroups TABLE(
ObjectId int,
LowestObjectId int
)
INSERT INTO @TempGroups
SELECT ObjectId2,ObjectId1
FROM @Links
WHERE ObjectId1<ObjectId2
--loop relates all groups to the same lowestid
DECLARE @UpdateRows int
SET @UpdateRows=1
WHILE @UpdateRows>0 --run until now further updates
BEGIN
--relates lowestids to ids
--i.e.
--lowestid id
--1 2
--2 3
--goes to
--1 2
--1 3
UPDATE High
SET LowestObjectId=Low.LowestObjectId
FROM
@TempGroups Low
JOIN
@TempGroups High ON
Low.ObjectId=High.LowestObjectId
SET @UpdateRows=@@Rowcount
--relates ids
--i.e.
--lowestid id
--1 3
--2 3
--goes to
--1 3
--1 2
UPDATE High
SET LowestObjectId=Low.LowestObjectId,ObjectId=High.LowestObjectId
FROM
@TempGroups Low
JOIN
@TempGroups High ON
Low.ObjectId=High.ObjectId
AND
Low.LowestObjectId<High.LowestObjectId
SET @UpdateRows=@UpdateRows+@@Rowcount
END
--get into final form
DECLARE @Groups TABLE(
ObjectId int,
GroupId int,
Primary Key(ObjectId)
)
--only show unique results
INSERT INTO @Groups
SELECT DISTINCT ObjectId,LowestObjectId FROM @TempGroups
--lowest object ids belong to their own groups
INSERT INTO @Groups
SELECT DISTINCT LowestObjectId,LowestObjectId FROM @TempGroups
--not really needed but creates arbitary group ids rather than those based on object ids
UPDATE G
SET GroupId=NG.NewGroupId
FROM
@Groups G
JOIN
(
SELECT DISTINCT
GroupId,
NewGroupId=DENSE_RANK() OVER(ORDER BY GroupId)
FROM
@Groups
) NG ON
NG.GroupId=G.GroupId
--show results
SELECT * FROM @Groups