x
login about faq Site discussion (meta-askssc)

INSERT multiple related tables without using CURSOR

At many clients I see that they import files one row at a time using cursors. There is a similar theme of inserting one row into a table, getting the @@identity, using that to insert one row into a child table, getting that @@identity and inserting into a third related table etc. then moving on to the next row in the file--all in one huge transaction.

Toy Scenario:

1 I would like to use set based processing for speed

2 I have 10,000,000 rows in a flat data file that need processing. Given the size I don't want it all in one transaction

3 All updates/inserts should be in batches to mitigate locking contention

4 If the server dies in the middle of the import I need to be able to return the state of the db to before the import started

I have come up with the solution below. What I don't like Is I am writing twice, once in the table and once to update the IDs in my import/work table so I'm thinking there is room for speed enhancement. Is there a better solution? What have you done in the past?

CREATE TABLE SSCImport(ImportRowID int identity,ImportID uniqueidentifier,Name varchar(50),Number varchar(50)/* 100 more fields*/,CallAgentID int,CallTime datetime, CustomerID int null,CallLogID int /*...18 more table ids*/)
CREATE TABLE #tmpImported(ImportedRowID int)
CREATE TABLE SSCCustomer(CustomerID int identity, Name varchar(50),Number varchar(50),ImportRowID int,ImportID uniqueidentifier)
CREATE TABLE SSCCallLog(CallLogID int identity,CallAgentID int,CustomerID int,CallTime datetime,ImportRowID int,ImportID uniqueidentifier)
--...18 more tables         
CREATE CLUSTERED INDEX idx ON SSCImport(ImportRowID)
CREATE CLUSTERED INDEX idx ON #tmpImported(ImportedRowID)
CREATE CLUSTERED INDEX idx ON SSCCallLog(CallLogID)
go
CREATE FUNCTION dbo.GetExistingCustomerID(@Name varchar(50),@Number varchar(50))
RETURNS int
AS
BEGIN
    --complex multi-statement logic to determine if customer exists
    DECLARE @CustomerID int
    SET @CustomerID = CASE WHEN @Name='Alice' AND @Number='1111111111' THEN 1 ELSE NULL END
    RETURN @CustomerID
END
go
DECLARE @ImportID uniqueidentifier
SET @ImportID = NEWID()

--create existing customer
INSERT SSCCustomer
SELECT 'Alice','1111111111',1,NEWID()

--bulk insert records into import table (10,000,000 rows)
INSERT SSCImport
SELECT @ImportID,'Alice','1111111111',1,'2010-01-09 09:47:44.410',NULL,NULL UNION ALL
SELECT @ImportID,'Mary','2222222222',2,'2010-01-09 010:47:44.410',NULL,NULL UNION ALL
SELECT @ImportID,'Bob','3333333333',3,'2010-01-09 011:47:44.410',NULL,NULL

DECLARE @BatchRows int,@RowCount int
SET @BatchRows = 5000

--query from MDM/CDI store to get existing customerid, store customerid in import table for later reference 
BEGIN TRY
WHILE 1=1
BEGIN
    BEGIN TRAN
    ;WITH cteImported AS(
    SELECT ImportedRowID from #tmpImported
    )
    UPDATE TOP(@BatchRows) i 
    SET CustomerID = dbo.GetExistingCustomerID(Name,Number)
    OUTPUT inserted.ImportRowID INTO #tmpImported
    FROM SSCImport i
    LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID)
    WHERE cteImported.ImportedRowID IS NULL
    AND i.ImportID = @ImportID
    SET @RowCount = @@ROWCOUNT

    COMMIT TRAN
    IF @RowCount < @BatchRows BREAK
END

--insert new customers
TRUNCATE TABLE #tmpImported
WHILE 1=1
BEGIN
    BEGIN TRAN
    ;WITH cteImported AS(
    SELECT ImportedRowID from #tmpImported
    )
    INSERT TOP (@BatchRows) SSCCustomer
    OUTPUT inserted.ImportRowID INTO #tmpImported
    SELECT Name,Number,ImportRowID,@ImportID
    FROM SSCImport i
    LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID)
    WHERE cteImported.ImportedRowID IS NULL
    AND i.CustomerID IS NULL
    AND i.ImportID = @ImportID
    SET @RowCount = @@ROWCOUNT

    --store customerid in import table for later reference
    UPDATE i
    SET CustomerID = c.CustomerID
    FROM SSCCustomer c
    JOIN SSCImport i ON (i.ImportID = c.ImportID AND i.ImportRowID = c.ImportRowID)

    COMMIT TRAN
    IF @RowCount < @BatchRows BREAK
END

--insert new call log data
TRUNCATE TABLE #tmpImported
WHILE 1=1
BEGIN
    BEGIN TRAN
    ;WITH cteImported AS(
    SELECT ImportedRowID from #tmpImported
    )
    INSERT TOP (@BatchRows) SSCCallLog
    OUTPUT inserted.ImportRowID INTO #tmpImported
    SELECT CallAgentID,CustomerID,CallTime,ImportRowID,@ImportID
    FROM SSCImport i
    LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID)
    WHERE cteImported.ImportedRowID IS NULL
    AND i.CustomerID IS NOT NULL
    AND i.ImportID = @ImportID
    SET @RowCount = @@ROWCOUNT

    --store calllogid in import table for later reference
    UPDATE i
    SET CallLogID = cl.CallLogID
    FROM SSCCallLog cl
    JOIN SSCImport i ON (i.ImportID = cl.ImportID AND i.ImportRowID = cl.ImportRowID)

    COMMIT TRAN
    IF @RowCount < @BatchRows BREAK
END
-- ...update 18 more tables

END TRY
BEGIN CATCH
    IF XACT_STATE() <> 0 ROLLBACK TRAN
    DELETE FROM SSCCustomer WHERE ImportID = @ImportID
    DELETE FROM SSCCallLog WHERE ImportID = @ImportID
END CATCH

SELECT * FROM SSCImport
SELECT * FROM SSCCustomer
SELECT * FROM SSCCallLog

DROP TABLE SSCImport,#tmpImported,SSCCustomer,SSCCallLog
DROP FUNCTION dbo.GetExistingCustomerID

more ▼

asked Jan 09 '10 at 06:24 PM in Default

Scot Hauder gravatar image

Scot Hauder
5.7k 13 15 18

Have you considered bcp or SSIS?

Jan 09 '10 at 06:48 PM Håkan Winther
(comments are locked)
10|1200 characters needed characters left

3 answers: sort voted first

I would say that this looks pretty sweet to me. I've had very similar experiences in the past, particularly with one system that had to load data from 350 petrol stations in near-real time. Initially there was an RBAR solution, and I re-wrote it to something similar to the above, which really made the whole thing a lot less painful.

The only other solution that I thought of was to use GUIDs instead of int IDENTITY rows, and use those for all primary/foreign keys. That way, you could pre-process the data on the client, and load directly into the target tables using BCP, knowing that you had already related one set of data to the other using client-generated GUIDs. However, I thought that if I ever had to load a large set of data (which I did once, after a tech guy broke the storage system by removing a disk shelf accidentally), then the amount of memory needed to do the client side relational work would be pretty prohibitive. I also thought that it would be slightly arrogant of me to assume that I would be better at handling large data operations than the people who wrote SQL Server...

more ▼

answered Jan 09 '10 at 06:56 PM

Matt Whitfield gravatar image

Matt Whitfield ♦♦
29.2k 56 63 87

Interesting point about relating the data on the client using GUIDs. Files could be split and the processing spread across multiple pcs if needed. Then SSIS or bcp could be used to dump the data quite quickly. I had to do this years ago and didn't know if there was a better way now.

Jan 09 '10 at 07:10 PM Scot Hauder

You've got me thinking, I like the idea of client side relating but I also like 4 byte IDS. If only there were an "ID broker" where you could request/reserve a range of IDs, do the relating on the client and then insert at your leisure. What comes to mind is take an exclusive lock on the table and reseed the identity+=@batchsize then do an identity insert on to add the records. I might get this thing to fly

Jan 09 '10 at 09:51 PM Scot Hauder
(comments are locked)
10|1200 characters needed characters left

Ok Matt, here ya go. I will try it out on a 10,000,000 file

CREATE TABLE SSCImport(ImportRowID int identity,ImportID uniqueidentifier,Name varchar(50),Number varchar(50)/* 100 more fields*/,CallAgentID int,CallTime datetime, CustomerID int null,CallLogID int /*...18 more table ids*/)
CREATE TABLE #tmpImported(ImportedRowID int)
CREATE TABLE SSCCustomer(CustomerID int identity(1,1), Name varchar(50),Number varchar(50),ImportRowID int,ImportID uniqueidentifier)
CREATE TABLE SSCCallLog(CallLogID int identity(1,1),CallAgentID int,CustomerID int,CallTime datetime,ImportRowID int,ImportID uniqueidentifier)
--...18 more tables         
CREATE CLUSTERED INDEX idx ON SSCImport(ImportRowID)
CREATE CLUSTERED INDEX idx ON #tmpImported(ImportedRowID)
CREATE CLUSTERED INDEX idx ON SSCCallLog(CallLogID)
go
CREATE PROC ReserveIDs(@TableName varchar(50),@NumberOfIDs int, @RangeStart int OUT)
WITH EXECUTE AS OWNER
AS
BEGIN
    SET NOCOUNT ON
    DECLARE @ret int

    BEGIN TRAN
    IF @TableName = 'SSCCustomer'
    BEGIN
        SELECT @ret = COUNT(*) FROM SSCCustomer WITH(HOLDLOCK,TABLOCKX)
        SELECT @RangeStart = IDENT_CURRENT(@TableName) 
        SET @NumberOfIDs = @NumberOfIDs + @RangeStart
        DBCC CHECKIDENT(@TableName,RESEED, @NumberOfIDs)
    END

    IF @TableName = 'SSCCallLog'
    BEGIN
        SELECT @ret = COUNT(*) FROM SSCCallLog WITH(HOLDLOCK,TABLOCKX)
        SELECT @RangeStart = IDENT_CURRENT(@TableName) 
        SET @NumberOfIDs = @NumberOfIDs + @RangeStart
        DBCC CHECKIDENT(@TableName,RESEED, @NumberOfIDs)
    END
    COMMIT TRAN

END
go

CREATE FUNCTION dbo.GetExistingCustomerID(@Name varchar(50),@Number varchar(50))
RETURNS int
AS
BEGIN
    --complex multi-statement logic to determine if customer exists
    DECLARE @CustomerID int
    SET @CustomerID = CASE WHEN @Name='Alice' AND @Number='1111111111' THEN 1 ELSE NULL END
    RETURN @CustomerID
END
go

DECLARE @ImportID uniqueidentifier
SET @ImportID = NEWID()

--create existing customer
INSERT SSCCustomer
SELECT 'Alice','1111111111',1,NEWID()

--bulk insert records into import table (10,000,000 rows)
INSERT SSCImport
SELECT @ImportID,'Alice','1111111111',1,'2010-01-09 09:47:44.410',NULL,NULL UNION ALL
SELECT @ImportID,'Mary','2222222222',2,'2010-01-09 010:47:44.410',NULL,NULL UNION ALL
SELECT @ImportID,'Bob','3333333333',3,'2010-01-09 011:47:44.410',NULL,NULL

DECLARE @BatchRows int,@RowCount int, @NumberOfRows int, @RangeStart int, @RangeEnd int, @NewID int
SET @BatchRows = 5000

--query from MDM/CDI store to get existing customerid, store customerid in import table for later reference 
BEGIN TRY
WHILE 1=1
BEGIN
    BEGIN TRAN
    ;WITH cteImported AS(
    SELECT ImportedRowID from #tmpImported
    )
    UPDATE TOP(@BatchRows) i 
    SET CustomerID = dbo.GetExistingCustomerID(Name,Number)
    OUTPUT inserted.ImportRowID INTO #tmpImported
    FROM SSCImport i
    LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID)
    WHERE cteImported.ImportedRowID IS NULL
    AND i.ImportID = @ImportID
    SET @RowCount = @@ROWCOUNT

    COMMIT TRAN
    IF @RowCount < @BatchRows BREAK
END
--get number of new customer rows to insert
SELECT @NumberOfRows = COUNT(*)
FROM SSCImport
WHERE CustomerID IS NULL

--update import table with ids just reserved
EXEC dbo.ReserveIDs 'SSCCustomer',@NumberOfRows,@NewID OUT
SET @RangeStart = @NewID + 1
TRUNCATE TABLE #tmpImported
WHILE 1=1
BEGIN
    ;WITH cteImported AS(
    SELECT ImportedRowID from #tmpImported
    )
    UPDATE TOP(@BatchRows) i 
    SET @NewID = CustomerID = @NewID + 1
    OUTPUT inserted.ImportRowID INTO #tmpImported
    FROM SSCImport i
    LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID)
    WHERE CustomerID IS NULL
    SET @RowCount = @@ROWCOUNT

    IF @RowCount < @BatchRows BREAK
END
SET @RangeEnd = @NewID

--insert new customers
TRUNCATE TABLE #tmpImported
WHILE 1=1
BEGIN
    BEGIN TRAN
    SET IDENTITY_INSERT SSCCustomer ON

    ;WITH cteImported AS(
    SELECT ImportedRowID from #tmpImported
    )
    INSERT TOP (@BatchRows) SSCCustomer(CustomerID, Name,Number,ImportRowID,ImportID)
    OUTPUT inserted.ImportRowID INTO #tmpImported
    SELECT CustomerID, Name,Number,ImportRowID,@ImportID
    FROM SSCImport i
    LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID)
    WHERE cteImported.ImportedRowID IS NULL
    AND i.CustomerID BETWEEN @RangeStart AND @RangeEnd
    AND i.ImportID = @ImportID
    SET @RowCount = @@ROWCOUNT

    SET IDENTITY_INSERT SSCCustomer OFF
    COMMIT TRAN
    IF @RowCount < @BatchRows BREAK
END

--get number of new call log rows to insert
SELECT @NumberOfRows = COUNT(*)
FROM SSCImport
WHERE CallLogID IS NULL

--update import table with ids just reserved
EXEC dbo.ReserveIDs 'SSCCallLog',@NumberOfRows,@NewID OUT
SET @RangeStart = @NewID + 1
TRUNCATE TABLE #tmpImported
WHILE 1=1
BEGIN
    ;WITH cteImported AS(
    SELECT ImportedRowID from #tmpImported
    )
    UPDATE TOP(@BatchRows) i 
    SET @NewID = CallLogID = @NewID + 1
    OUTPUT inserted.ImportRowID INTO #tmpImported
    FROM SSCImport i
    LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID)
    WHERE CallLogID IS NULL
    SET @RowCount = @@ROWCOUNT

    IF @RowCount < @BatchRows BREAK
END
SET @RangeEnd = @NewID

--insert new call log data
TRUNCATE TABLE #tmpImported
WHILE 1=1
BEGIN
    BEGIN TRAN
    SET IDENTITY_INSERT SSCCallLog ON

    ;WITH cteImported AS(
    SELECT ImportedRowID from #tmpImported
    )
    INSERT TOP (@BatchRows) SSCCallLog(CallLogID,CallAgentID,CustomerID,CallTime,ImportRowID,ImportID)
    OUTPUT inserted.ImportRowID INTO #tmpImported
    SELECT CallLogID,CallAgentID,CustomerID,CallTime,ImportRowID,@ImportID
    FROM SSCImport i
    LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID)
    WHERE cteImported.ImportedRowID IS NULL
    AND i.CallLogID BETWEEN @RangeStart AND @RangeEnd
    AND i.ImportID = @ImportID
    SET @RowCount = @@ROWCOUNT

    SET IDENTITY_INSERT SSCCallLog OFF
    COMMIT TRAN
    IF @RowCount < @BatchRows BREAK
END
-- ...update 18 more tables

END TRY
BEGIN CATCH
    IF XACT_STATE() <> 0 ROLLBACK TRAN
    DELETE FROM SSCCustomer WHERE ImportID = @ImportID
    DELETE FROM SSCCallLog WHERE ImportID = @ImportID
END CATCH

SELECT * FROM SSCImport
SELECT * FROM SSCCustomer
SELECT * FROM SSCCallLog


DROP TABLE SSCImport,#tmpImported,SSCCustomer,SSCCallLog
DROP FUNCTION dbo.GetExistingCustomerID
DROP PROC dbo.ReserveIDs

more ▼

answered Jan 10 '10 at 01:33 AM

Scot Hauder gravatar image

Scot Hauder
5.7k 13 15 18

(comments are locked)
10|1200 characters needed characters left

I use a sequence table for this. See This SSC Thread for an example script (in the context of the SSC thread).

more ▼

answered Jan 14 '10 at 01:51 AM

SQL Kiwi gravatar image

SQL Kiwi
1k 1 4

(comments are locked)
10|1200 characters needed characters left
Your answer
toggle preview:

Up to 2 attachments (including images) can be used with a maximum of 524.3 kB each and 1.0 MB total.

Follow this question

By Email:

Once you sign in you will be able to subscribe for any updates here

By RSS:

Answers

Answers and Comments



Facebook logo Follow Ask SSC on Facebook
Find Ask SSC on Google+
linkedin logo Find us on LinkedIn

Topics:

x58
x11

asked: Jan 09 '10 at 06:24 PM

Seen: 3403 times

Last Updated: Jan 09 '10 at 06:49 PM

Copyright © 2002-2012 Simple Talk Publishing. All Rights Reserved. If you have any queries, please contact the site administrators.
Ask SQL Server Central is a community service provided by Red Gate.