x

INSERT multiple related tables without using CURSOR

At many clients I see that they import files one row at a time using cursors. There is a similar theme of inserting one row into a table, getting the @@identity, using that to insert one row into a child table, getting that @@identity and inserting into a third related table etc. then moving on to the next row in the file--all in one huge transaction.

Toy Scenario:

1 I would like to use set based processing for speed

2 I have 10,000,000 rows in a flat data file that need processing. Given the size I don't want it all in one transaction

3 All updates/inserts should be in batches to mitigate locking contention

4 If the server dies in the middle of the import I need to be able to return the state of the db to before the import started

I have come up with the solution below. What I don't like Is I am writing twice, once in the table and once to update the IDs in my import/work table so I'm thinking there is room for speed enhancement. Is there a better solution? What have you done in the past?

CREATE TABLE SSCImport(ImportRowID int identity,ImportID uniqueidentifier,Name varchar(50),Number varchar(50)/* 100 more fields*/,CallAgentID int,CallTime datetime, CustomerID int null,CallLogID int /*...18 more table ids*/) CREATE TABLE #tmpImported(ImportedRowID int) CREATE TABLE SSCCustomer(CustomerID int identity, Name varchar(50),Number varchar(50),ImportRowID int,ImportID uniqueidentifier) CREATE TABLE SSCCallLog(CallLogID int identity,CallAgentID int,CustomerID int,CallTime datetime,ImportRowID int,ImportID uniqueidentifier) --...18 more tables CREATE CLUSTERED INDEX idx ON SSCImport(ImportRowID) CREATE CLUSTERED INDEX idx ON #tmpImported(ImportedRowID) CREATE CLUSTERED INDEX idx ON SSCCallLog(CallLogID) go CREATE FUNCTION dbo.GetExistingCustomerID(@Name varchar(50),@Number varchar(50)) RETURNS int AS BEGIN --complex multi-statement logic to determine if customer exists DECLARE @CustomerID int SET @CustomerID = CASE WHEN @Name='Alice' AND @Number='1111111111' THEN 1 ELSE NULL END RETURN @CustomerID END go DECLARE @ImportID uniqueidentifier SET @ImportID = NEWID()

--create existing customer INSERT SSCCustomer SELECT 'Alice','1111111111',1,NEWID()

--bulk insert records into import table (10,000,000 rows) INSERT SSCImport SELECT @ImportID,'Alice','1111111111',1,'2010-01-09 09:47:44.410',NULL,NULL UNION ALL SELECT @ImportID,'Mary','2222222222',2,'2010-01-09 010:47:44.410',NULL,NULL UNION ALL SELECT @ImportID,'Bob','3333333333',3,'2010-01-09 011:47:44.410',NULL,NULL

DECLARE @BatchRows int,@RowCount int SET @BatchRows = 5000

--query from MDM/CDI store to get existing customerid, store customerid in import table for later reference BEGIN TRY WHILE 1=1 BEGIN BEGIN TRAN ;WITH cteImported AS( SELECT ImportedRowID from #tmpImported ) UPDATE TOP(@BatchRows) i SET CustomerID = dbo.GetExistingCustomerID(Name,Number) OUTPUT inserted.ImportRowID INTO #tmpImported FROM SSCImport i LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID) WHERE cteImported.ImportedRowID IS NULL AND i.ImportID = @ImportID SET @RowCount = @@ROWCOUNT

 COMMIT TRAN
 IF @RowCount < @BatchRows BREAK

END

--insert new customers TRUNCATE TABLE #tmpImported WHILE 1=1 BEGIN BEGIN TRAN ;WITH cteImported AS( SELECT ImportedRowID from #tmpImported ) INSERT TOP (@BatchRows) SSCCustomer OUTPUT inserted.ImportRowID INTO #tmpImported SELECT Name,Number,ImportRowID,@ImportID FROM SSCImport i LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID) WHERE cteImported.ImportedRowID IS NULL AND i.CustomerID IS NULL AND i.ImportID = @ImportID SET @RowCount = @@ROWCOUNT

 --store customerid in import table for later reference
 UPDATE i
 SET CustomerID = c.CustomerID
 FROM SSCCustomer c
 JOIN SSCImport i ON (i.ImportID = c.ImportID AND i.ImportRowID = c.ImportRowID)

 COMMIT TRAN
 IF @RowCount < @BatchRows BREAK

END

--insert new call log data TRUNCATE TABLE #tmpImported WHILE 1=1 BEGIN BEGIN TRAN ;WITH cteImported AS( SELECT ImportedRowID from #tmpImported ) INSERT TOP (@BatchRows) SSCCallLog OUTPUT inserted.ImportRowID INTO #tmpImported SELECT CallAgentID,CustomerID,CallTime,ImportRowID,@ImportID FROM SSCImport i LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID) WHERE cteImported.ImportedRowID IS NULL AND i.CustomerID IS NOT NULL AND i.ImportID = @ImportID SET @RowCount = @@ROWCOUNT

 --store calllogid in import table for later reference
 UPDATE i
 SET CallLogID = cl.CallLogID
 FROM SSCCallLog cl
 JOIN SSCImport i ON (i.ImportID = cl.ImportID AND i.ImportRowID = cl.ImportRowID)

 COMMIT TRAN
 IF @RowCount < @BatchRows BREAK

END -- ...update 18 more tables

END TRY BEGIN CATCH IF XACT_STATE() <> 0 ROLLBACK TRAN DELETE FROM SSCCustomer WHERE ImportID = @ImportID DELETE FROM SSCCallLog WHERE ImportID = @ImportID END CATCH

SELECT FROM SSCImport SELECT FROM SSCCustomer SELECT * FROM SSCCallLog

DROP TABLE SSCImport,#tmpImported,SSCCustomer,SSCCallLog DROP FUNCTION dbo.GetExistingCustomerID

more ▼

asked Jan 09, 2010 at 06:24 PM in Default

avatar image

Scot Hauder
6.4k 13 16 22

Have you considered bcp or SSIS?

Jan 09, 2010 at 06:48 PM Håkan Winther
(comments are locked)
10|1200 characters needed characters left

3 answers: sort voted first

I would say that this looks pretty sweet to me. I've had very similar experiences in the past, particularly with one system that had to load data from 350 petrol stations in near-real time. Initially there was an RBAR solution, and I re-wrote it to something similar to the above, which really made the whole thing a lot less painful.

The only other solution that I thought of was to use GUIDs instead of int IDENTITY rows, and use those for all primary/foreign keys. That way, you could pre-process the data on the client, and load directly into the target tables using BCP, knowing that you had already related one set of data to the other using client-generated GUIDs. However, I thought that if I ever had to load a large set of data (which I did once, after a tech guy broke the storage system by removing a disk shelf accidentally), then the amount of memory needed to do the client side relational work would be pretty prohibitive. I also thought that it would be slightly arrogant of me to assume that I would be better at handling large data operations than the people who wrote SQL Server...

more ▼

answered Jan 09, 2010 at 06:56 PM

avatar image

Matt Whitfield ♦♦
29.5k 62 66 88

(comments are locked)
10|1200 characters needed characters left

Ok Matt, here ya go. I will try it out on a 10,000,000 file

CREATE TABLE SSCImport(ImportRowID int identity,ImportID uniqueidentifier,Name varchar(50),Number varchar(50)/* 100 more fields*/,CallAgentID int,CallTime datetime, CustomerID int null,CallLogID int /*...18 more table ids*/) CREATE TABLE #tmpImported(ImportedRowID int) CREATE TABLE SSCCustomer(CustomerID int identity(1,1), Name varchar(50),Number varchar(50),ImportRowID int,ImportID uniqueidentifier) CREATE TABLE SSCCallLog(CallLogID int identity(1,1),CallAgentID int,CustomerID int,CallTime datetime,ImportRowID int,ImportID uniqueidentifier) --...18 more tables CREATE CLUSTERED INDEX idx ON SSCImport(ImportRowID) CREATE CLUSTERED INDEX idx ON #tmpImported(ImportedRowID) CREATE CLUSTERED INDEX idx ON SSCCallLog(CallLogID) go CREATE PROC ReserveIDs(@TableName varchar(50),@NumberOfIDs int, @RangeStart int OUT) WITH EXECUTE AS OWNER AS BEGIN SET NOCOUNT ON DECLARE @ret int

 BEGIN TRAN
 IF @TableName = 'SSCCustomer'
 BEGIN
     SELECT @ret = COUNT(*) FROM SSCCustomer WITH(HOLDLOCK,TABLOCKX)
     SELECT @RangeStart = IDENT_CURRENT(@TableName) 
     SET @NumberOfIDs = @NumberOfIDs + @RangeStart
     DBCC CHECKIDENT(@TableName,RESEED, @NumberOfIDs)
 END

 IF @TableName = 'SSCCallLog'
 BEGIN
     SELECT @ret = COUNT(*) FROM SSCCallLog WITH(HOLDLOCK,TABLOCKX)
     SELECT @RangeStart = IDENT_CURRENT(@TableName) 
     SET @NumberOfIDs = @NumberOfIDs + @RangeStart
     DBCC CHECKIDENT(@TableName,RESEED, @NumberOfIDs)
 END
 COMMIT TRAN

END go

CREATE FUNCTION dbo.GetExistingCustomerID(@Name varchar(50),@Number varchar(50)) RETURNS int AS BEGIN --complex multi-statement logic to determine if customer exists DECLARE @CustomerID int SET @CustomerID = CASE WHEN @Name='Alice' AND @Number='1111111111' THEN 1 ELSE NULL END RETURN @CustomerID END go

DECLARE @ImportID uniqueidentifier SET @ImportID = NEWID()

--create existing customer INSERT SSCCustomer SELECT 'Alice','1111111111',1,NEWID()

--bulk insert records into import table (10,000,000 rows) INSERT SSCImport SELECT @ImportID,'Alice','1111111111',1,'2010-01-09 09:47:44.410',NULL,NULL UNION ALL SELECT @ImportID,'Mary','2222222222',2,'2010-01-09 010:47:44.410',NULL,NULL UNION ALL SELECT @ImportID,'Bob','3333333333',3,'2010-01-09 011:47:44.410',NULL,NULL

DECLARE @BatchRows int,@RowCount int, @NumberOfRows int, @RangeStart int, @RangeEnd int, @NewID int SET @BatchRows = 5000

--query from MDM/CDI store to get existing customerid, store customerid in import table for later reference BEGIN TRY WHILE 1=1 BEGIN BEGIN TRAN ;WITH cteImported AS( SELECT ImportedRowID from #tmpImported ) UPDATE TOP(@BatchRows) i SET CustomerID = dbo.GetExistingCustomerID(Name,Number) OUTPUT inserted.ImportRowID INTO #tmpImported FROM SSCImport i LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID) WHERE cteImported.ImportedRowID IS NULL AND i.ImportID = @ImportID SET @RowCount = @@ROWCOUNT

 COMMIT TRAN
 IF @RowCount &lt; @BatchRows BREAK

END --get number of new customer rows to insert SELECT @NumberOfRows = COUNT(*) FROM SSCImport WHERE CustomerID IS NULL

--update import table with ids just reserved EXEC dbo.ReserveIDs 'SSCCustomer',@NumberOfRows,@NewID OUT SET @RangeStart = @NewID + 1 TRUNCATE TABLE #tmpImported WHILE 1=1 BEGIN ;WITH cteImported AS( SELECT ImportedRowID from #tmpImported ) UPDATE TOP(@BatchRows) i SET @NewID = CustomerID = @NewID + 1 OUTPUT inserted.ImportRowID INTO #tmpImported FROM SSCImport i LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID) WHERE CustomerID IS NULL SET @RowCount = @@ROWCOUNT

 IF @RowCount &lt; @BatchRows BREAK

END SET @RangeEnd = @NewID

--insert new customers TRUNCATE TABLE #tmpImported WHILE 1=1 BEGIN BEGIN TRAN SET IDENTITY_INSERT SSCCustomer ON

 ;WITH cteImported AS(
 SELECT ImportedRowID from #tmpImported
 )
 INSERT TOP (@BatchRows) SSCCustomer(CustomerID, Name,Number,ImportRowID,ImportID)
 OUTPUT inserted.ImportRowID INTO #tmpImported
 SELECT CustomerID, Name,Number,ImportRowID,@ImportID
 FROM SSCImport i
 LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID)
 WHERE cteImported.ImportedRowID IS NULL
 AND i.CustomerID BETWEEN @RangeStart AND @RangeEnd
 AND i.ImportID = @ImportID
 SET @RowCount = @@ROWCOUNT

 SET IDENTITY_INSERT SSCCustomer OFF
 COMMIT TRAN
 IF @RowCount &lt; @BatchRows BREAK

END

--get number of new call log rows to insert SELECT @NumberOfRows = COUNT(*) FROM SSCImport WHERE CallLogID IS NULL

--update import table with ids just reserved EXEC dbo.ReserveIDs 'SSCCallLog',@NumberOfRows,@NewID OUT SET @RangeStart = @NewID + 1 TRUNCATE TABLE #tmpImported WHILE 1=1 BEGIN ;WITH cteImported AS( SELECT ImportedRowID from #tmpImported ) UPDATE TOP(@BatchRows) i SET @NewID = CallLogID = @NewID + 1 OUTPUT inserted.ImportRowID INTO #tmpImported FROM SSCImport i LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID) WHERE CallLogID IS NULL SET @RowCount = @@ROWCOUNT

 IF @RowCount &lt; @BatchRows BREAK

END SET @RangeEnd = @NewID

--insert new call log data TRUNCATE TABLE #tmpImported WHILE 1=1 BEGIN BEGIN TRAN SET IDENTITY_INSERT SSCCallLog ON

 ;WITH cteImported AS(
 SELECT ImportedRowID from #tmpImported
 )
 INSERT TOP (@BatchRows) SSCCallLog(CallLogID,CallAgentID,CustomerID,CallTime,ImportRowID,ImportID)
 OUTPUT inserted.ImportRowID INTO #tmpImported
 SELECT CallLogID,CallAgentID,CustomerID,CallTime,ImportRowID,@ImportID
 FROM SSCImport i
 LEFT JOIN cteImported ON (cteImported.ImportedRowID = i.ImportRowID)
 WHERE cteImported.ImportedRowID IS NULL
 AND i.CallLogID BETWEEN @RangeStart AND @RangeEnd
 AND i.ImportID = @ImportID
 SET @RowCount = @@ROWCOUNT

 SET IDENTITY_INSERT SSCCallLog OFF
 COMMIT TRAN
 IF @RowCount &lt; @BatchRows BREAK

END -- ...update 18 more tables

END TRY BEGIN CATCH IF XACT_STATE() <> 0 ROLLBACK TRAN DELETE FROM SSCCustomer WHERE ImportID = @ImportID DELETE FROM SSCCallLog WHERE ImportID = @ImportID END CATCH

SELECT FROM SSCImport SELECT FROM SSCCustomer SELECT * FROM SSCCallLog

DROP TABLE SSCImport,#tmpImported,SSCCustomer,SSCCallLog DROP FUNCTION dbo.GetExistingCustomerID DROP PROC dbo.ReserveIDs

more ▼

answered Jan 10, 2010 at 01:33 AM

avatar image

Scot Hauder
6.4k 13 16 22

(comments are locked)
10|1200 characters needed characters left

I use a sequence table for this. See This SSC Thread for an example script (in the context of the SSC thread).

more ▼

answered Jan 14, 2010 at 01:51 AM

avatar image

SQL Kiwi
1.2k 1 3 6

(comments are locked)
10|1200 characters needed characters left
Your answer
toggle preview:

Up to 2 attachments (including images) can be used with a maximum of 524.3 kB each and 1.0 MB total.

Follow this question

By Email:

Once you sign in you will be able to subscribe for any updates here

By RSS:

Answers

Answers and Comments

SQL Server Central

Need long-form SQL discussion? SQLserverCentral.com is the place.

Topics:

x70
x14

asked: Jan 09, 2010 at 06:24 PM

Seen: 4904 times

Last Updated: Jan 09, 2010 at 06:49 PM

Copyright 2016 Redgate Software. Privacy Policy