MSBI--enlarge the DW database table volume

时间:2021-08-05 23:25:39

我们在学习MSBI的时候,经常会使用官方提供的Adventureworks和AdventureworksDW示例数据库,但是官方提供的数据量有点小,

以DW为例,Factinternetsales只有不到七万行的数据,我们很难做某些测试,譬如说想对事实表做一个分区测试,测试CUBE性能等.

为此,我想以FactInternetSales表为入口,扩大它的数据量到数亿条.

经过分析,我觉得只要理解主键是怎么生成的,把orderdatekey,duedatekey,shipdatekey修改一下,再随机生成productkey与customerkey

(其实是从这两个维表中随机抽取key),基本上就可以装配好数据.

以下是我的代码,仅供参考.欢迎同学们提供更好的思路.

我现在是先取出salesordernumber最大的那个,然后基于这个进行递加,还有maxorderdate.

一天随机抽取300个产品和1000个客户进行匹配,这样就可以产生三十万条记录.

循环次数受DimDate限制:必须保证生成的DateKey在DimDate里面存在(也可以自己扩容DimDate).

如果循环700次,即可创建大约二亿条数据,基本可以满足测试的需求了.

随机是使用order by newid()来实现的,然后通过row_number()生成行号,再加上当前最大的salesordernumber,作为新的salesordernumber.

-- Enlarges FactInternetSales with synthetic rows for partitioning / cube performance tests.
--
-- Each loop iteration generates one new "order day":
--   * up to 300 random products (with a non-NULL DealerPrice and StandardCost) are
--     cross-joined with up to 1000 random customers, giving up to 300,000 rows;
--   * every row gets a fresh SalesOrderNumber that continues from the current maximum;
--   * the order date advances by one day.
--
-- NOTE(review): only the INSERT target is qualified with AdventureWorksDW2008R2; the other
-- table references resolve against the current database, so presumably the script is meant
-- to be run with AdventureWorksDW2008R2 as the current database -- confirm.
DECLARE
    @maxsonumber  INT,   -- numeric part of the highest SalesOrderNumber generated so far
    @maxorderdate DATE,  -- latest order date present so far
    @loopcount    INT,   -- iteration counter
    @neworderdate DATE,  -- order date used by the current iteration
    @neworderkey  INT,   -- @neworderdate as a yyyymmdd integer key
    @rowsinserted INT,   -- rows added by the current iteration
    @failed       BIT;   -- set to 1 by the CATCH block so the loop can stop

-- Seed the counters from the existing data. SalesOrderNumber looks like 'SO<digits>', so the
-- two-character prefix is stripped before casting. OrderDateKey is a yyyymmdd number, which
-- converts to DATE directly once rendered as an 8-character string.
SELECT
    @maxsonumber  = MAX(CAST(REPLACE(SalesOrderNumber, LEFT(SalesOrderNumber, 2), '') AS INT)),
    @maxorderdate = CAST(CAST(MAX(OrderDateKey) AS CHAR(8)) AS DATE)
FROM FactInternetSales;

-- Show the starting point.
SELECT @maxorderdate;
SELECT @maxsonumber;

SET @loopcount = 1;
SET @failed    = 0;

WHILE @loopcount < 100
BEGIN
    SET @rowsinserted = 0;
    SET @neworderdate = DATEADD(DAY, 1, @maxorderdate);
    -- Style 112 renders a date as yyyymmdd regardless of language/DATEFORMAT settings.
    SET @neworderkey  = CAST(CONVERT(CHAR(8), @neworderdate, 112) AS INT);

    BEGIN TRY
        BEGIN TRAN;

        INSERT INTO [AdventureWorksDW2008R2].[dbo].[FactInternetSales]
            ([ProductKey]
            ,[OrderDateKey]
            ,[DueDateKey]
            ,[ShipDateKey]
            ,[CustomerKey]
            ,[PromotionKey]
            ,[CurrencyKey]
            ,[SalesTerritoryKey]
            ,[SalesOrderNumber]
            ,[SalesOrderLineNumber]
            ,[RevisionNumber]
            ,[OrderQuantity]
            ,[UnitPrice]
            ,[ExtendedAmount]
            ,[UnitPriceDiscountPct]
            ,[DiscountAmount]
            ,[ProductStandardCost]
            ,[TotalProductCost]
            ,[SalesAmount]
            ,[TaxAmt]
            ,[Freight]
            ,[CarrierTrackingNumber]
            ,[CustomerPONumber])
        SELECT
             X.ProductKey
            ,@neworderkey                                   -- OrderDateKey
            ,@neworderkey                                   -- DueDateKey
             -- Ship date is counted from the NEW order date so that it can never fall
             -- before the order date (a product with DaysToManufacture = 0 ships same day).
            ,CAST(CONVERT(CHAR(8),
                          DATEADD(DAY, COALESCE(X.DaysToManufacture, 0), @neworderdate),
                          112) AS INT)                      -- ShipDateKey
            ,X.CustomerKey
            ,1                                              -- PromotionKey
            ,100                                            -- CurrencyKey
            ,10                                             -- SalesTerritoryKey
            ,'SO' + CAST(@maxsonumber + X.rowno AS VARCHAR(18))
            ,1                                              -- SalesOrderLineNumber
            ,1                                              -- RevisionNumber
            ,1                                              -- OrderQuantity
            ,X.DealerPrice                                  -- UnitPrice
            ,X.DealerPrice                                  -- ExtendedAmount
            ,0                                              -- UnitPriceDiscountPct
            ,0                                              -- DiscountAmount
            ,X.StandardCost                                 -- ProductStandardCost
            ,X.StandardCost                                 -- TotalProductCost
            ,X.DealerPrice                                  -- SalesAmount
            ,0                                              -- TaxAmt
            ,0                                              -- Freight
            ,NULL                                           -- CarrierTrackingNumber
            ,NULL                                           -- CustomerPONumber
        FROM
        (
            -- Number the product/customer pairs in random order; the number is added to
            -- @maxsonumber to form a unique SalesOrderNumber per generated row.
            SELECT
                ROW_NUMBER() OVER (ORDER BY NEWID()) AS rowno,
                P.ProductKey,
                P.DealerPrice,
                P.StandardCost,
                P.DaysToManufacture,
                C.CustomerKey
            FROM
                (SELECT TOP 300 ProductKey, DealerPrice, StandardCost, DaysToManufacture
                 FROM DimProduct
                 WHERE DealerPrice IS NOT NULL
                   AND StandardCost IS NOT NULL
                 ORDER BY NEWID()) AS P
            CROSS JOIN
                (SELECT TOP 1000 CustomerKey
                 FROM DimCustomer
                 ORDER BY NEWID()) AS C
        ) AS X;

        -- Must be read immediately after the INSERT, before any other statement resets it.
        SET @rowsinserted = @@ROWCOUNT;

        COMMIT TRAN;
    END TRY
    BEGIN CATCH
        PRINT ERROR_MESSAGE()
        IF @@TRANCOUNT > 0
            ROLLBACK TRAN;
        SET @failed = 1;
    END CATCH;

    -- Stop on the first failure (e.g. a date key missing from DimDate) instead of retrying
    -- the same failing insert, and stop if there was nothing to insert.
    IF @failed = 1 OR @rowsinserted = 0
        BREAK;

    -- Advance the counters locally rather than re-scanning the ever-growing fact table:
    -- the batch used order numbers @maxsonumber+1 .. @maxsonumber+@rowsinserted and the
    -- single order date @neworderdate.
    SET @maxsonumber  += @rowsinserted;
    SET @maxorderdate  = @neworderdate;

    PRINT 'maxsonumber:' + CAST(@maxsonumber AS VARCHAR(30))
    PRINT 'maxorderdate:' + CAST(@maxorderdate AS VARCHAR(30))

    SET @loopcount += 1;
END;