/*============================================================================
  File:     02_Joins.sql

  SQL Server Versions: 2008, 2008R2, 2012, 2014, 2016, 2017, 2019
------------------------------------------------------------------------------
  Written by Erin Stellato, SQLskills.com
  
  (c) 2019, SQLskills.com. All rights reserved.

  For more scripts and sample code, check out 
    http://www.SQLskills.com

  You may alter this code for your own *non-commercial* purposes. You may
  republish altered code as long as you include this copyright and give due
  credit, but you must obtain prior permission before blogging this code.
  
  THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF 
  ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED 
  TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  PARTICULAR PURPOSE.
============================================================================*/
USE AdventureWorks2019;
GO

-- Turn on I/O and timing statistics for the statements that follow.
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
GO

-- Join demo, part 1: order detail rows for a single customer.
-- In the plan, work out:
--   * which table is the "outer" input?
--   * which table is the "inner" input?
--   * what is the cost?
SELECT
    soh.CustomerID,
    sod.ProductID
FROM Sales.SalesOrderDetail AS sod
INNER JOIN Sales.SalesOrderHeader AS soh
    ON sod.SalesOrderID = soh.SalesOrderID
WHERE soh.CustomerID = 29712;
GO


-- What if the outer and inner inputs are reversed?
-- FORCE ORDER asks the optimizer to keep the join order as written
-- (SalesOrderDetail first, then SalesOrderHeader).
-- * This hint is NOT recommended; it is used here for demonstration only.
SELECT
    soh.CustomerID,
    sod.ProductID
FROM Sales.SalesOrderDetail AS sod
INNER JOIN Sales.SalesOrderHeader AS soh
    ON sod.SalesOrderID = soh.SalesOrderID
WHERE soh.CustomerID = 29712
OPTION (FORCE ORDER);
GO


-- Same query again, this time forcing BOTH the written join order
-- (FORCE ORDER) and the physical join type (LOOP join hint).
SELECT
    soh.CustomerID,
    sod.ProductID
FROM Sales.SalesOrderDetail AS sod
INNER LOOP JOIN Sales.SalesOrderHeader AS soh
    ON sod.SalesOrderID = soh.SalesOrderID
WHERE soh.CustomerID = 29712
OPTION (FORCE ORDER);
GO


-- Order quantities for product 870, with the product name.
-- Watch for scans on the inner table in the plan, and note the cost.
SELECT
    sod.SalesOrderID,
    prod.Name,
    sod.OrderQty
FROM Production.Product AS prod
INNER JOIN Sales.SalesOrderDetail AS sod
    ON prod.ProductID = sod.ProductID
WHERE sod.ProductID = 870;
GO


-- Look at the data first: list the indexes on both tables.
-- sp_SQLskills_helpindex is not created by this script; it has to be
-- installed on the instance already.
EXECUTE sp_SQLskills_helpindex 'Sales.SalesOrderHeader';
GO
EXECUTE sp_SQLskills_helpindex 'Sales.SalesOrderDetail';
GO


-- SalesOrderID values present in BOTH detail and header.
SELECT SalesOrderID FROM Sales.SalesOrderDetail
INTERSECT
SELECT SalesOrderID FROM Sales.SalesOrderHeader;
GO

-- SalesOrderID values present in detail but missing from header.
SELECT SalesOrderID FROM Sales.SalesOrderDetail
EXCEPT
SELECT SalesOrderID FROM Sales.SalesOrderHeader;
GO

-- SalesOrderID values present in header but missing from detail.
SELECT SalesOrderID FROM Sales.SalesOrderHeader
EXCEPT
SELECT SalesOrderID FROM Sales.SalesOrderDetail;
GO


-- Enable the actual plan, then join header to detail on SalesOrderID.
-- Questions to answer from the plan:
--   * What is the join type, and what is the cost?
--   * How is the data ordered?
--   * Which indexes are used?
--   * Is the join many-to-many?
SELECT
    soh.SalesOrderID,
    sod.SalesOrderDetailID,
    soh.OrderDate,
    soh.CustomerID,
    soh.SubTotal
FROM Sales.SalesOrderHeader AS soh
INNER JOIN Sales.SalesOrderDetail AS sod
    ON soh.SalesOrderID = sod.SalesOrderID;
GO

/*
	Create a copy of SalesOrderHeader and SalesOrderDetail.

	Copy_SalesOrderHeader gets a clustered primary key on SalesOrderID (ASC).
	Copy_SalesOrderDetail gets a clustered primary key on
	(SalesOrderID, SalesOrderDetailID), both DESCENDING -- i.e. the primary
	key is changed slightly compared to the source table.

	Any copies left behind by an earlier, aborted run are dropped first so the
	section can be re-run. OBJECT_ID is used (rather than DROP TABLE IF EXISTS)
	to stay compatible with the older versions listed in the file header.
*/
IF OBJECT_ID(N'Sales.Copy_SalesOrderHeader', N'U') IS NOT NULL
	DROP TABLE [Sales].[Copy_SalesOrderHeader];
GO

IF OBJECT_ID(N'Sales.Copy_SalesOrderDetail', N'U') IS NOT NULL
	DROP TABLE [Sales].[Copy_SalesOrderDetail];
GO

/* SELECT * is intentional here: the whole table is being cloned. */
SELECT *
INTO [Sales].[Copy_SalesOrderHeader]
FROM [Sales].[SalesOrderHeader];
GO

ALTER TABLE [Sales].[Copy_SalesOrderHeader] 
ADD  CONSTRAINT [PK_Copy_SalesOrderHeader_SalesOrderID] PRIMARY KEY CLUSTERED 
(
	[SalesOrderID] ASC
);
GO

SELECT *
INTO [Sales].[Copy_SalesOrderDetail]
FROM [Sales].[SalesOrderDetail];
GO

ALTER TABLE [Sales].[Copy_SalesOrderDetail] 
ADD  CONSTRAINT [PK_Copy_SalesOrderDetail_SalesOrderID_SalesOrderDetailID] 
PRIMARY KEY CLUSTERED 
(
	[SalesOrderID] DESC,
	[SalesOrderDetailID] DESC
);
GO


-- Same join, but against the copies, where the detail index is ordered
-- in reverse.
--   * What do we expect for the join type and the indexes used?
--   * What is the cost?
SELECT
    soh.SalesOrderID,
    sod.SalesOrderDetailID,
    soh.OrderDate,
    soh.CustomerID,
    soh.SubTotal
FROM Sales.Copy_SalesOrderHeader AS soh
INNER JOIN Sales.Copy_SalesOrderDetail AS sod
    ON soh.SalesOrderID = sod.SalesOrderID;
GO

-- Remove the primary key constraint from the header copy.
ALTER TABLE Sales.Copy_SalesOrderHeader
DROP CONSTRAINT PK_Copy_SalesOrderHeader_SalesOrderID;
GO


-- Re-run the join now that the header copy has no primary key.
-- What type of join is chosen, and why?
SELECT
    soh.SalesOrderID,
    sod.SalesOrderDetailID,
    soh.OrderDate,
    soh.CustomerID,
    soh.SubTotal
FROM Sales.Copy_SalesOrderHeader AS soh
INNER JOIN Sales.Copy_SalesOrderDetail AS sod
    ON soh.SalesOrderID = sod.SalesOrderID;
GO

-- Re-run the join and force a merge join with a query hint.
-- How does the data get ordered for the merge?
SELECT
    soh.SalesOrderID,
    sod.SalesOrderDetailID,
    soh.OrderDate,
    soh.CustomerID,
    soh.SubTotal
FROM Sales.Copy_SalesOrderHeader AS soh
INNER JOIN Sales.Copy_SalesOrderDetail AS sod
    ON soh.SalesOrderID = sod.SalesOrderID
OPTION (MERGE JOIN);
GO


-- Put a clustered index back on Copy_SalesOrderHeader, but as a plain
-- (non-unique) index rather than a primary key.
CREATE CLUSTERED INDEX CI_Copy_SalesOrderHeader
ON Sales.Copy_SalesOrderHeader (SalesOrderID ASC);
GO


-- Run the join once more with the new clustered index in place.
--   * What is the join type?
--   * What is the cost?
SELECT
    soh.SalesOrderID,
    sod.SalesOrderDetailID,
    soh.OrderDate,
    soh.CustomerID,
    soh.SubTotal
FROM Sales.Copy_SalesOrderHeader AS soh
INNER JOIN Sales.Copy_SalesOrderDetail AS sod
    ON soh.SalesOrderID = sod.SalesOrderID;
GO

-- Same join against the copies, with the merge join forced by hint.
SELECT
    soh.SalesOrderID,
    sod.SalesOrderDetailID,
    soh.OrderDate,
    soh.CustomerID,
    soh.SubTotal
FROM Sales.Copy_SalesOrderHeader AS soh
INNER JOIN Sales.Copy_SalesOrderDetail AS sod
    ON soh.SalesOrderID = sod.SalesOrderID
OPTION (MERGE JOIN);
GO

-- Compare the forced merge on the copies against the optimizer's own
-- choice for the same join on the original tables.
SELECT
    soh.SalesOrderID,
    sod.SalesOrderDetailID,
    soh.OrderDate,
    soh.CustomerID,
    soh.SubTotal
FROM Sales.SalesOrderHeader AS soh
INNER JOIN Sales.SalesOrderDetail AS sod
    ON soh.SalesOrderID = sod.SalesOrderID;
GO

-- Clean up: remove both copy tables.
DROP TABLE Sales.Copy_SalesOrderHeader;
GO
DROP TABLE Sales.Copy_SalesOrderDetail;
GO

-- Every order detail row with its product name and list price.
-- From the plan:
--   * Which input is the "build" input?
--   * Which input is the "probe" input?
--   * What is the cost? What is the memory grant in KB?
--   * Did the optimizer make a "good decision"?
SELECT
    sod.SalesOrderID,
    sod.SalesOrderDetailID,
    sod.ProductID,
    sod.OrderQty,
    prod.Name,
    prod.ListPrice
FROM Sales.SalesOrderDetail AS sod
INNER JOIN Production.Product AS prod
    ON sod.ProductID = prod.ProductID;
GO

-- What if build and probe were reversed?
-- Run the next two queries together, with ** Actual Plan ** enabled, and
-- compare them side by side: the original query versus the forced one.
--   * How does the plan change? The cost? The memory grant?
--   * Note the cost estimates :)

-- Start from an empty plan cache.
DBCC FREEPROCCACHE;
GO

-- Query 1: optimizer's choice.
SELECT
    sod.SalesOrderID,
    sod.SalesOrderDetailID,
    sod.ProductID,
    sod.OrderQty,
    prod.Name,
    prod.ListPrice
FROM Sales.SalesOrderDetail AS sod
INNER JOIN Production.Product AS prod
    ON sod.ProductID = prod.ProductID;
GO

-- Query 2: join order forced to match the FROM clause as written.
SELECT
    sod.SalesOrderID,
    sod.SalesOrderDetailID,
    sod.ProductID,
    sod.OrderQty,
    prod.Name,
    prod.ListPrice
FROM Sales.SalesOrderDetail AS sod
INNER JOIN Production.Product AS prod
    ON sod.ProductID = prod.ProductID
OPTION (FORCE ORDER);
GO




/*
	A second query, looking at a lot more data

	Sales.Orders (with ContactPersonID) is not an AdventureWorks table; it
	belongs to the WideWorldImporters sample database, so switch databases
	before running it. Everything from here to the end of the script runs in
	that database.
	NOTE(review): if your (enlarged) copy of WideWorldImporters is restored
	under a different name, change the USE statement accordingly.
*/
USE [WideWorldImporters];
GO

SELECT
	[CustomerID],
	[OrderDate],
	[ContactPersonID]
FROM [Sales].[Orders]
WHERE [OrderDate] BETWEEN '2016-01-01 00:00:00.000' AND '2016-12-31 23:59:59.997'
ORDER BY [OrderDate]
OPTION (RECOMPILE);
GO


-- Show the row count and page count recorded for each index and
-- allocation unit of Sales.Orders in the current database.
SELECT
    OBJECT_NAME(part.[object_id]) AS [TableName],
    idx.[name]                    AS [IndexName],
    alloc.[type_desc]             AS [Type],
    part.[rows]                   AS [RowCount],
    alloc.[total_pages]           AS [PageCount]
FROM sys.partitions AS part
INNER JOIN sys.allocation_units AS alloc
    ON part.[partition_id] = alloc.[container_id]
INNER JOIN sys.indexes AS idx
    ON  part.[object_id] = idx.[object_id]
    AND part.[index_id]  = idx.[index_id]
WHERE part.[object_id] = OBJECT_ID(N'Sales.Orders');
GO

-- Trick the optimizer a bit: override the row count (and page count)
-- recorded for Sales.Orders.
-- ** Don't try this at home!! **
UPDATE STATISTICS Sales.Orders
WITH
    ROWCOUNT  = 1000000,
    PAGECOUNT = 15509;
GO

-- Re-run the 2016 orders query (compiled fresh because of RECOMPILE).
SELECT
    CustomerID,
    OrderDate,
    ContactPersonID
FROM Sales.Orders
WHERE OrderDate BETWEEN '2016-01-01 00:00:00.000' AND '2016-12-31 23:59:59.997'
ORDER BY OrderDate
OPTION (RECOMPILE);
GO


/*
	How to track these?
	In the default trace...
	C:\Program Files\Microsoft SQL Server\MSSQL14.ROGERS\MSSQL\Log

	Can we get better information through XEvents?  Yes ;)

	The session below captures hash and sort warnings (with the sql_text and
	tsql_stack actions) plus sql_batch_completed, and writes them to an event
	file under C:\temp. TRACK_CAUSALITY is ON so related events can be tied
	together.

	sql_batch_completed is used (not rpc_completed) because the demo queries
	in this script are submitted as ad hoc batches.

	NOTE(review): the C:\temp folder is assumed to exist on the server and to
	be writable by the SQL Server service account -- confirm before running.
*/

/* Drop a leftover session from an earlier run so this section is re-runnable. */
IF EXISTS (
	SELECT 1
	FROM [sys].[server_event_sessions]
	WHERE [name] = N'SortWarnings'
)
	DROP EVENT SESSION [SortWarnings] ON SERVER;
GO

CREATE EVENT SESSION [SortWarnings] 
	ON SERVER 
	ADD EVENT sqlserver.hash_warning(
		ACTION(
			sqlserver.sql_text,sqlserver.tsql_stack)
			),
	ADD EVENT sqlserver.sort_warning(
		ACTION(
			sqlserver.sql_text,sqlserver.tsql_stack)
			),
	ADD EVENT sqlserver.sql_batch_completed
ADD TARGET package0.event_file(
	SET filename=N'C:\temp\SortWarnings',max_file_size=(512)
	)
WITH (MAX_MEMORY=4096 KB,EVENT_RETENTION_MODE=ALLOW_SINGLE_EVENT_LOSS,
MAX_DISPATCH_LATENCY=30 SECONDS,MAX_EVENT_SIZE=0 KB,MEMORY_PARTITION_MODE=NONE,
TRACK_CAUSALITY=ON,STARTUP_STATE=OFF);
GO

ALTER EVENT SESSION [SortWarnings]
	ON SERVER
	STATE=START;
GO

-- Run the 2016 orders query one more time, then go and view the data
-- collected by the event session.
SELECT
    CustomerID,
    OrderDate,
    ContactPersonID
FROM Sales.Orders
WHERE OrderDate BETWEEN '2016-01-01 00:00:00.000' AND '2016-12-31 23:59:59.997'
ORDER BY OrderDate
OPTION (RECOMPILE);
GO


/*
	note that the XE session isn't perfect - you can't rely on sql_text.
	Either use tsql_stack on the warning events and remove the
	batch-completed event, or remove tsql_stack and sql_text and keep the
	batch-completed event.

	clean up:
	  1. stop and drop the event session
	  2. put the real row/page counts back on Sales.Orders
*/
ALTER EVENT SESSION [SortWarnings]
	ON SERVER
	STATE=STOP;
GO

DROP EVENT SESSION [SortWarnings]
	ON SERVER;
GO

/*
	Undo the faked ROWCOUNT/PAGECOUNT by having SQL Server recount the table,
	rather than writing back a hard-coded row count that is only correct for
	one particular copy of the data. 0 = current database.
*/
DBCC UPDATEUSAGE (0, N'Sales.Orders') WITH COUNT_ROWS;
GO




