Example: Index fragmentation with insert/updates, measuring it and fixing it

This blog shows a very simple example to illustrate data fragmentation, measuring it, seeing behind the scene data and steps to address it. I recommend reading the overall series on index fragementation at

http://blogs.msdn.com/sqlserverstorageengine/archive/tags/Index+Fragmentation+Series/default.aspx

-- TSQL Script

create database indextest

go

use indextest

go

-- create the index after loading the data

drop table t_ci

go

create table t_ci (c1 int, c2 char (100), c3 int, c4 varchar(1000))

go

-- load the data

declare @i int

select @i = 0

while (@i < 1000)

begin

insert into t_ci values (@i, 'hello', @i+10000, replicate ('a', 100))

set @i = @i + 1

end

-- find fragmentation

select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent

from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')

OUTPUT

-- main cause of fragmentation is that initial page allocation is using mixed extent

avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count avg_page_space_used_in_percent

---------------------------- -------------------------- -------------------- ------------------------------

40 7.25 4 94.127625401532

(1 row(s) affected)

--find extent allocations. Note, first 8 pages are allocated from mixed extents

-- in the output below, I removed some columns that are not relevant here.

dbcc extentinfo (0, 't_ci', 0)

OUTPUT

file_id page_id pg_alloc ext_size object_id index_id partition_number

----------- ----------- ----------- ----------- ----------- ----------- ----------------

1 153 1 1 2137058649 0 1

1 155 1 1 2137058649 0 1

1 156 1 1 2137058649 0 1

1 157 1 1 2137058649 0 1

1 158 1 1 2137058649 0 1

1 159 1 1 2137058649 0 1

1 180 1 1 2137058649 0 1

1 181 1 1 2137058649 0 1

1 208 8 8 2137058649 0 1

1 216 8 8 2137058649 0 1

1 224 5 8 2137058649 0 1

-- this shows all the pages alllocated to t_ci

-- I looked at the pages and I found total of 30 pages with 4 fragements. So the average size is 7.5

dbcc ind ('indextest', 't_ci', 0)

-- create the clustered index

create clustered index ci on t_ci(c1)

go

-- measure the fragementation

select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent

from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')

OUTPUT

-- you don't see any fragementation because SQL Server allocates uniform extents.

-- In the output below, I removed the row for non-leaf index pages

avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count avg_page_space_used_in_percent

---------------------------- -------------------------- -------------------- ------------------------------

0 28 1 98.3726958240672

-- this shows all the pages alllocated to t_ci

-- I looked at the pages and I found that logical/physical order was perfactly alinged.

-- also, all pages were allocated from uniform extents except for PFS page.

dbcc ind ('indextest', 't_ci', 1)

-- here is the output of page allocations

dbcc extentinfo (0, 't_ci', 1)

file_id page_id pg_alloc ext_size object_id index_id partition_number

----------- ----------- ----------- ----------- ----------- ----------- ----------------

1 183 1 1 2137058649 1 1

1 232 8 8 2137058649 1 1

1 240 8 8 2137058649 1 1

1 248 8 8 2137058649 1 1

-- create the index and load the data. This is different from the previous example

-- as here we create the index on the empty table and then load the data.

drop table t_ci

go

create table t_ci (c1 int, c2 char (100), c3 int, c4 varchar(1000))

go

-- create the clustered index

create clustered index ci on t_ci(c1)

go

-- load the data

declare @i int

select @i = 0

while (@i < 1000)

begin

insert into t_ci values (@i, 'hello', @i+10000, replicate ('a', 100))

set @i = @i + 1

end

-- measure the fragementation

select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent

from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')

-- output: again, you see the fragementaion becuase first 8 page allcoations come from mixed page extents

-- This is because when you are loading into empty table, SQL Server cannot predict its size. So

-- it does mixed page allocations. This is different when you create the indec

-- after loading the data because at that time it knows the size of the index.

-- Also note that the average used percentage in pages was close to 100%. This means that

-- the pages were not split in half. This is one of the optimizations SQL Server does when

-- loading ordered data.

avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count avg_page_space_used_in_percent

---------------------------- -------------------------- -------------------- ------------------------------

14.2857142857143 5.6 5 98.3726958240672

--find extent allocations. Note, first 8 pages are allocated from mixed extents

dbcc extentinfo (0, 't_ci', 1)

file_id page_id pg_alloc ext_size object_id index_id partition_number

----------- ----------- ----------- ----------- ----------- ----------- ----------------

1 180 1 1 5575058 1 1

1 182 1 1 5575058 1 1

1 183 1 1 5575058 1 1

1 142 1 1 5575058 1 1

1 143 1 1 5575058 1 1

1 153 1 1 5575058 1 1

1 154 1 1 5575058 1 1

1 155 1 1 5575058 1 1

1 192 8 8 5575058 1 1

1 200 8 8 5575058 1 1

1 208 5 8 5575058 1 1

drop index ci on t_ci

-- create the clustered index to start with unfragmented data

create clustered index ci on t_ci(c1)

go

-- update all rows such that each row 900 bytes. This will cause page splits thereby

-- lead to fragmentation

update t_ci set c4 = replicate ('b', 1000)

-- measure the fragementation

select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent

from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')

-- Severe fragementation because of page splits. Also note that the space usage in each page is around 50%

-- This affects the performance in two ways (1) you will need to read more pages to get to the

-- same number of rows as pages are 1/2 full. (2) Since there are 117 fragements, it will cause

-- more physical IOs if we are scanning for a range of keys.

--

-- Just to emphasize, the fragmentation happens because of DML operations. So if there is very little

-- DML activity on a table, it may not get fragmented. Other important point to understand

-- is that fragementaion ONLY impacts range queries but NOT singleton selects.So again if

-- you determine that most access thru an index are singleton SELECT, there is no performance benefit

-- in defragmenting it except of course you will be able to space by compacting the data on the page.

--

avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count avg_page_space_used_in_percent

---------------------------- -------------------------- -------------------- ------------------------------

40.4332129963899 2.40869565217391 115 50.0636520879664

--find extent allocations. Note, that the pages are allocated from mixed extents only. This

-- is because the table is already bigger than the 8 page threshold

dbcc extentinfo (0, 't_ci', 1)

file_id page_id pg_alloc ext_size object_id index_id partition_number

----------- ----------- ----------- ----------- ----------- ----------- ----------------

1 183 1 1 5575058 1 1

1 440 8 8 5575058 1 1

1 448 8 8 5575058 1 1

1 456 8 8 5575058 1 1

1 464 8 8 5575058 1 1

1 472 8 8 5575058 1 1

1 480 8 8 5575058 1 1

1 488 8 8 5575058 1 1

1 496 8 8 5575058 1 1

1 504 8 8 5575058 1 1

1 512 8 8 5575058 1 1

1 520 8 8 5575058 1 1

1 528 8 8 5575058 1 1

1 536 8 8 5575058 1 1

1 544 8 8 5575058 1 1

1 552 8 8 5575058 1 1

1 560 8 8 5575058 1 1

1 568 8 8 5575058 1 1

1 576 7 8 5575058 1 1

-- you can use the following command to show all the pages alllocated to t_ci and their links.

dbcc ind ('indextest', 't_ci', 1)

-- Now do an index defrag.

alter index ci on t_ci reorganize

-- measure the fragementation

select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent

from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')

-- note that the fragmentation was reduced significantly and the space usage on pages is around 100%. In other

-- words, the defragementation makes the physical order same as the logical order and compacts the pages.

-- The reason you see the fragementation is that the 'Reorganize' command does not allocate new extents

-- as index rebuild command. It works with the already allocated pages and moves the data around and then

-- deallocates pages not needed. If you choose to 'rebuild' the index, then the fragmentation will be completely

-- removed but you must question if it is worth it? Index Reorganize takes very little space (one extra page) and

-- breaks the work into smaller transactions. So you minimze large log chain and space overhead.On the flip side,

-- it is single threaded and fully logged operation.

avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count avg_page_space_used_in_percent

---------------------------- -------------------------- -------------------- ------------------------------

3.4965034965035 10.2142857142857 14 98.3726958240672