Friday, November 4, 2011

How Much Data Is In That Column?

This is a special-purpose script, but it still may be useful to someone, someday.

Columns

Imagine you have many tables with similar filenames and the same column names. This might happen if the tables are created automatically by a Job on a daily basis, for example. For each column you specify, this script displays the declared width, the average number of characters stored, and the minimum and maximum number of characters stored, and it does this across all tables matching a LIKE pattern than you also specify.

This script might be useful if you wanted to see the average "fullness" of each column, or how big the widest value is for a given column. This might be good to know if you're planning to copy data from this column to another table, for example.


-------------------------------------------------------------------------------
/* Sample setup code:                                                          
                                                                               
 USE AdventureWorks                                                            
                                                                               
 IF OBJECT_ID('dbo.Invoice_2011_10') IS NOT NULL DROP TABLE dbo.Invoice_2011_10
 IF OBJECT_ID('dbo.Invoice_2011_11') IS NOT NULL DROP TABLE dbo.Invoice_2011_11
 IF OBJECT_ID('dbo.Invoice_2011_12') IS NOT NULL DROP TABLE dbo.Invoice_2011_12
                                                                               
 CREATE TABLE dbo.Invoice_2011_10 (AccountNumber INT, Comment NVARCHAR(50))    
 CREATE TABLE dbo.Invoice_2011_11 (AccountNumber INT, Comment NVARCHAR(50))    
 CREATE TABLE dbo.Invoice_2011_12 (AccountNumber INT, Comment NVARCHAR(50))    
                                                                               
 INSERT dbo.Invoice_2011_10 (AccountNumber, Comment) VALUES (  2, 'ABC')       
 INSERT dbo.Invoice_2011_11 (AccountNumber, Comment) VALUES (234, 'ABCDEF')    
 INSERT dbo.Invoice_2011_12 (AccountNumber, Comment) VALUES (234, 'ABCDEFGHI') 
                                                                               
 -- Now set the values as follows, and run this script.                        
 --     @SchemaName  is 'dbo'                                                  
 --     @TableFormat is 'Invoice_2011%'                                        
 --     @tColumns    should contain 'AccountNumber' and 'Comment' *only*       
                                                                               
                                                                             */
-------------------------------------------------------------------------------

SET NOCOUNT ON


-------------------------------------------------------------------------------
-- These are the variables you need to set.  There are no changes necessary    
-- after this section.                                                         
-------------------------------------------------------------------------------

-- The schema for the tables. 
DECLARE @SchemaName sysname = 'dbo'

-- The LIKE format for all the tables you want to examine. 
DECLARE @TableFormat sysname = 'Invoice_2011%'

-- The set of column names in the set of tables you want to report on. 
DECLARE @tColumns TABLE (ColumnName sysname)

INSERT @tColumns
       (ColumnName)
VALUES
       ('AccountNumber')
     , ('Comment')
        

-------------------------------------------------------------------------------
-- Variables (you don't need to change these).                                 
-------------------------------------------------------------------------------

DECLARE @TableName    sysname       = ''
DECLARE @ColumnName   sysname       = ''

DECLARE @Sql          NVARCHAR(MAX) = ' '                                 + CHAR(10)
                                    + 'SET NOCOUNT ON'                    + CHAR(10)
                                    + 'SET STATISTICS IO OFF'             + CHAR(10)
                                    + ' '                                 + CHAR(10)
                                    + 'DECLARE @tTotals TABLE '           + CHAR(10)
                                    + '    (ColumnName   sysname,'        + CHAR(10)
                                    + '     Example      NVARCHAR(MAX), ' + CHAR(10)
                                    + '     AverageLen   INT, '           + CHAR(10)
                                    + '     MaximumLen   INT, '           + CHAR(10)
                                    + '     DeclaredSize INT) '           + CHAR(10)

-- The *COLUMNAME* is just a placeholder; each column will call REPLACE with 
-- its name from the set you specified above.  Done this way to keep all the 
-- variables you need to change in one place, and at the top of the file. 
DECLARE @ColumnsSQL   NVARCHAR(MAX) = 'QUOTENAME(''*COLUMNNAME*''), '            + CHAR(10)
                                    + '       MIN([*COLUMNNAME*]), '             + CHAR(10)
                                    + '       AVG(DATALENGTH([*COLUMNNAME*])), ' + CHAR(10)
                                    + '       MAX(DATALENGTH([*COLUMNNAME*])), ' + CHAR(10)

DECLARE @ColLengthSQL NVARCHAR(MAX) = ''


-------------------------------------------------------------------------------
-- Process each table that matches the LIKE expression.                        
-------------------------------------------------------------------------------

RAISERROR('All column sizes are in bytes, not characters.', 10, 1) WITH NOWAIT

DECLARE curTables CURSOR FOR
 SELECT name
   FROM sys.tables
  WHERE name LIKE @TableFormat ESCAPE '$'
    AND type_desc = 'USER_TABLE'
       AND SCHEMA_ID(@SchemaName) = schema_id
  ORDER BY name

OPEN curTables
FETCH NEXT FROM curTables INTO @TableName

WHILE @@FETCH_STATUS = 0
BEGIN
    SET @Sql += ' '                                                                              + CHAR(10)
             +  'RAISERROR('' '', 10, 1) WITH NOWAIT'                                            + CHAR(10)
             +  'RAISERROR(''Table ' + @SchemaName + '.' + @TableName + ''', 10, 1) WITH NOWAIT' + CHAR(10)

    SET @ColLengthSQL = '       MIN(COL_LENGTH(''' 
                      + @SchemaName + '.' + @TableName 
                      + ''', ''*COLUMNNAME*'')) ' + CHAR(10)
    
    -- Process each column in the set. 
    DECLARE curColumns CURSOR FOR
        SELECT ColumnName
          FROM @tColumns
   
    OPEN curColumns
    FETCH NEXT FROM curColumns INTO @ColumnName
              
    WHILE @@FETCH_STATUS = 0
    BEGIN
    
        -- Some types don't support MIN, etc. 
        IF EXISTS (SELECT *
                     FROM sys.columns   c
                     JOIN sys.types     t
                       ON c.system_type_id = t.system_type_id
                    WHERE t.name IN ('uniqueidentifier', 'bit', 'xml')
                      AND c.object_id = OBJECT_ID(@SchemaName + '.' + @TableName)
                      AND c.name = @ColumnName)
        BEGIN
            RAISERROR('Skipping %s.%s.%s because type is unsupported', 10, 1, @SchemaName, @TableName, @ColumnName) WITH NOWAIT
            FETCH NEXT FROM curColumns INTO @ColumnName
            CONTINUE
        END

        -- Build the INSERT statement. 
        SET @Sql += ' '                               + CHAR(10)
                 +  'INSERT @tTotals (ColumnName, '   + CHAR(10)
                 +  '                 Example, '      + CHAR(10)
                 +  '                 AverageLen, '   + CHAR(10)
                 +  '                 MaximumLen, '   + CHAR(10)
                 +  '                 DeclaredSize) ' + CHAR(10)
                 +  'SELECT '
                 
        SET @Sql += REPLACE(@ColumnsSQL + @ColLengthSQL, '*COLUMNNAME*', @ColumnName)
                 +  '  FROM ' + @SchemaName + '.' + @TableName + CHAR(10)
                 +  ' WHERE ' + QUOTENAME(@ColumnName) + ' IS NOT NULL '  + CHAR(10)
                 
        SET @Sql += ' '                                                                + CHAR(10)
                 +  'RAISERROR(''    Column ' + @ColumnName + ''', 10, 1) WITH NOWAIT' + CHAR(10)
        
        FETCH NEXT FROM curColumns INTO @ColumnName
    END
    
    CLOSE curColumns
    DEALLOCATE curColumns

    FETCH NEXT FROM curTables INTO @TableName
END
    
CLOSE curTables
DEALLOCATE curTables


-------------------------------------------------------------------------------
-- Done with the tables.  Display the results.                                 
-------------------------------------------------------------------------------

SET @Sql += ' '                                                     + CHAR(10)
         +  'RAISERROR('' '', 10, 1) WITH NOWAIT'                   + CHAR(10)
         +  ' '                                                     + CHAR(10)
         +  '  SELECT ColumnName          AS [Column Name], '       + CHAR(10)
         +  '         MAX(Example)        AS [Example], '           + CHAR(10)
         +  '         AVG(AverageLen)     AS [Average Length], '    + CHAR(10)
         +  '         MAX(MaximumLen)     AS [Maximum Length], '    + CHAR(10)
         +  '         MIN(DeclaredSize)   AS [Declared Size] '      + CHAR(10)
         +  '    FROM @tTotals '                                    + CHAR(10)
         +  'GROUP BY ColumnName '                                  + CHAR(10)
         +  'ORDER BY ColumnName '                                  
     
EXEC (@Sql)

-- PRINT is limited to 8000 bytes.  There are better solutions out there. 
DECLARE @Idx INT = 1

WHILE @Idx < LEN(@Sql)
BEGIN
    PRINT SUBSTRING(@Sql, @Idx, 4000)
    SET @Idx += 4000
END

0 comments:

Post a Comment