Thursday, April 14, 2016

Data Discovery: Record Counts


This tool outputs a table that lists record counts for each table and feature class found in the input workspace tree. The script walks through subdirectories to discover items. The tool searches for tables and feature classes within all folders and geodatabases, including feature datasets.


Data types supported are shapefiles, stand-alone tables, Excel spreadsheets, and all levels of ESRI geodatabases.


Note that Excel spreadsheet record counts may be approximate because of ArcGIS limitations. Also, only single-table tabs and named ranges are supported. Table objects are not supported.


The output table contains the following columns:


  1. Class_Name (unqualified feature class or table name)

  2. Data_Type (data type per Describe)

  3. Record_Count (table record count)

  4. Path (Path + Table_Name = valid path)

  5. Table_Name (fully-qualified feature class or table name)


This tool provides a quick record count of data present in a workspace tree, so is useful in the discovery phase of projects. It can also be used in scripting to provide paths on selected records, as the Path + Table_Name can be concatenated to create complete catalog paths.


Illustration

Usage

Syntax


WorkspaceRowCounts_mytools (Input_Workspace, Output_Workspace, Output_Table_Name)

Parameter Explanation Data Type
Input_Workspace Dialog Reference

Top of workspace tree to search


Python Reference

Top of workspace tree to search

Workspace
Output_Workspace Dialog Reference

Output workspace


Python Reference

Output workspace

Workspace
Output_Table_Name Dialog Reference

output table name


Python Reference

Output workspace

String

Code Samples

ListRecordCounts.py

This is the source code for this tool.


import arcpy, traceback, sys, os
arcpy.env.overwriteOutput = True

def add_fields(Input_Layer, field_name_list):
    '''Quick, hard-coded way to add fields to a table, 
    example below shows one way to manage field names for easier coding.
    Create the field name list in your main code, then paste below for reference.
        # NAMES FOR FIELDS TO ADD
        #                     0            1              2          3           4 
        tableheaders = ["Class_Name", "Data_Type", "Record_Count", "Path", "Table_Name"]
    Create table syntax:  CreateTable_management (out_path, out_name, {template}, {config_keyword})
    '''
    # CREATE VALUE TABLE FOR FIELDS TO ADD
    # ADDROW FORMAT: Name Type Length - Length is optional
    vtab = arcpy.ValueTable(3)
    vtab.addRow(field_name_list[0] + " Text 100") 
    vtab.addRow(field_name_list[1] + " Text 15") 
    vtab.addRow(field_name_list[2] + " Long") 
    vtab.addRow(field_name_list[3] + " Text 255") 
    vtab.addRow(field_name_list[4] + " Text 100") 
    
    x = 0
    # LOOP THROUGH THE VALUE TABLE
    while x < vtab.rowCount:
        if (vtab.getValue(x, 1) == "Text"):
            arcpy.AddField_management(Input_Layer, vtab.getValue(x, 0), vtab.getValue(x, 1), field_length=(int(vtab.getValue(x, 2))))
        else:
            arcpy.AddField_management(Input_Layer, vtab.getValue(x, 0), vtab.getValue(x, 1))
        arcpy.AddMessage("     Added field " + vtab.getValue(x, 0) + ".")
        x += 1
        
def insert_rows(in_layer, field_list, rows_to_insert):
    # inserts a python list of lists containing comma-delimited rows of data to insert.
    # Parameters: layer to insert rows into, list of field name strings, data rows.
    cursor = arcpy.da.InsertCursor(in_layer, field_list)
    for row in rows_to_insert:
        cursor.insertRow(row)
    del cursor

if __name__ == "__main__":

    in_workspace = arcpy.GetParameterAsText(0) # Workspace
    out_workspace = arcpy.GetParameterAsText(1) # Workspace
    out_table_name = arcpy.GetParameterAsText(2) # String

    try:

        arcpy.AddMessage(" ")

        # CREATE OUTPUT TABLE
        out_table = arcpy.CreateTable_management (out_workspace, out_table_name)
        
        # CREATE THE COLUMN HEADINGS LIST
        #                     0            1              2          3           4 
        tableheaders = ["Class_Name", "Data_Type", "Record_Count", "Path", "Table_Name"]
        
        # ADD THE COLUMN HEADINGS TO THE OUTPUT TABLE
        add_fields(out_table, tableheaders)

        arcpy.env.workspace = in_workspace
        nullvalues = ["", " ", None, 0]

        # COLLECT FEATURE CLASSES AND TABLES FROM WORKSPACE TREE
        feature_classes = []
        walk = arcpy.da.Walk(in_workspace, datatype=['FeatureClass', 'Table'])
        for dirpath, dirnames, filenames in walk:
            for filename in filenames:
                feature_classes.append(os.path.join(dirpath, filename))
                
        if len(feature_classes) > 0:
        
            arcpy.AddMessage("     Compiling row counts ...")
            rows_to_add = []
            this_line = []

            # ITERATE THROUGH FEATURE CLASSES AND TABLES
            for fc_path in feature_classes:
                arcpy.AddMessage("     " + str(fc_path))
                try:
                    desc = arcpy.Describe(fc_path)
                    lfields=arcpy.ListFields(fc_path)
                    class_name = desc.baseName
                    if len(class_name.split(".")) > 1: # USE UNQUALIFIED TABLE NAME
                        class_name = class_name.split(".")[int(len(class_name.split("."))-1)]
                    # IF THIS IS AN EXCEL TABLE ...
                    if class_name[-2:] == "$'" or class_name[-1:] == "$" or desc.path[-4:] in [".xls", "xlsx"]:
                        class_name = class_name.strip("'")
                        high_num = 0
                        rowCount = 0
                        this_line = []
                        for i, f in enumerate(lfields):
                            # GET RECORD COUNT FROM COLUMN WITH HIGHEST NUMBER OF POPULATED ROWS
                            rowCount = 0
                            with arcpy.da.SearchCursor(fc_path, "*") as cur:
                                for row in cur:
                                    v = row[i]
                                    if not(v in nullvalues):
                                        rowCount += 1
                            if rowCount > high_num:
                                high_num = rowCount
                        if high_num == 0:
                            continue
                        else:
                            this_line.append(class_name)
                            this_line.append(desc.dataType)
                            this_line.append(high_num)
                            this_line.append(desc.path)
                            this_line.append(class_name)
                            rows_to_add.append(this_line)
                    else: # OTHERWISE ...
                        for f in lfields:
                            # GET RECORD COUNT FROM OID COLUMNS
                            if f.type == "OID":
                                this_line = []
                                rowCount = 0
                                with arcpy.da.SearchCursor(fc_path, f.name) as cur:
                                    for row in cur:
                                        v = row[0]
                                        if not(v in nullvalues):
                                            rowCount += 1
                                # COLLECT DATA TO OUTPUT
                                this_line.append(class_name)
                                this_line.append(desc.dataType)
                                this_line.append(rowCount)
                                this_line.append(desc.path)
                                this_line.append(desc.baseName)
                                rows_to_add.append(this_line)
                except:
                    arcpy.AddMessage("ERROR, skipping...")
                    continue
                        
            # INSERT DATA INTO OUTPUT TABLE            
            insert_rows(out_table, tableheaders, rows_to_add)
            
        arcpy.AddMessage(" ")
        
        # SET OUTPUT PARAMETER
        arcpy.SetParameter(3, out_table)
        
    except:

        # PRINT ERROR MESSAGES
        tb = sys.exc_info()[2]
        tbinfo = traceback.format_tb(tb)[0]
        pymsg = tbinfo + "\n" + str(sys.exc_type)+ ": " + str(sys.exc_value)
        arcpy.AddError("Python Messages: " + pymsg + " GP Messages: " + arcpy.GetMessages(2))
        
    finally:
        del in_workspace

Tags


fields, table, Walk, Describe, enumerate

Credits

Ruth Bowers, 2016


Use limitations

Tested as script tool in ArcGIS 10.2


No comments:

Post a Comment