Friday, April 15, 2016

Data Discovery: List Contents

Summary


This tool lists the contents of the input workspace tree, outputting to a comma-delimited text file. It lists both ArcGIS data types, such as feature classes and maps, and system data types, such as files and folders. The output table columns are:


  1. Path

  2. Data_Type

  3. Name

  4. Description


The Description column is primarily for subsequent user input. The root directory of the input workspace tree is noted in this column.


Note that the output is not sorted. To group data by folder and workspace in alphabetical order, sort on the Path column.


This tool is useful for documenting data and for data discovery. Use the Description column for note-taking.


Illustration

List Workspace Contents tool illustration

Usage


Syntax


ListContents_mytools (Input_Workspace, Output_CSV_File)

Parameter Explanation Data Type
Input_Workspace Dialog Reference

Input workspace


Python Reference

Input workspace

Workspace
Output_CSV_File Dialog Reference

Output comma-delimited text file. For best results, use CSV extension.


Python Reference

Output comma-delimited text file. For best results, use CSV extension.

File

Code Samples

ListWorkspaceContents.py

See below for the tool source code.


import arcpy, traceback, sys, os, csv
# ALLOW GEOPROCESSING OUTPUTS TO OVERWRITE EXISTING DATA.
arcpy.env.overwriteOutput = True

def dummyfunction():
    """Do nothing and return None.

    NOTE(review): nothing in the visible script calls this function;
    presumably a placeholder -- confirm before removing.
    """
    return None

# DATA TYPES RECORDED BY THE FOLDER/FILE PASS. OTHER ITEMS ARE LEFT TO THE
# arcpy.da.Walk PASS.
_TREE_DATA_TYPES = ("File", "Folder", "MapDocument", "Workspace", "Toolbox")

# LAYER AND EXCEL FILES ARE SKIPPED BY THE FOLDER/FILE PASS; THEY HAVE
# ALREADY BEEN COLLECTED BY THE arcpy.da.Walk PASS.
_TREE_SKIP_EXTENSIONS = (".lyr", ".xls", ".xlsx")

# NUMBER OF LEVELS BELOW THE INPUT WORKSPACE THAT THE FOLDER/FILE PASS VISITS.
_TREE_DEPTH = 3


def _open_csv(path):
    """Open *path* for writing in the mode the csv module expects.

    Python 2's csv writer needs a binary file; Python 3's needs a text file
    opened with newline="" so that row terminators are not translated.
    """
    if sys.version_info[0] >= 3:
        return open(path, "w", newline="")
    return open(path, "wb")


def _write_tree_rows(writer, parent_desc, levels):
    """Write a CSV row for each qualifying child of *parent_desc*.

    A child qualifies when its dataType is in _TREE_DATA_TYPES and its
    catalog path does not end in one of _TREE_SKIP_EXTENSIONS. Qualifying
    children are described again and their own children are processed the
    same way until *levels* levels have been written.

    writer      -- csv writer receiving [path, data type, name, None] rows.
    parent_desc -- arcpy Describe object whose children are listed.
    levels      -- how many levels below *parent_desc* to write (>= 1).
    """
    for child in parent_desc.children:
        if child.dataType not in _TREE_DATA_TYPES:
            continue
        child_path = str(child.catalogPath)
        if child_path.lower().endswith(_TREE_SKIP_EXTENSIONS):
            continue

        # DESCRIBE BEFORE WRITING SO A FAILED DESCRIBE DOES NOT LEAVE A ROW
        # BEHIND. THE DEEPEST LEVEL IS WRITTEN WITHOUT BEING DESCRIBED.
        child_desc = None
        if levels > 1:
            child_desc = arcpy.Describe(child.catalogPath)

        arcpy.AddMessage(child_path)
        writer.writerow([child_path, child.dataType, child.Name, None])

        if child_desc is not None:
            _write_tree_rows(writer, child_desc, levels - 1)


if __name__ == "__main__":

    in_workspace = arcpy.GetParameterAsText(0) # WORKSPACE
    out_csv = arcpy.GetParameterAsText(1) # FILE

    try:

        arcpy.AddMessage(" ")

        arcpy.env.workspace = in_workspace

        # COLLECT FEATURE CLASSES AND TABLES FROM WORKSPACE TREE.
        item_list = []
        walk = arcpy.da.Walk(
            in_workspace,
            datatype=['CadDrawing', 'Container', 'FeatureClass', 'Map',
                      'RasterCatalog', 'RasterDataset', 'Table', 'Terrain',
                      'Text', 'TIN', 'Toolbox'])
        for dirpath, dirnames, filenames in walk:
            for filename in filenames:
                item_list.append(os.path.join(dirpath, filename))

        # NOTE(review): WHEN THE WALK FINDS NOTHING, NO CSV IS WRITTEN AT ALL,
        # NOT EVEN THE FOLDER/FILE ROWS. KEPT AS IN THE ORIGINAL -- CONFIRM
        # THAT THIS IS INTENDED.
        if item_list:

            # CREATE OUTPUT TABLE AND COLUMN HEADERS.
            with _open_csv(out_csv) as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(["Path", "Data_Type", "Name", "Description"])

                # ITERATE THROUGH FEATURE CLASSES AND TABLES.
                for item in item_list:
                    item_path = str(item)
                    arcpy.AddMessage(item_path)
                    # FILTER OUT LAYER FILES AND PUT LESS INFO SINCE LAYER
                    # FILES HANG ON DESCRIBE SOMETIMES. MATCH THE ".lyr"
                    # EXTENSION, NOT JUST ANY NAME ENDING IN "lyr".
                    if item_path.lower().endswith(".lyr"):
                        writer.writerow([item_path, "Layer", None, None])
                    else:
                        item_desc = arcpy.Describe(item)
                        writer.writerow([item_path, item_desc.dataType,
                                         item_desc.Name, None])

                # ITERATE THROUGH FOLDERS AND FILES.
                # START WITH ROOT DIRECTORY/INPUT WORKSPACE.
                root_desc = arcpy.Describe(in_workspace)
                data_type = getattr(root_desc, "dataType", "")
                cat_path = str(getattr(root_desc, "catalogPath", ""))
                writer.writerow([cat_path, data_type, root_desc.Name,
                                 "Root Directory"])

                # ITERATE THROUGH SUBDIRECTORIES, COLLECTING ITEMS THAT WERE
                # NOT COLLECTED IN THE FC/TABLE LOOP.
                _write_tree_rows(writer, root_desc, _TREE_DEPTH)

        arcpy.AddMessage(" ")

        # SET OUTPUT PARAMETER.
        # NOTE(review): INDEX 2 PRESUMABLY REFERS TO A DERIVED OUTPUT
        # PARAMETER DEFINED IN THE TOOLBOX -- CONFIRM.
        arcpy.SetParameter(2, out_csv) # TABLEVIEW

    except Exception:

        # PRINT ERROR MESSAGES. sys.exc_info() REPLACES THE DEPRECATED
        # sys.exc_type / sys.exc_value, WHICH DO NOT EXIST IN PYTHON 3.
        exc_type, exc_value, tb = sys.exc_info()
        tbinfo = traceback.format_tb(tb)[0]
        pymsg = tbinfo + "\n" + str(exc_type) + ": " + str(exc_value)
        arcpy.AddError("Python Messages: " + pymsg + " GP Messages: " + arcpy.GetMessages(2))

    # NO "finally: del ..." CLEANUP: SEVERAL OF THOSE NAMES ARE ONLY BOUND ON
    # SOME PATHS, SO THE del RAISED NameError. THE with BLOCK CLOSES THE FILE.

Tags


Describe, Walk, csv, children

Credits


Ruth Bowers, 2015, 2016


Use limitations


Tested in ArcGIS 10.2.2 as a Python script tool in a custom toolbox.


No comments:

Post a Comment