Source code for ts_core.config.parser

"""

.. _ts_config_parser:

Excel Configuration Parser Internals
------------------------------------

This module handles all of the excel configuration parsing.

Guidelines on excel sheet formatting is as follows:

+------------------+--------------------+------------------+--------------------+--------------------+
|Column A          | Column B           |Column C          | Column E           | Column F           |
+==================+====================+==================+====================+====================+
|SUMO Attribute    |SUMO File where this|Category name     |Units column        |Value fot this attr |
|                  |attribute resides   |                  |                    |                    |
+------------------+--------------------+------------------+--------------------+--------------------+

Attributes can only be read from Row 3 and below (MIN_ROW)

Only the first 1000 rows can have any attributes (MAX_ROW)

Current supported tabs:
* Vehicle Type Customization
* General Settings
* Intersection Definition
* Branch Settings


Current supported Intersection Types:
* Cross
* T
* Y

Cross intersection has been tested. 



"""


from openpyxl import *
import json
import xml.etree.ElementTree as ET
import xml.dom.minidom

SUMO_ATTR_COLUMN = 0  # Column A
SUMO_FILE_COLUMN = 1  # Column B
CATEGORY_COLUMN = 2
UNITS_COLUMN = 4  # Column E
VALUE_COLUMN = 5  # Column F for single entry data

MIN_ROW = 3  # Minimum row number where config information CAN be stored
MAX_ROW = 1000  # Max row where data is stored TODO update?

UNIT_CONVERT = {
    "time": {"Hours": 60 * 60, "Days": 60 * 60 * 24,
             "Months": 60 * 60 * 24 * 30.41,
             "Years": 60 * 60 * 24 * 30.41 * 12},  # How many seconds per time unit

    "angle": {"N": 90, "NE": 45, "E": 0, "SE": 315,
              "S": 270, "SW": 225, "W": 180, "NW": 135, },

}
# List of file extensions
ROUTE = "rou"
NET = "net"

SUMOFILE = 0
SUMOATTR = 1
USERVAL = 3  # First user_val
VALUNITS = 2

TAB_NAMES = ["Vehicle Type Customization", "General Settings", "Intersection Definition", "Branch Settings"]

ITYPE_BRANCHES = {
    "Cross": 4,
    "T": 3,
    "Y": 3,
}

OUTPUT_DICT = {
    "SUMOCFG": {},
    "Branches": {},
    "Intersections": {},
}


[docs]def parse_general(sheet): """ Parses the given sheet assuming it is the General Settings sheet Adds parsed data to OUTPUT_DICT global variable Parameters ---------- sheet: openpyxl sheet - The General Settings tab Returns ------- String - The name of the current configuration """ # Get the Configuration name. input_config_name = get_input_from_row(sheet, MIN_ROW) # Configuration Name # Write Config names to the output dict if input_config_name[SUMOFILE] == "SUMOCFG" and input_config_name[SUMOATTR] == "input": config_name = input_config_name[USERVAL] OUTPUT_DICT["SUMOCFG"]["input"] = {"net_file": "%s.net.xml" % config_name, "route_files": "%s.rou.xml" % config_name, "additional_files": "%s.add.xml" % config_name} else: print("Config name Error") # TODO fill out # Simulation length input_runtime_length = get_input_from_row(sheet, MIN_ROW + 1) if input_runtime_length[SUMOFILE] == "SUMOCFG" and input_runtime_length[SUMOATTR] == "time.end": sim_seconds = input_runtime_length[USERVAL] * UNIT_CONVERT["time"][ input_runtime_length[VALUNITS]] # TODO check if units is in time_convert? OUTPUT_DICT["SUMOCFG"]["time"] = {"begin": "0", "end": "%d" % sim_seconds} else: print("TIME ERROR", input_runtime_length) # TODO fill out OUTPUT_DICT["SUMOCFG"]["gui_only"] = {"gui_settings_file": "gui-settings.cfg"} # Overall Traffic Demand # TODO Where does this go in the xmls if anywhere? # input_traffic_demand = get_input_from_row(sheet, MIN_ROW+2) return config_name
[docs]def parse_intersection(sheet): """ Parses the given sheet assuming it is the Intersections sheet Adds parsed data to OUTPUT_DICT global variable Parameters ---------- sheet: openpyxl sheet - The General Intersection Settings tab Returns ------- String - The selected intersection type. Key in ITYPE_BRANCHES """ # Intersection type (Cross, T, Y, etc) input_intersection_type = get_input_from_row(sheet, MIN_ROW) intersection_type = input_intersection_type[USERVAL] if intersection_type not in ITYPE_BRANCHES.keys(): print("Unrecognized Intersection type: %s" % intersection_type) return None OUTPUT_DICT["Intersections"]["I0"] = {} # TODO Add in support for multiple intersections. # Initialize the intersection in The output Dict intersection_name = "I%d" % 0 OUTPUT_DICT["Intersections"][intersection_name]["id"] = 0 OUTPUT_DICT["Intersections"][intersection_name]["type"] = "traffic_light" return intersection_type
[docs]def parse_branches(sheet, intersection_type): """ Parses the given sheet assuming it is the branches sheet Fills out OUTPUT_DICT with all information needed to parse the branches of the intersection. Parameters ---------- sheet: openpyxl sheet - The General Branches Settings tab intersection_type: - enum specified in ITYPE_BRANCHES specifying the current intersection type (i.e cross, T, Y) Returns ------- None """ # Get the number of branches num_branches = ITYPE_BRANCHES[intersection_type] # Initialize the output dictionary with the branches for branch_num in range(1, num_branches + 1): b_name = "B%d" % branch_num # Branch name is Bn where n is the branches ID OUTPUT_DICT["Branches"][b_name] = {} for row in range(MIN_ROW, MAX_ROW): # For each row that may contain data if row_has_data(sheet, row): # Check if this row has data input_data = get_input_from_row(sheet, row, num_branches) # Get the data units = input_data[VALUNITS].lower() # Fill out OUTPUT_DICT with data from each branch for this attribute for branch_num in range(0, num_branches): b_name = "B%d" % (branch_num + 1) user_value = input_data[USERVAL + branch_num] if units in UNIT_CONVERT.keys(): user_value = UNIT_CONVERT[units][user_value] # TODO normalize the data using the units # TODO verify inbound lanes sum up to numLanes # write to output OUTPUT_DICT["Branches"][b_name][input_data[SUMOATTR]] = user_value
[docs]def parse_stats(sheet): """ Parses the given sheet assuming it is the advanced customization sheet The parsed data is stored as a xml.etree.ElementTree element. This becomes the stats file for SUMO Parameters ---------- sheet: The advanced customization sheet Returns ------- "xml.etree.ElementTree" node containing the full statistics file information """ # TODO Bus Stops is not currently supported. category_translate = {"Work Hours": "workHours", "City Gates": "cityGates", "Bus Stations": "busStations", "Bus Lines": "busLines"} expected_entries = {"bracket": 3, "opening": 2, "closing": 2, "street": 3, "entrance": 4, "school": 7, "busStation": 3} sub_tags = {"population": "bracket", "workHours": "opening", "streets": "street", "cityGates": "entrance", "schools": "school", "busStations": "busStation"} root = ET.Element("city") current_category = None category_root = None data = None num_entries = 15 # Maximum number of entries for any category TODO global for row in range(MIN_ROW, 100): # prettyprint(root) if row_has_category(sheet, row) is not None: # If a category has been parsed, put it in the xml if data is not None: for tag in data.keys(): attributes = data[tag] if "TAG" in attributes.keys(): sub_tag = attributes["TAG"].lower() del attributes["TAG"] else: sub_tag = tag.split("_")[0] if sub_tag in expected_entries.keys() and len(attributes) < expected_entries[sub_tag]: continue ET.SubElement(category_root, sub_tag, attrib={attr: str(attributes[attr]) for attr in attributes}) # prettyprint(root) current_category = row_has_category(sheet, row) if current_category in category_translate.keys(): current_category = category_translate[current_category] else: current_category = current_category.lower().strip(" ") category_root = ET.SubElement(root, current_category) data = None continue elif category_root is None: continue if current_category in ["general", "parameters"]: if row_has_data(sheet, row): input_data = get_input_from_row(sheet, row, cols=1) category_root.attrib[input_data[SUMOATTR]] = str(input_data[USERVAL]) elif current_category in ["population", "workHours", "streets", "cityGates", "schools", "busStations"]: sub_tag = sub_tags[current_category] if row_has_data(sheet, row): input_data = get_input_from_row(sheet, row, cols=100) if data is None: data = {"%s_%d" % (sub_tag, i): {} for i in range(0, num_entries)} for i in range(0, len(input_data) - 3): attribute = input_data[SUMOATTR] if attribute == "edge2": # Deal with inbound/outbound attributes since sumo uses i and o notation data["%s_%d" % (sub_tag, i)]["edge"] += {"Inbound": "i", "Outbound": "o"}[ input_data[USERVAL + i]] else: data["%s_%d" % (sub_tag, i)][attribute] = str(input_data[USERVAL + i]) # Workhours is implicitly supported # elif current_category == "workHours": # pass return root
[docs]def prettyprint(root, print_it=False, supress_errors=True): """ Attempts to create a human readable string out of a given xml STRING Parameters ---------- root: String containing valid xml data print_it: Tells the function whether or not it should also print out the xml supress_errors: Tells the function to NOT print out errors if given data was invalid Returns ------- String containing the formatted xml data if possible """ try: xml1 = xml.dom.minidom.parseString(ET.tostring(root, encoding='utf8', method='xml').decode()) formatted_xml = xml1.toprettyxml() if print_it: print(formatted_xml) return ET.tostring(formatted_xml, encoding='utf8', method='xml') except Exception as e: if supress_errors is False: print(e) print('Function "prettyprint()" received invalid xml data')
# print(root)
[docs]def row_has_data(sheet, row_num): """ Checks whether or not this row contains a user-entered value. Parameters ---------- sheet: openpyxl sheet - The sheet to use row_num: The row to analyze Returns ------- True if there is data to be analyzed False otherwise """ # Get the data from the row rows = sheet.iter_rows(min_col=1, min_row=row_num, max_col=15, max_row=row_num) for row in rows: # FIXME Is there a better way to look at just 1 row # if there is no SUMO Attribute specified, then there is no user entered data available sumo_attr = row[SUMO_ATTR_COLUMN].value if sumo_attr is None: return False else: return True # should not be executed print("ERROR - row_has_data() did not find row") return False
[docs]def row_has_category(sheet, row_num): """ Checks to see if this row starts a new category Parameters ---------- sheet: openpyxl sheet - The sheet to use row_num: The row to analyze Returns ------- True if there is a new category False otherwise """ # Get the data from the row rows = sheet.iter_rows(min_col=1, min_row=row_num, max_col=15, max_row=row_num) for row in rows: # FIXME Is there a better way to look at just 1 row # If there is text in the Category Column then it is a new category class_name = row[CATEGORY_COLUMN].value if class_name is None: return None else: return class_name # should not be executed print("ERROR - row_has_category() did not find any rows") return None
[docs]def get_input_from_row(sheet, row_num, cols=1): """ Retrieves the input from the row and returns a tuple of all the relevant data Parameters ---------- sheet: The sheet to read row_num: The row number to use cols: The number of columns to the RIGHT of row_num to return Returns ------- Tupple containing the data - formatted as (sumo_file, sumo_attribute, units, user_val1, user_val2...) """ output = [] rows = sheet.iter_rows(min_col=1, min_row=row_num, max_col=VALUE_COLUMN + cols, max_row=row_num) for row in rows: # Determine the SUMO_FILE attribute sumo_file = row[SUMO_FILE_COLUMN].value if sumo_file is None: sumo_file = "N/A" output.append(sumo_file) # Determine the SUMO_ATTR attribute sumo_attr = row[SUMO_ATTR_COLUMN].value if sumo_attr is None: sumo_attr = "N/A" output.append(sumo_attr) # Determine the units units = row[UNITS_COLUMN].value # TODO check if user entered units if units is None: units = "N/A" output.append(units) # Determine all user entered values for col in range(VALUE_COLUMN, VALUE_COLUMN + cols): user_value = row[col].value if user_value is None: break output.append(user_value) return tuple(output)
[docs]def run_parser(excel_file_path, outpath): """ Main function to actually run the whole parser Parameters ---------- excel_file_path: FULL file path to the excel sheet outpath: File to output the temporary json file Returns ------- In order: 1) config_name: The name for the current configuation 2) OUTPUT_DICT: python dictionary containing all relevant information 3) Stats file xml """ wb = load_workbook(filename=excel_file_path, data_only=True) config_name = parse_general(wb["General Settings"]) type = parse_intersection(wb["Intersection Settings"]) if parse_intersection is None: print("ERROR") return -1 parse_branches(wb["Branch Settings"], type) # Context manager to dump parsed config to json with open(outpath + "/" + config_name + "_parsed.json", 'w') as outfile: json.dump(OUTPUT_DICT, outfile, indent=4, ) stats_root_node = parse_stats(wb["Advanced Customization"]) stats_xml = xml.dom.minidom.parseString(ET.tostring(stats_root_node, encoding='utf8', method='xml').decode()) return config_name, OUTPUT_DICT, stats_xml.toprettyxml()
if __name__ == "__main__": run_parser('Configuration_template.xlsx', './test_dir')