From 8ef3e06c73c59bb228aab1537f585d0582a848eb Mon Sep 17 00:00:00 2001 From: Jacob Daniel Bennett Date: Wed, 7 Oct 2020 09:02:41 -0400 Subject: [PATCH] Added initial message parser helper function, completely updated dictionary keys to camel case --- api/ECNQueue.py | 280 +++++++++++++++++++++++++++++------------------- 1 file changed, 170 insertions(+), 110 deletions(-) diff --git a/api/ECNQueue.py b/api/ECNQueue.py index 0ee74ca..3537bb3 100644 --- a/api/ECNQueue.py +++ b/api/ECNQueue.py @@ -181,126 +181,99 @@ def __parseSections(self) -> list: contentStart = self.__getHeaderBoundary() + 1 contentEnd = len(self.__rawItem) - 1 - directoryInfo = {"type": "directoryInformation"} + #directoryInfo = {"type": "directoryInformation"} initialMessageContent = [] - endInitialMessage = False + initialMessageSection = True + + # Delimiter info + delimiters = [ + {"name": "edit", "pattern": "*** Edited"}, + {"name": "status", "pattern": "*** Status"}, + {"name": "replyToUser", "pattern": "*** Replied"}, + {"name": "replyFromUser", "pattern": "=== "}, + ] # Checks for Directory Identifiers - if self.__rawItem[contentStart] == "\n" and self.__rawItem[contentStart + 1].startswith("\tName:"): + if self.__rawItem[contentStart] == "\n" and self.__rawItem[contentStart + 1].startswith("\t"): # Parses the directory information and returns a dictionary of directory values - directoryInfo["content"] = self.__directoryParsing(contentStart + 1) + directoryInfo = self.__directoryParsing(contentStart + 1) + + # Appends Directory Information into the sections array + sections.append(directoryInfo) # Sets the initial message start to the next line after all directory lines and newlines - contentStart = contentStart + len(directoryInfo["content"]) + 2 + contentStart = contentStart + len(directoryInfo) + 1 - else: + #else: # Initialize an empty dictionary for content - directoryInfo["content"] = {} - - # Appends Directory Information into the sections array - sections.append(directoryInfo) + 
#directoryInfo["content"] = {} # Find line numbers where sections start - sectionBoundaries = [ {"start": contentStart} ] + #sectionBoundaries = [ {"start": contentStart} ] + + sectionBoundaries = [] # Set to true if a reply-from-user begining delimiter is parsed. Set to false if the ending delimiter is encountered - replyFromUserDelimiter = False + #replyFromUserDelimiter = False # Parses the entire contents of the message, stores everything before any delimiter as the initial message + # and the line number of any delimiters for lineNumber in range(contentStart, contentEnd + 1): line = self.__rawItem[lineNumber] if line.startswith("***") or line.startswith("===") and not line.startswith("===="): + for delimiter in delimiters: + + if line.startswith(delimiter["pattern"]): + + sectionBoundaries.append({"start": lineNumber, "type": delimiter["name"]}) + break - # Signifies that the inital message has been copletely parsed - endInitialMessage = True + # Signifies that the inital message has been completely parsed + initialMessageSection = False + + # Stores what line every delimeter starts/ends + #sectionBoundaries.append({"start": lineNumber, "type": "delimiter"}) - if replyFromUserDelimiter == False and line.startswith("===") and not line.startswith("===="): + #if replyFromUserDelimiter == False and line.startswith("===") and not line.startswith("===="): # Stores what line every delimeter starts/ends - sectionBoundaries.append({"start": lineNumber}) + # sectionBoundaries.append({"start": lineNumber, "type": "delimiter"}) - replyFromUserDelimiter = True + #replyFromUserDelimiter = True # Checks for nesteded delimiters within the reply from user - elif replyFromUserDelimiter == True and (line.startswith("===") or line.startswith("***")) and not line.startswith("===="): + #elif replyFromUserDelimiter == True and (line.startswith("===") or line.startswith("***")) and not line.startswith("===="): - columnNum = 0 + #columnNum = 0 - errorMessage = "Nested delimiter 
encountered" + #errorMessage = "Nested delimiter encountered" - errorDictionary = self.__errorParsing(line, lineNumber, columnNum, errorMessage) + #errorDictionary = self.__errorParsing(line, lineNumber, columnNum, errorMessage) # Appends the error dictionary to sections - sections.append(errorDictionary) + #sections.append(errorDictionary) # Immediately exits the section parsing function because item content needs to be edited from the cli - return sections + #return sections - elif replyFromUserDelimiter == False: + #elif replyFromUserDelimiter == False: - sectionBoundaries.append({"start": lineNumber}) + #sectionBoundaries.append({"start": lineNumber}) - elif line.startswith("===="): + #elif line.startswith("===="): - replyFromUserDelimiter = False + #replyFromUserDelimiter = False - elif endInitialMessage == False: + elif initialMessageSection == True: # Delimiter not encountered yet, so append line to initial message list - initialMessageContent.append(line) + sectionBoundaries.append({"start": lineNumber, "type": "initial_message"}) - # Removes unecessary newlines from the begining and the end of the initial message - - initialMessageContent = self.__getFormattedMessageContent(initialMessageContent) - - # Gets the initial message date from the header - initialMessageDateStr = self.__getMostRecentHeaderByType("Date") - - # Formats the initial message date to UTC - initialMessageFormattedDate = self.__getFormattedDate(initialMessageDateStr) - - # Stores list of dictionaries for the recipients of the initial message - initialMessageRecipientsSection = [] - - # Parses the header looking for recipients of the initial message and stores it in a list of tuples - initialMessageRecipientsList = email.utils.getaddresses([self.__getMostRecentHeaderByType("To")]) - - # Parses the CC list and stores the cc recipient information in a list of dictionaries - for recipients in initialMessageRecipientsList: - - initialMessageRecipientsSection.append( - {"name": recipients[0], 
- "email": recipients[1]} - ) - - # Stores list of dictionaries for CC information - initialMessageCCSection = [] - - # Parses the header looking for CC recipients of the initial message and stores it in a list of tuples - initialMessageCCList = email.utils.getaddresses([self.__getMostRecentHeaderByType("CC")]) - - # Parses the CC list and stores the cc recipient information in a list of dictionaries - for ccRecipients in initialMessageCCList: - - initialMessageCCSection.append( - {"name": ccRecipients[0], - "email": ccRecipients[1]} - ) - - # Appends all initial message information to the sections array - sections.append( - {"type": "initial_message", - "datetime": initialMessageFormattedDate, - "from_name": self.__parseFromData(data="userName"), - "user_email": self.__parseFromData(data="userEmail"), - "to": initialMessageRecipientsSection, - "cc": initialMessageCCSection, - "content": initialMessageContent} - ) + initialMessageSection = False # Assignment Information assignedBy = "" @@ -339,7 +312,7 @@ def __parseSections(self) -> list: sectionBoundaries.append({"start": contentEnd + 1}) - # Set line number where section end + # Sets the end of the section boundary to the begining of the next section boundary for boundaryIndex in range(0, len(sectionBoundaries) - 1): sectionBoundaries[boundaryIndex]["end"] = sectionBoundaries[boundaryIndex + 1]["start"] @@ -347,14 +320,6 @@ def __parseSections(self) -> list: # Remove End of File boundary del sectionBoundaries[-1] - # Different delimiters for different message events - delimiters = [ - {"name": "edit", "pattern": "*** Edited"}, - {"name": "status", "pattern": "*** Status"}, - {"name": "replyToUser", "pattern": "*** Replied"}, - {"name": "replyFromUser", "pattern": "=== "}, - ] - # Parses through all the boundaries in section boundaries for boundary in sectionBoundaries: @@ -376,6 +341,10 @@ def __parseSections(self) -> list: # Returns all of the lines within the current section sectionContent = 
self.__rawItem[boundary["start"] : boundary["end"]] + if boundary["type"] == "initial_message": + initialMessageDictionary = self.__initialMessageParsing(boundary["start"], boundary["end"]) + sections.append(initialMessageDictionary) + # Checks for each section type if sectionType == "edit": @@ -440,6 +409,7 @@ def __directoryParsing(self, directoryStartLine: int) -> dict: """Returns a dictionary with directory information Returns: dictionary: + "type": "directoryInformation" "Name": name, "Login": login, "Computer": computer, @@ -453,29 +423,118 @@ def __directoryParsing(self, directoryStartLine: int) -> dict: "Host ECNDB": host_ecdbn, "Subject": subject """ - directoryInformation = {} - - directoryEndingLine = directoryStartLine - while directoryEndingLine - directoryStartLine <= 11: + directoryInformation = {"type": "directory_information"} + + # Assumes a full directory with 12 items including the starting line + directoryEndingLine = directoryStartLine + 11 + + # Executies until the directory start line is greater than the directory ending line + while directoryStartLine <= directoryEndingLine: - info = self.__rawItem[directoryEndingLine] + # Returns the line number at directory start line + info = self.__rawItem[directoryStartLine] + # Breaks the loop if it encountrs a newline, signifying the end of the directory information if info == "\n": break - + else: + + # Removes white including space, newlines, and tabs from the directory info line strippedInfo = info.strip() - try: + + # Attempts to find ": " but will accept ":", denoting a blank entry for a directory item + if ": " in strippedInfo: + + # Seperates the directory info line into two variables, the first variable being the key, the second being the value key, value = strippedInfo.split(": ") - except: + + # Adds the key value pair to the directory info dictionary + directoryInformation[key] = value + + elif ":" in strippedInfo: + + # Seperates the directory info line into two variables, the first 
variable being the key, the second being the value key, value = strippedInfo.split(":") - directoryInformation[key] = value + # Adds the key value pair to the directory info dictionary + directoryInformation[key] = value - directoryEndingLine = directoryEndingLine + 1 + # Counter to denote the end of the directory + directoryStartLine = directoryStartLine + 1 + # Returns the directory information dictionary return directoryInformation + + def __initialMessageParsing(self, startLine: int, endLine: int) -> dict: + """Returns a dictionary with initial message information + + Returns: + dictionary: "type": "initial_message", + "datetime": utcdate, + "from_name": fromName, + "user_email": userEmail, + "to": [{email, name}], + "cc": [{email, name}], + "content": ["message_content"] + """ + initialMessageDictionary = { + #"type": "initial_message", + #"datetime": initialMessageFormattedDate, + #"from_name": self.__parseFromData(data="userName"), + #"user_email": self.__parseFromData(data="userEmail"), + #"to": initialMessageRecipientsSection, + #"cc": initialMessageCCSection, + #"content": initialMessageContent + } + + initialMessageDictionary["type"] = "initial_message" + + # Gets the initial message date from the header + rawMessageDateStr = self.__getMostRecentHeaderByType("Date") + + # Sets datetime in the intialMessage dictionary to UTC formatted date + initialMessageDictionary["datetime"] = self.__getFormattedDate(rawMessageDateStr) + + initialMessageDictionary["from_name"] = self.__parseFromData(data="userName") + + initialMessageDictionary["user_email"] = self.__parseFromData(data="userEmail") + + # Stores list of dictionaries for the recipients of the initial message + initialMessageDictionary["to"] = [] + + # Parses the header looking for recipients of the initial message and stores it in a list of tuples + rawMessageRecipientsList = email.utils.getaddresses([self.__getMostRecentHeaderByType("To")]) + + # Parses the CC list and stores the cc recipient 
information in a list of dictionaries + for recipients in rawMessageRecipientsList: + + initialMessageDictionary["to"].append( + {"name": recipients[0], + "email": recipients[1]} + ) + + # Stores list of dictionaries for CC information + initialMessageDictionary["cc"] = [] + + # Parses the header looking for CC recipients of the initial message and stores it in a list of tuples + rawMessageCCList = email.utils.getaddresses([self.__getMostRecentHeaderByType("CC")]) + + # Parses the CC list and stores the cc recipient information in a list of dictionaries + for ccRecipients in rawMessageCCList: + + initialMessageDictionary["cc"].append( + {"name": ccRecipients[0], + "email": ccRecipients[1]} + ) + + rawMessageContent = self.__rawItem[startLine : endLine] + + # Removes unecessary newlines from the begining and the end of the initial message + initialMessageDictionary["content"] = self.__getFormattedMessageContent(rawMessageContent) + + return initialMessageDictionary def __editParsing(self, line: str) -> dict: """Returns a dictionary with edit information @@ -497,10 +556,10 @@ def __editParsing(self, line: str) -> dict: formattedDateTime = self.__getFormattedDate(dateTimeString) editInfo = { - "type": "edit", - "datetime": formattedDateTime, - "by": editedBy, - "content": "" + "type": "edit", + "datetime": formattedDateTime, + "by": editedBy, + "content": "" } return editInfo @@ -596,8 +655,6 @@ def __userReplyParsing(self, replyContent: list) -> dict: subject = (re.search("(?<=Subject: )(.*)", line)).group() linesToRemove.append(lineNum) - - continue elif line.startswith("From: "): @@ -612,8 +669,6 @@ def __userReplyParsing(self, replyContent: list) -> dict: linesToRemove.append(lineNum) - continue - elif line.startswith("Date: "): # Matches everything after "Date: " @@ -626,8 +681,6 @@ def __userReplyParsing(self, replyContent: list) -> dict: linesToRemove.append(lineNum) - continue - elif line.startswith("Cc: "): # Returns a list of tuples with email information @@ 
-643,8 +696,6 @@ def __userReplyParsing(self, replyContent: list) -> dict: ) linesToRemove.append(lineNum) - - continue # Deletes reduntant lines from the message content in reverse order for lineNum in sorted(linesToRemove, reverse = True): @@ -722,13 +773,16 @@ def __errorParsing(self, line: str, lineNum: int, lineColumn: int, errorMessage: "content": [] } - # Error message with line and column numbers + # Error message with item line and column numbers errorMessage = errorMessage + " at " + str(lineNum) + ":" + str(lineColumn) + # Appends the error message to the content list in the error dictionary errorDictionary["content"].append(errorMessage) + # Appends the item line to the content list in the error dictionary errorDictionary["content"].append(line) + # Returns the error dictionary return errorDictionary def __isLocked(self) -> Union[str, bool]: @@ -919,4 +973,10 @@ def getQueues() -> list: if isDirectory and isValid: queues.append(Queue(file)) - return queues \ No newline at end of file + return queues +if __name__ == "__main__": + item = Item("ce", 11) + print() +# for queue in getQueues(): +# for item in queue.items: +# print(f"${item.queue} ${item.number}") \ No newline at end of file