From ef86c7ab8d507ab3cf510e9dff9bfec929f68f8c Mon Sep 17 00:00:00 2001 From: Jacob Daniel Bennett Date: Mon, 12 Oct 2020 11:50:45 -0400 Subject: [PATCH] Error parsing implementation --- api/ECNQueue.py | 211 +++++++++++++++++++++++++----------------------- 1 file changed, 110 insertions(+), 101 deletions(-) diff --git a/api/ECNQueue.py b/api/ECNQueue.py index a98d45f..c42942d 100644 --- a/api/ECNQueue.py +++ b/api/ECNQueue.py @@ -205,19 +205,8 @@ def __parseSections(self) -> list: # Sets the initial message start to the next line after all directory lines and newlines contentStart = contentStart + len(directoryInfo) + 1 - #else: - - # Initialize an empty dictionary for content - #directoryInfo["content"] = {} - - # Find line numbers where sections start - #sectionBoundaries = [ {"start": contentStart} ] - sectionBoundaries = [] - # Set to true if a reply-from-user begining delimiter is parsed. Set to false if the ending delimiter is encountered - #replyFromUserDelimiter = False - # Parses the entire contents of the message, stores everything before any delimiter as the initial message # and the line number of any delimiters for lineNumber in range(contentStart, contentEnd + 1): @@ -233,40 +222,7 @@ def __parseSections(self) -> list: break # Signifies that the inital message has been completely parsed - initialMessageSection = False - - # Stores what line every delimeter starts/ends - #sectionBoundaries.append({"start": lineNumber, "type": "delimiter"}) - - #if replyFromUserDelimiter == False and line.startswith("===") and not line.startswith("===="): - - # Stores what line every delimeter starts/ends - # sectionBoundaries.append({"start": lineNumber, "type": "delimiter"}) - - #replyFromUserDelimiter = True - - # Checks for nesteded delimiters within the reply from user - #elif replyFromUserDelimiter == True and (line.startswith("===") or line.startswith("***")) and not line.startswith("===="): - - #columnNum = 0 - - #errorMessage = "Nested delimiter encountered" - - #errorDictionary = self.__errorParsing(line, lineNumber, columnNum, errorMessage) - - # Appends the error dictionary to sections - #sections.append(errorDictionary) - - # Immediately exits the section parsing function because item content needs to be edited from the cli - #return sections - - #elif replyFromUserDelimiter == False: - - #sectionBoundaries.append({"start": lineNumber}) - - #elif line.startswith("===="): - - #replyFromUserDelimiter = False + initialMessageSection = False elif initialMessageSection == True: @@ -325,32 +281,27 @@ def __parseSections(self) -> list: # Sets line to the first line of the boundary (which is always the delimiter) line = self.__rawItem[boundary["start"]] - - sectionType = None - - # Looks at the begining of line and determines if it starts with any of the delimiters, - # if so, name it accordingly - for delimiter in delimiters: - - if line.startswith(delimiter["pattern"]): - - sectionType = delimiter["name"] - - break # Returns all of the lines within the current section sectionContent = self.__rawItem[boundary["start"] : boundary["end"]] if boundary["type"] == "initial_message": - initialMessageDictionary = self.__initialMessageParsing(boundary["start"], boundary["end"]) + initialMessageDictionary = self.__initialMessageParsing(sectionContent) sections.append(initialMessageDictionary) # Checks for each section type - if sectionType == "edit": + elif boundary["type"] == "edit": # Returns a dictionary with edit information - editInfo = self.__editParsing(line) + editInfo = self.__editParsing(line, boundary["start"]) + # Checks for a parse error and appends it to sections and exits the function + if editInfo["type"] == "parse_error": + + sections.append(editInfo) + + return sections + # Remove the delimiter String and unecessary newlines sectionContent = self.__getFormattedMessageContent(sectionContent) @@ -362,10 +313,16 @@ def __parseSections(self) -> list: continue - elif sectionType == "replyToUser": + elif boundary["type"] == "replyToUser": # Returns a dictionary with reply-to information - replyToInfo = self.__replyToParsing(line) + replyToInfo = self.__replyToParsing(line, boundary["start"]) + + if replyToInfo["type"] == "parse_error": + + sections.append(replyToInfo) + + return sections # Removes the begining delimiter sectionContent = self.__getFormattedMessageContent(sectionContent) @@ -378,10 +335,16 @@ def __parseSections(self) -> list: continue - elif sectionType == "status": + elif boundary["type"] == "status": # Returns a dictionary with status information - statusInfo = self.__statusParsing(line) + statusInfo = self.__statusParsing(line, boundary["start"]) + + if statusInfo["type"] == "parse_error": + + sections.append(statusInfo) + + return sections # Removes the begining delimiter sectionContent = self.__getFormattedMessageContent(sectionContent) @@ -394,10 +357,16 @@ def __parseSections(self) -> list: continue - elif sectionType == "replyFromUser": + elif boundary["type"] == "replyFromUser": # Returns a dictionary with userReply information - replyFromInfo = self.__userReplyParsing(sectionContent) + replyFromInfo = self.__userReplyParsing(sectionContent, boundary["start"]) + + if replyFromInfo["type"] == "parse_error": + + sections.append(replyFromInfo) + + return sections # Appends the replyFrom to sections sections.append(replyFromInfo) @@ -448,7 +417,8 @@ def __directoryParsing(self, directoryStartLine: int) -> dict: if ": " in strippedInfo: # Seperates the directory info line into two variables, the first variable being the key, the second being the value - key, value = strippedInfo.split(": ") + # swt1 + key, value = strippedInfo.split(": ", 1) # Adds the key value pair to the directory info dictionary directoryInformation[key] = value @@ -456,7 +426,7 @@ def __directoryParsing(self, directoryStartLine: int) -> dict: elif ":" in strippedInfo: # Seperates the directory info line into two variables, the first variable being the key, the second being the value - key, value = strippedInfo.split(":") + key, value = strippedInfo.split(":", 1) # Adds the key value pair to the directory info dictionary directoryInformation[key] = value @@ -467,7 +437,7 @@ def __directoryParsing(self, directoryStartLine: int) -> dict: # Returns the directory information dictionary return directoryInformation - def __initialMessageParsing(self, startLine: int, endLine: int) -> dict: + def __initialMessageParsing(self, content: list) -> dict: """Returns a dictionary with initial message information Returns: @@ -479,15 +449,7 @@ def __initialMessageParsing(self, startLine: int, endLine: int) -> dict: "cc": [{email, name}], "content": ["message_content"] """ - initialMessageDictionary = { - #"type": "initial_message", - #"datetime": initialMessageFormattedDate, - #"from_name": self.__parseFromData(data="userName"), - #"from_email": self.__parseFromData(data="userEmail"), - #"to": initialMessageRecipientsSection, - #"cc": initialMessageCCSection, - #"content": initialMessageContent - } + initialMessageDictionary = {} initialMessageDictionary["type"] = "initial_message" @@ -529,14 +491,14 @@ def __initialMessageParsing(self, startLine: int, endLine: int) -> dict: "email": ccRecipients[1]} ) - rawMessageContent = self.__rawItem[startLine : endLine] + #rawMessageContent = self.__rawItem[startLine : endLine] # Removes unecessary newlines from the begining and the end of the initial message - initialMessageDictionary["content"] = self.__getFormattedMessageContent(rawMessageContent) + initialMessageDictionary["content"] = self.__getFormattedMessageContent(content) return initialMessageDictionary - def __editParsing(self, line: str) -> dict: + def __editParsing(self, line: str, lineNum: int) -> dict: """Returns a dictionary with edit information Returns: @@ -546,11 +508,28 @@ def __editParsing(self, line: str) -> dict: formattedDateTime = "" editedBy = "" + if not line.endswith(" ***\n"): + + columnNum = len(line) - 1 + errorMessage = "Expected the delimiter to end with \" ***\n\"" + + return self.__errorParsing(line, lineNum, columnNum, errorMessage) + # Parses for the author of the edit, which is located between the "*** Edited by: " and " at:" substrings editedBy = (re.search("(?<=\*{3} Edited by: )(.*)(?= at:)", line)).group() + + # ece23 + try: + # Parses for the date and time of the edit, which is located between the " at: " and "***\n" substrings + dateTimeString = (re.search("(?<= at: )(.*)(?= \*\*\*\n)", line)).group() - # Parses for the date and time of the edit, which is located between the " at: " and "***\n" substrings - dateTimeString = (re.search("(?<= at: )(.*)(?= \*\*\*\n)", line)).group() + except: + # Returns an error message if there is no space after "at:" + + columnNum = line.find("at:") + 3 + errorMessage = "Expected a space after \"at:\" followed by the date which is followed by \" ***\n\"" + + return self.__errorParsing(line, lineNum, columnNum, errorMessage) # Attempts to format the date and time into utc format formattedDateTime = self.__getFormattedDate(dateTimeString) @@ -564,7 +543,7 @@ def __editParsing(self, line: str) -> dict: return editInfo - def __replyToParsing(self, line: str) -> dict: + def __replyToParsing(self, line: str, lineNum: int) -> dict: """Returns a dictionary with reply to user information Returns: @@ -574,6 +553,15 @@ def __replyToParsing(self, line: str) -> dict: formattedDateTime = "" repliedBy = "" + #tech112 + # Checks for malformed delimiter + if not line.endswith(" ***\n"): + + columnNum = len(line) - 1 + errorMessage = "Expected the delimiter to end with \" ***\n\"" + + return self.__errorParsing(line, lineNum, columnNum, errorMessage) + # Parses for the author of the reply, which is located between the "*** Replied by: " and " at:" substrings repliedBy = (re.search("(?<=\*{3} Replied by: )(.*)(?= at:)", line)).group() @@ -592,7 +580,7 @@ def __replyToParsing(self, line: str) -> dict: return replyInfo - def __statusParsing(self, line: str) -> dict: + def __statusParsing(self, line: str, lineNum: int) -> dict: """Returns a dictionary with status information Returns: @@ -604,7 +592,15 @@ def __statusParsing(self, line: str) -> dict: # Parses for the author of the status change, which is located between the "*** Status updated by: " and " at:" substrings updatedBy = (re.search("(?<=\*{3} Status updated by: )(.*)(?= at:)", line)).group() + + # tech 56 + if not line.endswith(" ***\n"): + columnNum = len(line) - 1 + errorMessage = "Expected the delimiter to end with \" ***\n\"" + + return self.__errorParsing(line, lineNum, columnNum, errorMessage) + # Parses for the date and time of the status change, which is located between the " at: " and "***\n" substrings dateTimeString = re.search("(?<= at: )(.*)(?= \*\*\*\n)", line).group() @@ -620,7 +616,7 @@ def __statusParsing(self, line: str) -> dict: return statusInfo - def __userReplyParsing(self, replyContent: list) -> dict: + def __userReplyParsing(self, replyContent: list, lineNumber: int) -> dict: """Returns a dictionary with user Reply information information Returns: @@ -632,7 +628,7 @@ def __userReplyParsing(self, replyContent: list) -> dict: subject = "" ccRecipientsList = [] newLineCounter = 0 - + endingDelimiterCount = 0 # Delimiter information line numbers to remove from reply from user linesToRemove =[] @@ -643,20 +639,34 @@ def __userReplyParsing(self, replyContent: list) -> dict: #Checks for a newline and breaks for loop on second occurance of a newline if line == "\n": + newLineCounter = newLineCounter + 1 - if newLineCounter == 2: - break + if line.startswith("===="): + + endingDelimiterCount = endingDelimiterCount + 1 + + if endingDelimiterCount > 1: + + errorMessage = "Encountered two reply-from-user ending delimiters and expected only one" + + return self.__errorParsing(line, lineNumber + lineNum + 1, 0, errorMessage) + + elif endingDelimiterCount == 0 and lineNum == len(replyContent) - 1: + + errorMessage = "Did not encounter a reply-from-user ending delimiter" + + return self.__errorParsing(line, lineNumber + lineNum + 1, 0, errorMessage) # Checks for lines starting with Subject, From, Date and CC - if line.startswith("Subject: "): + elif line.startswith("Subject: ") and newLineCounter < 2: # Matches everything after "Subject: " in the line subject = (re.search("(?<=Subject: )(.*)", line)).group() linesToRemove.append(lineNum) - elif line.startswith("From: "): + elif line.startswith("From: ") and newLineCounter < 2: # Returns a list of tuples with name and email information emailList = email.utils.getaddresses([line]) @@ -669,19 +679,19 @@ def __userReplyParsing(self, replyContent: list) -> dict: linesToRemove.append(lineNum) - elif line.startswith("Date: "): + elif line.startswith("Date: ") and newLineCounter < 2: # Matches everything after "Date: " - try: - dateStr = (re.search("(?<=Date: )(.*)", line)).group() - except: - dateStr = "" + + dateStr = (re.search("(?<=Date: )(.*)", line)).group() + + dateStr = "" # Formatts the date to UTC formattedDateTime = self.__getFormattedDate(dateStr) linesToRemove.append(lineNum) - elif line.startswith("Cc: "): + elif line.startswith("Cc: ") and newLineCounter < 2: # Returns a list of tuples with email information recipientsList = email.utils.getaddresses([line]) @@ -693,8 +703,8 @@ def __userReplyParsing(self, replyContent: list) -> dict: ccRecipientsList.append( {"name":cc[0], "email":cc[1]} - ) - + ) + linesToRemove.append(lineNum) # Deletes reduntant lines from the message content in reverse order @@ -707,7 +717,6 @@ def __userReplyParsing(self, replyContent: list) -> dict: replyFromInfo = { "type": "reply_from_user", "datetime": formattedDateTime, - #"subject": subject, "from_name": repliedByName, "from_email": repliedByEmail, "cc": ccRecipientsList, @@ -975,7 +984,7 @@ def getQueues() -> list: return queues if __name__ == "__main__": - item = Item("ce", 11) + item = Item("aae", 2) print() # for queue in getQueues(): # for item in queue.items: