From 92e11f24daeb2b05ced1823071d3d5ce24ca735e Mon Sep 17 00:00:00 2001 From: Jacob Daniel Bennett Date: Fri, 16 Oct 2020 09:14:39 -0400 Subject: [PATCH] Error parsing detects ending reply-from-user delimters --- api/ECNQueue.py | 152 +++++++++++++++++++++++------------------------- 1 file changed, 73 insertions(+), 79 deletions(-) diff --git a/api/ECNQueue.py b/api/ECNQueue.py index 1093734..8740614 100644 --- a/api/ECNQueue.py +++ b/api/ECNQueue.py @@ -293,7 +293,7 @@ def __parseSections(self) -> list: elif boundary["type"] == "edit": # Returns a dictionary with edit information - editInfo = self.__editParsing(line, boundary["start"]) + editInfo = self.__editParsing(sectionContent, boundary["start"]) # Checks for a parse error and appends it to sections and exits the function if editInfo["type"] == "parse_error": @@ -302,21 +302,13 @@ def __parseSections(self) -> list: return sections - # Remove the delimiter String and unecessary newlines - sectionContent = self.__getFormattedMessageContent(sectionContent) - - # Appends content of the edit message to the dictionary - editInfo["content"] = sectionContent - # Appends the edit dictionary to sections sections.append(editInfo) - continue - elif boundary["type"] == "replyToUser": # Returns a dictionary with reply-to information - replyToInfo = self.__replyToParsing(line, boundary["start"]) + replyToInfo = self.__replyToParsing(sectionContent, boundary["start"]) if replyToInfo["type"] == "parse_error": @@ -324,21 +316,13 @@ def __parseSections(self) -> list: return sections - # Removes the begining delimiter - sectionContent = self.__getFormattedMessageContent(sectionContent) - - # Appends content of the reply-to message to the dicionary - replyToInfo['content'] = sectionContent - # Appends the reply-to to sections sections.append(replyToInfo) - continue - elif boundary["type"] == "status": # Returns a dictionary with status information - statusInfo = self.__statusParsing(line, boundary["start"]) + statusInfo = self.__statusParsing(sectionContent, boundary["start"]) if statusInfo["type"] == "parse_error": @@ -346,17 +330,9 @@ def __parseSections(self) -> list: return sections - # Removes the begining delimiter - sectionContent = self.__getFormattedMessageContent(sectionContent) - - # Appends content to empty content key to avoid passing large amounts of info that isnt used within the function - statusInfo['content'] = sectionContent - # Appends the status to sections sections.append(statusInfo) - continue - elif boundary["type"] == "replyFromUser": # Returns a dictionary with userReply information @@ -498,7 +474,7 @@ def __initialMessageParsing(self, content: list) -> dict: return initialMessageDictionary - def __editParsing(self, line: str, lineNum: int) -> dict: + def __editParsing(self, content: list, lineNum: int) -> dict: """Returns a dictionary with edit information Returns: @@ -507,35 +483,38 @@ def __editParsing(self, line: str, lineNum: int) -> dict: formattedDateTime = "" editedBy = "" - + delimiterLine = content[0] # Parses for the author of the edit, which is located between the "*** Edited by: " and " at:" substrings try: - editedBy = (re.search("(?<=\*{3} Edited by: )(.*)(?= at:)", line)).group() + editedBy = (re.search("(?<=\*{3} Edited by: )(.*)(?= at:)", delimiterLine)).group() except: errorMessage = "*** Edited by: [username] at: [date and time] ***\n" - return self.__errorParsing(line, lineNum, errorMessage) + return self.__errorParsing(delimiterLine, lineNum, errorMessage) try: # Parses for the date and time of the edit, which is located between the " at: " and "***\n" substrings - dateTimeString = (re.search("(?<= at: )(.*)(?= \*\*\*\n)", line)).group() + dateTimeString = (re.search("(?<= at: )(.*)(?= \*\*\*\n)", delimiterLine)).group() except: # Returns an error message if there is no space after "at:" errorMessage = "*** Edited by: [username] at: [date and time] ***\n" - return self.__errorParsing(line, lineNum, errorMessage) + return self.__errorParsing(delimiterLine, lineNum, errorMessage) # Attempts to format the date and time into utc format formattedDateTime = self.__getFormattedDate(dateTimeString) + # Remove the delimiter String and unecessary newlines + formattedContent = self.__getFormattedMessageContent(content) + editInfo = { "type": "edit", "datetime": formattedDateTime, "by": editedBy, - "content": "" + "content": formattedContent } return editInfo - def __replyToParsing(self, line: str, lineNum: int) -> dict: + def __replyToParsing(self, content: list, lineNum: int) -> dict: """Returns a dictionary with reply to user information Returns: @@ -544,36 +523,43 @@ def __replyToParsing(self, line: str, lineNum: int) -> dict: formattedDateTime = "" repliedBy = "" + delimiterLine = content[0] + for count, line in enumerate(content): + if line.startswith("===="): + errorMessage = "Reply-from-user ending delimter encountered without Reply-from-user starting delimter" + return self.__errorParsing(line, lineNum + count + 1, errorMessage) #tech112 try: # Parses for the author of the reply, which is located between the "*** Replied by: " and " at:" substrings - repliedBy = (re.search("(?<=\*{3} Replied by: )(.*)(?= at:)", line)).group() + repliedBy = (re.search("(?<=\*{3} Replied by: )(.*)(?= at:)", delimiterLine)).group() except: errorMessage = "*** Replied by: [username] at: [date and time] ***\n" - return self.__errorParsing(line, lineNum, errorMessage) + return self.__errorParsing(delimiterLine, lineNum, errorMessage) # Parses for the date and time of the reply, which is located between the " at: " and "***\n" substrings try: - dateTimeString = (re.search("(?<= at: )(.*)(?= \*\*\*\n)", line)).group() + dateTimeString = (re.search("(?<= at: )(.*)(?= \*\*\*\n)", delimiterLine)).group() except: errorMessage = "*** Replied by: [username] at: [date and time] ***\n" - return self.__errorParsing(line, lineNum, errorMessage) + return self.__errorParsing(delimiterLine, lineNum, errorMessage) # Formats date to UTC formattedDateTime = self.__getFormattedDate(dateTimeString) + formattedContent = self.__getFormattedMessageContent(content) + replyInfo = { "type": "reply_to_user", "datetime": formattedDateTime, "by": repliedBy, - "content": "" + "content": formattedContent } return replyInfo - def __statusParsing(self, line: str, lineNum: int) -> dict: + def __statusParsing(self, content: list, lineNum: int) -> dict: """Returns a dictionary with status information Returns: @@ -582,32 +568,42 @@ def __statusParsing(self, line: str, lineNum: int) -> dict: formattedDateTime = "" updatedBy = "" + delimiterLine = content[0] + + for count, line in enumerate(content): + if line.startswith("===="): + errorMessage = "Reply-from-user ending delimter encountered without Reply-from-user starting delimter" + return self.__errorParsing(line, lineNum + count + 1, errorMessage) # Parses for the author of the status change, which is located between the "*** Status updated by: " and " at:" substrings try: - updatedBy = (re.search("(?<=\*{3} Status updated by: )(.*)(?= at:)", line)).group() + updatedBy = (re.search("(?<=\*{3} Status updated by: )(.*)(?= at:)", delimiterLine)).group() except: errorMessage = "*** Status updated by: [username] at: [date and time] ***\n" - return self.__errorParsing(line, lineNum, errorMessage) + return self.__errorParsing(delimiterLine, lineNum, errorMessage) # Parses for the date and time of the status change, which is located between the " at: " and "***\n" substrings try: - dateTimeString = re.search("(?<= at: )(.*)(?= \*\*\*\n)", line).group() + dateTimeString = re.search("(?<= at: )(.*)(?= \*\*\*\n)", delimiterLine).group() except: errorMessage = "*** Status updated by: [username] at: [date and time] ***\n" - return self.__errorParsing(line, lineNum, errorMessage) + return self.__errorParsing(delimiterLine, lineNum, errorMessage) # Formats the date to UTC formattedDateTime = self.__getFormattedDate(dateTimeString) - + + # Remove the delimiter String and unecessary newlines + formattedContent = self.__getFormattedMessageContent(content) + statusInfo = { "type": "status", "datetime": formattedDateTime, "by": updatedBy, - "content": "" - } + "content": formattedContent + } + return statusInfo @@ -617,14 +613,9 @@ def __userReplyParsing(self, replyContent: list, lineNumber: int) -> dict: Returns: dictionary: "type": "replyFromUser", datetime, subject, userName, userEmail, content, ccRecipients """ - replyFromInfo = { - "type": "reply_from_user", - "datetime": "", - "from_name": "", - "from_email": "", - "cc": [], - "content": [] - } + replyFromInfo = {} + + replyFromInfo["type"] = "reply_from_user" newLineCounter = 0 endingDelimiterCount = 0 @@ -636,17 +627,18 @@ def __userReplyParsing(self, replyContent: list, lineNumber: int) -> dict: # number with the enumerate function for lineNum, line in enumerate(replyContent): + if endingDelimiterCount == 0 and lineNum == len(replyContent) - 1: + errorMessage = "Did not encounter a reply-from-user ending delimiter" + return self.__errorParsing(line, lineNumber + lineNum + 1, errorMessage) + #Checks for a newline and breaks for loop on second occurance of a newline if line == "\n": newLineCounter = newLineCounter + 1 - if newLineCounter == 2: - break - elif endingDelimiterCount == 0 and lineNum == len(replyContent) - 1: - errorMessage = "Did not encounter a reply-from-user ending delimiter" - return self.__errorParsing(line, lineNumber + lineNum + 1, errorMessage) + elif line.startswith("===="): + endingDelimiterCount = endingDelimiterCount + 1 - elif line.startswith("From: "): + elif line.startswith("From: ") and newLineCounter == 1: # Returns a list of one tuples with a name stored in the first index of the tuple and an email stored in the second index of the tuple emailList = email.utils.getaddresses([line]) replyFromInfo["from_name"] = emailList[0][0] @@ -654,7 +646,7 @@ def __userReplyParsing(self, replyContent: list, lineNumber: int) -> dict: linesToRemove.append(lineNum) - elif line.startswith("Date: "): + elif line.startswith("Date: ") and newLineCounter == 1: # Matches everything after "Date: " try: dateStr = (re.search("(?<=Date: )(.*)", line)).group() @@ -667,7 +659,10 @@ def __userReplyParsing(self, replyContent: list, lineNumber: int) -> dict: linesToRemove.append(lineNum) - elif line.startswith("Cc: "): + elif line.startswith("Cc: ") and newLineCounter == 1: + + replyFromInfo["cc"] = [] + # Returns a list of tuples with email information recipientsList = email.utils.getaddresses([line]) @@ -675,8 +670,8 @@ def __userReplyParsing(self, replyContent: list, lineNumber: int) -> dict: for cc in recipientsList: # Stores the cc information in a dictionary and appends it to the ccRecipientsList replyFromInfo["cc"].append( - {"name":cc[0], - "email":cc[1]} + {"name": cc[0], + "email": cc[1]} ) linesToRemove.append(lineNum) @@ -732,25 +727,24 @@ def __errorParsing(self, line: str, lineNum: int, expectedSyntax: str) -> dict: } """ - errorDictionary = { - "type": "parse_error", - "datetime": self.__getFormattedDate(str(datetime.datetime.now())), - "file_path": "", - "expected": "", - "got": "", - "line_num": 0 - } + errorDictionary = {} + + # Type + errorDictionary["type"] = "parse_error" + + # Dateime of the parse error + errorDictionary["datetime"] = self.__getFormattedDate(str(datetime.datetime.now())) - # Filepath + # Item filepath errorDictionary["file_path"] = self.__path - # Error message with itemm line and column numbers + # Expected value errorDictionary["expected"] = expectedSyntax - # Appends the item line to the content list in the error dictionary + # line that threw error errorDictionary["got"] = line - # Apeends error num to the dictonary + # line number that threw error errorDictionary["line_num"] = lineNum # returns the error dictionary @@ -944,4 +938,4 @@ def getQueues() -> list: if isDirectory and isValid: queues.append(Queue(file)) - return queues + return queues \ No newline at end of file