From f86d8da50bfbfde19b7245fcd4f424e79d435d52 Mon Sep 17 00:00:00 2001 From: Jacob Daniel Bennett Date: Mon, 14 Sep 2020 13:05:11 -0400 Subject: [PATCH] Partial Section Parsing Capability for Directory Info, edits, reply to user, and status changes. --- api/ECNQueue.py | 184 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 181 insertions(+), 3 deletions(-) diff --git a/api/ECNQueue.py b/api/ECNQueue.py index e38cb70..976f770 100644 --- a/api/ECNQueue.py +++ b/api/ECNQueue.py @@ -3,7 +3,8 @@ #------------------------------------------------------------------------------# # Imports #------------------------------------------------------------------------------# -import os, time, email, re +import os, time, email, re, datetime +from dateutil.parser import parse from typing import Union import json @@ -42,7 +43,7 @@ def __init__(self, queue: str, number: int) -> None: self.lastUpdated = self.__getLastUpdated() self.__rawItem = self.__getRawItem() self.headers = self.__parseHeaders() - self.content = self.__getContent() + self.content = self.__parseSections() self.isLocked = self.__isLocked() self.userEmail = self.__parseFromData(data="userEmail") self.userName = self.__parseFromData(data="userName") @@ -174,6 +175,179 @@ def __getContent(self) -> list: # TODO: Implement section parsing. 
+    def __parseSections(self) -> list:
+        """Split the body of the item into typed sections.
+
+        The body is every line of the raw item after the header boundary. A
+        section starts on the first body line and on each later line that
+        begins with "***", or with "===" but not "====". Body lines that begin
+        with a directory information prefix are also gathered into a single
+        "directoryInformation" section.
+
+        Returns:
+            list: One dictionary per section, directory information first and
+                the rest in the order they appear. Each has a "type"
+                ("initialMessage", "edit", "status", "replyToUser" or
+                "replyFromUser") and a "content" list of raw lines; "edit",
+                "status" and "replyToUser" sections also carry "by", "date"
+                and "time". An item with no body yields an empty list.
+        """
+        sections = []
+        rawItem = self.__rawItem
+
+        contentStart = self.__getHeaderBoundary() + 1
+        contentEnd = len(rawItem) - 1
+
+        # An item without a body has no sections; indexing below would fail.
+        if contentStart > contentEnd:
+            return sections
+
+        # NOTE(review): these prefixes must match the leading whitespace used
+        # in the queue files exactly -- confirm against a real item.
+        directoryInfoPattern = (
+            "\tName: ",
+            " Login: ",
+            " Computer: ",
+            " Location: ",
+            " Email: ",
+            " Phone: ",
+            " Office: ",
+            " UNIX Dir: ",
+            " Zero Dir: ",
+            " User ECNDB: ",
+            " Host ECNDB: ",
+            " Subject: ",
+        )
+        directoryInfo = ["\n"]
+
+        # Line numbers on which a section starts; the body always opens one.
+        sectionStarts = [contentStart]
+
+        for lineNumber in range(contentStart, contentEnd + 1):
+            line = rawItem[lineNumber]
+            isDelimiter = line.startswith("***") or (
+                line.startswith("===") and not line.startswith("====")
+            )
+            if isDelimiter:
+                # The first body line already opened a section; recording it
+                # twice would emit an empty duplicate of that section.
+                if lineNumber != contentStart:
+                    sectionStarts.append(lineNumber)
+            elif line.startswith(directoryInfoPattern):
+                directoryInfo.append(line)
+
+        if len(directoryInfo) > 1:
+            sections.append(
+                {"type": "directoryInformation", "content": directoryInfo}
+            )
+
+        # Each section runs up to the start of the next one, or to the end of
+        # the item for the last section.
+        sectionEnds = sectionStarts[1:] + [contentEnd + 1]
+
+        # (section type, delimiter prefix, action text of the "<action> by:"
+        # header, or None when the delimiter line is not parsed any further).
+        # TODO: parse the "=== " delimiter line of replies from the user.
+        delimiters = (
+            ("edit", "*** Edited", "Edited"),
+            ("status", "*** Status", "Status updated"),
+            ("replyToUser", "*** Replied", "Replied"),
+            ("replyFromUser", "=== ", None),
+        )
+
+        for start, end in zip(sectionStarts, sectionEnds):
+            line = rawItem[start]
+
+            # A section whose first line matches no known delimiter is treated
+            # as the initial message.
+            sectionType, action = "initialMessage", None
+            for name, pattern, headerAction in delimiters:
+                if line.startswith(pattern):
+                    sectionType, action = name, headerAction
+                    break
+
+            section = {"type": sectionType}
+            if action is not None:
+                section.update(self.__parseDelimiterLine(line, action))
+            section["content"] = rawItem[start:end]
+            sections.append(section)
+
+        return sections
+
+    def __parseDelimiterLine(self, line: str, action: str) -> dict:
+        """Extract author and timestamp from a "*** <action> by: ... ***" line.
+
+        Args:
+            line: The delimiter line that opens the section.
+            action: The text between "*** " and " by: ", e.g. "Edited".
+
+        Returns:
+            dict: "by" (the author, or "" when the line has no author part),
+                "date" formatted as %Y-%m-%dT%H:%M:%S%z and "time" formatted
+                as %H:%M:%S%z. "date" is "invalid" and "time" is "Invalid"
+                when the timestamp is missing or cannot be parsed.
+        """
+        # The author sits between "*** <action> by: " and the last " at:".
+        byMatch = re.search(
+            r"(?<=\*{3} " + re.escape(action) + r" by: )(.*)(?= at:)", line
+        )
+        # The timestamp sits between " at: " and the closing " ***"; any
+        # trailing whitespace (or none at all) is accepted after the stars.
+        dateMatch = re.search(r"(?<= at: )(.*)(?= \*{3}\s*$)", line)
+
+        parsed = {
+            "by": byMatch.group() if byMatch else "",
+            # The two fallbacks differ in casing; left unchanged in case a
+            # caller compares against them.
+            "date": "invalid",
+            "time": "Invalid",
+        }
+
+        if dateMatch:
+            try:
+                dateObject = parse(dateMatch.group())
+                formattedDate = dateObject.strftime("%Y-%m-%dT%H:%M:%S%z")
+                formattedTime = dateObject.strftime("%H:%M:%S%z")
+            except (ValueError, OverflowError):
+                # Unparseable timestamp: keep the fallback values.
+                pass
+            else:
+                parsed["date"] = formattedDate
+                parsed["time"] = formattedTime
+
+        return parsed
+
     def __isLocked(self) -> Union[str, bool]:
         """Returns a string info about the lock if true and a bool False if false
 
@@ -345,4 +519,8 @@ def getQueues() -> list:
         if isDirectory and isValid:
             queues.append(Queue(file))
 
-    return queues
\ No newline at end of file
+    return queues
+
+if __name__ == "__main__":
+    item = Item("ce", 11)
+    print()