From f86d8da50bfbfde19b7245fcd4f424e79d435d52 Mon Sep 17 00:00:00 2001
From: Jacob Daniel Bennett <benne238@acererak.ecn.purdue.edu>
Date: Mon, 14 Sep 2020 13:05:11 -0400
Subject: [PATCH] Partial Section Parsing Capability for Directory Info, edits,
 reply to user, and status changes.

---
 api/ECNQueue.py | 184 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 181 insertions(+), 3 deletions(-)

diff --git a/api/ECNQueue.py b/api/ECNQueue.py
index e38cb70..976f770 100644
--- a/api/ECNQueue.py
+++ b/api/ECNQueue.py
@@ -3,7 +3,8 @@
 #------------------------------------------------------------------------------#
 # Imports
 #------------------------------------------------------------------------------#
-import os, time, email, re
+import os, time, email, re, datetime
+from dateutil.parser import parse
 from typing import Union
 import json
 
@@ -42,7 +43,7 @@ def __init__(self, queue: str, number: int) -> None:
 		self.lastUpdated = self.__getLastUpdated()
 		self.__rawItem = self.__getRawItem()
 		self.headers = self.__parseHeaders()
-		self.content = self.__getContent()
+		self.content = self.__parseSections()
 		self.isLocked = self.__isLocked()
 		self.userEmail = self.__parseFromData(data="userEmail")
 		self.userName = self.__parseFromData(data="userName")
@@ -174,6 +175,133 @@ def __getContent(self) -> list:
 
 	# TODO: Implement section parsing.
 
+	def __parseSections(self) -> list:
+		"""Splits the item's content into typed sections.
+
+		Content is every line of the raw item after the header boundary. A new
+		section starts at each line that begins with "***", or with "===" but
+		not "====". Lines before the first such line form the initial message.
+
+		Returns:
+			list: One dictionary per section with "type" and "content" keys.
+				"edit", "replyToUser" and "status" sections also have "by",
+				"date" and "time" keys parsed from their delimiter line. A
+				"directoryInformation" entry comes first when any directory
+				information lines are present.
+		"""
+		sections = []
+
+		contentStart = self.__getHeaderBoundary() + 1
+		contentEnd = len(self.__rawItem) - 1
+
+		# Nothing follows the headers, so there is nothing to split
+		if contentStart > contentEnd:
+			return sections
+
+		directoryInfoPrefixes = (
+			"\tName: ",
+			"       Login: ",
+			"    Computer: ",
+			"    Location: ",
+			"       Email: ",
+			"       Phone: ",
+			"      Office: ",
+			"    UNIX Dir: ",
+			"    Zero Dir: ",
+			"  User ECNDB: ",
+			"  Host ECNDB: ",
+			"     Subject: ",
+		)
+		directoryInfo = ["\n"]
+
+		# Line numbers where sections start; the content always opens one
+		sectionStarts = [contentStart]
+
+		for lineNumber in range(contentStart, contentEnd + 1):
+			line = self.__rawItem[lineNumber]
+			isDelimiter = line.startswith("***") or (
+				line.startswith("===") and not line.startswith("====")
+			)
+			if isDelimiter:
+				# A delimiter on the very first content line must not open a
+				# second, empty section at the same position
+				if lineNumber != sectionStarts[-1]:
+					sectionStarts.append(lineNumber)
+			elif line.startswith(directoryInfoPrefixes):
+				directoryInfo.append(line)
+
+		if len(directoryInfo) > 1:
+			sections.append(
+				{"type": "directoryInformation",
+				"content": directoryInfo}
+			)
+
+		# Sentinel marking where the final section ends
+		sectionStarts.append(contentEnd + 1)
+
+		delimiters = [
+			{"name": "edit", "pattern": "*** Edited"},
+			{"name": "status", "pattern": "*** Status"},
+			{"name": "replyToUser", "pattern": "*** Replied"},
+			{"name": "replyFromUser", "pattern": "=== "},
+		]
+
+		# Text that precedes the author's name on the delimiter line of every
+		# section type that records who acted and when
+		authorLabels = {
+			"edit": "Edited by: ",
+			"replyToUser": "Replied by: ",
+			"status": "Status updated by: ",
+		}
+
+		for start, end in zip(sectionStarts, sectionStarts[1:]):
+			line = self.__rawItem[start]
+			sectionContent = self.__rawItem[start:end]
+
+			# Anything without a recognized delimiter is the initial message
+			sectionType = "initialMessage"
+			for delimiter in delimiters:
+				if line.startswith(delimiter["pattern"]):
+					sectionType = delimiter["name"]
+					break
+
+			if sectionType not in authorLabels:
+				sections.append(
+					{"type": sectionType,
+					"content": sectionContent}
+				)
+				continue
+
+			# The author sits between "*** <label>" and " at:"
+			authorMatch = re.search(
+				r"(?<=\*{3} " + authorLabels[sectionType] + r")(.*)(?= at:)", line
+			)
+			# The date and time sit between " at: " and " ***\n"
+			dateMatch = re.search(r"(?<= at: )(.*)(?= \*\*\*\n)", line)
+
+			# Placeholders used when the delimiter line cannot be parsed; their
+			# differing capitalization is kept for existing consumers
+			formattedDate = "invalid"
+			formattedTime = "Invalid"
+			if dateMatch is not None:
+				try:
+					dateObject = parse(dateMatch.group())
+					formattedDate = dateObject.strftime("%Y-%m-%dT%H:%M:%S%z")
+					formattedTime = dateObject.strftime("%H:%M:%S%z")
+				except (ValueError, OverflowError):
+					formattedDate = "invalid"
+					formattedTime = "Invalid"
+
+			sections.append(
+				{"type": sectionType,
+				"by": authorMatch.group() if authorMatch else "",
+				"date": formattedDate,
+				"time": formattedTime,
+				"content": sectionContent}
+			)
+
+		return sections
+
 	def __isLocked(self) -> Union[str, bool]:
 		"""Returns a string info about the lock if true and a bool False if false
 
@@ -345,4 +519,8 @@ def getQueues() -> list:
 		if isDirectory and isValid:
 			queues.append(Queue(file))
 
-	return queues
\ No newline at end of file
+	return queues
+
+if __name__ == "__main__":  # Manual check; runs only when this file is executed directly
+	item = Item("ce", 11)  # Presumably item number 11 of the "ce" queue (TODO confirm); unused below
+	print()  # Writes a single empty line