From 8ef3e06c73c59bb228aab1537f585d0582a848eb Mon Sep 17 00:00:00 2001
From: Jacob Daniel Bennett <benne238@w2vm2.ecn.purdue.edu>
Date: Wed, 7 Oct 2020 09:02:41 -0400
Subject: [PATCH] Added initial message parser helper function, completely
 updated dictionary keys to camel case

---
 api/ECNQueue.py | 280 +++++++++++++++++++++++++++++-------------------
 1 file changed, 170 insertions(+), 110 deletions(-)

diff --git a/api/ECNQueue.py b/api/ECNQueue.py
index 0ee74ca..3537bb3 100644
--- a/api/ECNQueue.py
+++ b/api/ECNQueue.py
@@ -181,126 +181,99 @@ def __parseSections(self) -> list:
 		contentStart = self.__getHeaderBoundary() + 1
 		contentEnd = len(self.__rawItem) - 1 
 
-		directoryInfo = {"type": "directoryInformation"}
+		#directoryInfo = {"type": "directoryInformation"}
 		initialMessageContent = []
-		endInitialMessage = False
+		initialMessageSection = True
+		
+		# Delimiter info
+		delimiters = [
+			{"name": "edit", "pattern": "*** Edited"},
+			{"name": "status", "pattern": "*** Status"},
+			{"name": "replyToUser", "pattern": "*** Replied"},
+			{"name": "replyFromUser", "pattern": "=== "},
+		]
 
 		# Checks for Directory Identifiers
-		if self.__rawItem[contentStart] == "\n" and self.__rawItem[contentStart + 1].startswith("\tName:"):
+		if self.__rawItem[contentStart] == "\n" and self.__rawItem[contentStart + 1].startswith("\t"):
 
 			# Parses the directory information and returns a dictionary of directory values
-			directoryInfo["content"] = self.__directoryParsing(contentStart + 1)
+			directoryInfo = self.__directoryParsing(contentStart + 1)
+
+			# Appends Directory Information into the sections array
+			sections.append(directoryInfo)
 
 			# Sets the initial message start to the next line after all directory lines and newlines
-			contentStart = contentStart + len(directoryInfo["content"]) + 2
+			contentStart = contentStart + len(directoryInfo) + 1
 
-		else:
+		#else:
 
 			# Initialize an empty dictionary for content
-			directoryInfo["content"] = {}
-
-		# Appends Directory Information into the sections array
-		sections.append(directoryInfo)
+			#directoryInfo["content"] = {}
 
 		# Find line numbers where sections start
-		sectionBoundaries = [ {"start": contentStart} ]
+		#sectionBoundaries = [ {"start": contentStart} ]
+
+		sectionBoundaries = []
 
 		# Set to true if a reply-from-user begining delimiter is parsed. Set to false if the ending delimiter is encountered
-		replyFromUserDelimiter = False
+		#replyFromUserDelimiter = False
 
 		# Parses the entire contents of the message, stores everything before any delimiter as the initial message
+		# and the line number of any delimiters
 		for lineNumber in range(contentStart, contentEnd + 1):
 
 			line = self.__rawItem[lineNumber]
 
 			if line.startswith("***")  or line.startswith("===") and not line.startswith("===="):
+				for delimiter in delimiters:
+
+					if line.startswith(delimiter["pattern"]):
+
+						sectionBoundaries.append({"start": lineNumber, "type": delimiter["name"]})
+						break
 				
-				# Signifies that the inital message has been copletely parsed
-				endInitialMessage = True
+				# Signifies that the initial message has been completely parsed
+				initialMessageSection = False
+
+				# Stores what line every delimeter starts/ends
+				#sectionBoundaries.append({"start": lineNumber, "type": "delimiter"})
 
-				if replyFromUserDelimiter == False and line.startswith("===") and not line.startswith("===="):
+				#if replyFromUserDelimiter == False and line.startswith("===") and not line.startswith("===="):
 
 					# Stores what line every delimeter starts/ends
-					sectionBoundaries.append({"start": lineNumber})
+					# sectionBoundaries.append({"start": lineNumber, "type": "delimiter"})
 
-					replyFromUserDelimiter = True
+					#replyFromUserDelimiter = True
 				
 				# Checks for nesteded delimiters within the reply from user
-				elif replyFromUserDelimiter == True and (line.startswith("===") or line.startswith("***")) and not line.startswith("===="):
+				#elif replyFromUserDelimiter == True and (line.startswith("===") or line.startswith("***")) and not line.startswith("===="):
 
-					columnNum = 0
+					#columnNum = 0
 
-					errorMessage = "Nested delimiter encountered"
+					#errorMessage = "Nested delimiter encountered"
 
-					errorDictionary = self.__errorParsing(line, lineNumber, columnNum, errorMessage)
+					#errorDictionary = self.__errorParsing(line, lineNumber, columnNum, errorMessage)
 
 					# Appends the error dictionary to sections
-					sections.append(errorDictionary)
+					#sections.append(errorDictionary)
 
 					# Immediately exits the section parsing function because item content needs to be edited from the cli
-					return sections
+					#return sections
 
-				elif replyFromUserDelimiter == False:
+				#elif replyFromUserDelimiter == False:
 
-					sectionBoundaries.append({"start": lineNumber})
+					#sectionBoundaries.append({"start": lineNumber})
 
-			elif line.startswith("===="):
+			#elif line.startswith("===="):
 
-				replyFromUserDelimiter = False	
+				#replyFromUserDelimiter = False	
 
-			elif endInitialMessage == False:
+			elif initialMessageSection == True:
 
 				# Delimiter not encountered yet, so append line to initial message list
-				initialMessageContent.append(line)
+				sectionBoundaries.append({"start": lineNumber, "type": "initial_message"})
 
-		# Removes unecessary newlines from the begining and the end of the initial message
-
-		initialMessageContent = self.__getFormattedMessageContent(initialMessageContent)
-
-		# Gets the initial message date from the header
-		initialMessageDateStr = self.__getMostRecentHeaderByType("Date")
-
-		# Formats the initial message date to UTC
-		initialMessageFormattedDate = self.__getFormattedDate(initialMessageDateStr)
-		
-		# Stores list of dictionaries for the recipients of the initial message
-		initialMessageRecipientsSection = []
-
-		# Parses the header looking for recipients of the initial message and stores it in a list of tuples
-		initialMessageRecipientsList = email.utils.getaddresses([self.__getMostRecentHeaderByType("To")])
-	
-		# Parses the CC list and stores the cc recipient information in a list of dictionaries
-		for recipients in initialMessageRecipientsList:
-
-			initialMessageRecipientsSection.append(
-				{"name": recipients[0],
-				"email": recipients[1]}
-				)
-
-		# Stores list of dictionaries for CC information
-		initialMessageCCSection = []
-
-		# Parses the header looking for CC recipients of the initial message and stores it in a list of tuples
-		initialMessageCCList = email.utils.getaddresses([self.__getMostRecentHeaderByType("CC")])
-
-		# Parses the CC list and stores the cc recipient information in a list of dictionaries
-		for ccRecipients in initialMessageCCList:
-
-			initialMessageCCSection.append(
-				{"name": ccRecipients[0],
-				"email": ccRecipients[1]}
-				)
-
-		# Appends all initial message information to the sections array
-		sections.append(
-			{"type": "initial_message",
-			"datetime": initialMessageFormattedDate,
-			"from_name": self.__parseFromData(data="userName"),
-			"user_email": self.__parseFromData(data="userEmail"),
-			"to": initialMessageRecipientsSection,
-			"cc": initialMessageCCSection,
-			"content": initialMessageContent}
-		)
+				initialMessageSection = False
 
 		# Assignment Information
 		assignedBy = ""
@@ -339,7 +312,7 @@ def __parseSections(self) -> list:
 
 		sectionBoundaries.append({"start": contentEnd + 1})
 
-		# Set line number where section end
+		# Sets the end of each section boundary to the beginning of the next section boundary
 		for boundaryIndex in range(0, len(sectionBoundaries) - 1):
 
 			sectionBoundaries[boundaryIndex]["end"] = sectionBoundaries[boundaryIndex + 1]["start"]
@@ -347,14 +320,6 @@ def __parseSections(self) -> list:
 		# Remove End of File boundary
 		del sectionBoundaries[-1]
 
-		# Different delimiters for different message events
-		delimiters = [
-			{"name": "edit", "pattern": "*** Edited"},
-			{"name": "status", "pattern": "*** Status"},
-			{"name": "replyToUser", "pattern": "*** Replied"},
-			{"name": "replyFromUser", "pattern": "=== "},
-		]
-
 		# Parses through all the boundaries in section boundaries
 		for boundary in sectionBoundaries:
 
@@ -376,6 +341,10 @@ def __parseSections(self) -> list:
 			# Returns all of the lines within the current section
 			sectionContent = self.__rawItem[boundary["start"] : boundary["end"]]
 
+			if boundary["type"] == "initial_message":
+				initialMessageDictionary = self.__initialMessageParsing(boundary["start"], boundary["end"])
+				sections.append(initialMessageDictionary)
+				
 			# Checks for each section type
 			if sectionType == "edit":
 				
@@ -440,6 +409,7 @@ def __directoryParsing(self, directoryStartLine: int) -> dict:
 		"""Returns a dictionary with directory information
 
 		Returns: dictionary: 
+				"type": "directory_information",
 				"Name": name,
 		    	"Login": login,
 				"Computer": computer,
@@ -453,29 +423,118 @@ def __directoryParsing(self, directoryStartLine: int) -> dict:
 				"Host ECNDB": host_ecdbn,
 				"Subject": subject
 		"""
-		directoryInformation = {}
-		
-		directoryEndingLine = directoryStartLine
-		while directoryEndingLine - directoryStartLine <= 11:
+		directoryInformation = {"type": "directory_information"}
+
+		# Assumes a full directory with 12 items including the starting line
+		directoryEndingLine = directoryStartLine + 11
+
+		# Executes until the directory start line passes the directory ending line or a blank line is reached
+		while directoryStartLine <= directoryEndingLine:
 			
-			info = self.__rawItem[directoryEndingLine]
+			# Gets the raw item line at the current directory line number
+			info = self.__rawItem[directoryStartLine]
 
+			# Breaks the loop if it encounters a newline, signifying the end of the directory information
 			if info == "\n":
 
 				break
-
+			
 			else:
+
+				# Removes surrounding whitespace (spaces, newlines, and tabs) from the directory info line
 				strippedInfo = info.strip()
-				try:
+
+				# Attempts to find ": " but will accept ":", denoting a blank entry for a directory item
+				if ": " in strippedInfo:
+				
+					# Separates the directory info line into two variables, the first variable being the key, the second being the value
 					key, value = strippedInfo.split(": ")
-				except:
+
+					# Adds the key value pair to the directory info dictionary
+					directoryInformation[key] = value
+
+				elif ":" in strippedInfo:
+					
+					# Separates the directory info line into two variables, the first being the key, the second being the (blank) value
 					key, value = strippedInfo.split(":")
 
-				directoryInformation[key] = value
+					# Adds the key value pair to the directory info dictionary
+					directoryInformation[key] = value
 
-			directoryEndingLine = directoryEndingLine + 1
+			# Counter to denote the end of the directory
+			directoryStartLine = directoryStartLine + 1
 
+		# Returns the directory information dictionary
 		return directoryInformation
+	
+	def __initialMessageParsing(self, startLine: int, endLine: int) -> dict:
+		"""Returns a dictionary with initial message information
+
+		Returns:
+			dictionary: "type": "initial_message", 
+			"datetime": utcdate,
+			"from_name": fromName,
+			"user_email": userEmail,
+			"to": [{email, name}],
+			"cc": [{email, name}],
+			"content": ["message_content"]
+		"""
+		initialMessageDictionary = {
+			#"type": "initial_message",
+			#"datetime": initialMessageFormattedDate,
+			#"from_name": self.__parseFromData(data="userName"),
+			#"user_email": self.__parseFromData(data="userEmail"),
+			#"to": initialMessageRecipientsSection,
+			#"cc": initialMessageCCSection,
+			#"content": initialMessageContent
+			}
+		
+		initialMessageDictionary["type"] = "initial_message"
+
+		# Gets the initial message date from the header
+		rawMessageDateStr = self.__getMostRecentHeaderByType("Date")
+
+		# Sets datetime in the initialMessage dictionary to the UTC formatted date
+		initialMessageDictionary["datetime"] = self.__getFormattedDate(rawMessageDateStr)
+
+		initialMessageDictionary["from_name"] = self.__parseFromData(data="userName")
+
+		initialMessageDictionary["user_email"] = self.__parseFromData(data="userEmail")
+		
+		# Stores list of dictionaries for the recipients of the initial message
+		initialMessageDictionary["to"] = []
+
+		# Parses the header looking for recipients of the initial message and stores it in a list of tuples
+		rawMessageRecipientsList = email.utils.getaddresses([self.__getMostRecentHeaderByType("To")])
+	
+		# Parses the To list and stores each recipient's information in a list of dictionaries
+		for recipients in rawMessageRecipientsList:
+
+			initialMessageDictionary["to"].append(
+				{"name": recipients[0],
+				"email": recipients[1]}
+				)
+
+		# Stores list of dictionaries for CC information
+		initialMessageDictionary["cc"] = []
+
+		# Parses the header looking for CC recipients of the initial message and stores it in a list of tuples
+		rawMessageCCList = email.utils.getaddresses([self.__getMostRecentHeaderByType("CC")])
+
+		# Parses the CC list and stores the cc recipient information in a list of dictionaries
+		for ccRecipients in rawMessageCCList:
+
+			initialMessageDictionary["cc"].append(
+				{"name": ccRecipients[0],
+				"email": ccRecipients[1]}
+				)
+
+		rawMessageContent = self.__rawItem[startLine : endLine]
+
+		# Removes unnecessary newlines from the beginning and the end of the initial message
+		initialMessageDictionary["content"] = self.__getFormattedMessageContent(rawMessageContent)
+
+		return initialMessageDictionary
 
 	def __editParsing(self, line: str) -> dict:
 		"""Returns a dictionary with edit information
@@ -497,10 +556,10 @@ def __editParsing(self, line: str) -> dict:
 		formattedDateTime = self.__getFormattedDate(dateTimeString)
 
 		editInfo = {
-		"type": "edit",
-		"datetime": formattedDateTime,
-		"by": editedBy,
-		"content": ""
+			"type": "edit",
+			"datetime": formattedDateTime,
+			"by": editedBy,
+			"content": ""
 		}
 
 		return editInfo
@@ -596,8 +655,6 @@ def __userReplyParsing(self, replyContent: list) -> dict:
 				subject = (re.search("(?<=Subject: )(.*)", line)).group()
 
 				linesToRemove.append(lineNum)
-				
-				continue
 
 			elif line.startswith("From: "):
 				
@@ -612,8 +669,6 @@ def __userReplyParsing(self, replyContent: list) -> dict:
 
 				linesToRemove.append(lineNum)
 
-				continue
-
 			elif line.startswith("Date: "):
 				
 				# Matches everything after "Date: "
@@ -626,8 +681,6 @@ def __userReplyParsing(self, replyContent: list) -> dict:
 
 				linesToRemove.append(lineNum)
 
-				continue
-
 			elif line.startswith("Cc: "):
 				
 				# Returns a list of tuples with email information
@@ -643,8 +696,6 @@ def __userReplyParsing(self, replyContent: list) -> dict:
 						)
 				
 				linesToRemove.append(lineNum)
-
-				continue
 		
 		# Deletes reduntant lines from the message content in reverse order
 		for lineNum in sorted(linesToRemove, reverse = True):
@@ -722,13 +773,16 @@ def __errorParsing(self, line: str, lineNum: int, lineColumn: int, errorMessage:
 			"content": []
 		}
 
-		# Error message with line and column numbers
+		# Error message with item line and column numbers
 		errorMessage = errorMessage + " at " + str(lineNum) + ":" + str(lineColumn) 
 
+		# Appends the error message to the content list in the error dictionary
 		errorDictionary["content"].append(errorMessage)
 
+		# Appends the item line to the content list in the error dictionary
 		errorDictionary["content"].append(line)
 
+		# returns the error dictionary
 		return errorDictionary
 
 	def __isLocked(self) -> Union[str, bool]:
@@ -919,4 +973,10 @@ def getQueues() -> list:
 		if isDirectory and isValid:
 			queues.append(Queue(file))
 
-	return queues
\ No newline at end of file
+	return queues
+if __name__ == "__main__":
+	item = Item("ce", 11)
+	print()
+#	for queue in getQueues():
+#		for item in queue.items:
+#			print(f"${item.queue} ${item.number}")
\ No newline at end of file