From 8289ae75e6692286ce6979185df7bf261f0875eb Mon Sep 17 00:00:00 2001 From: benne238 Date: Wed, 30 Jun 2021 11:34:59 -0400 Subject: [PATCH] Add logic to return a parse_error if a header is not formatted correctly in the reply_from_user section --- src/webqueue2api/parser/parser.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/webqueue2api/parser/parser.py b/src/webqueue2api/parser/parser.py index 6d43170..44db3db 100644 --- a/src/webqueue2api/parser/parser.py +++ b/src/webqueue2api/parser/parser.py @@ -2,10 +2,12 @@ import json import string import email +from email.policy import Policy +import email.errors import datetime from .utils import format_date_string from .errors import ParseError - +Policy.raise_on_defect = True parsed_item = [] @@ -58,7 +60,23 @@ def parse_section(original_string: str, match_start_index: int, tokens: pp.Parse # Parse reply-from-user headers if section_type == "reply_from_user": - headers = email.message_from_string(tokens_dictionary["headers"]) + try: + headers = email.message_from_string(tokens_dictionary["headers"]) + except email.errors.MissingHeaderBodySeparatorDefect as e: + parse_error = { + "type": "parse_error", + "datetime": format_date_string(str(datetime.datetime.now())), + "expected": "Header information with a key/value pair seperated by a colon or a newline to seperate the header from the content", + } + headers_list = tokens_dictionary["headers"].splitlines(keepends=True) + for line in headers_list: + if ":" not in line and not line.startswith(" "): + parse_error["got"] = line + line_number = original_string[:(match_start_index + original_string[match_start_index:].find(line))].count("\n") + 1 + parse_error["line_num"] = line_number + parsed_item.append(parse_error) + raise ParseError(parse_error["line_num"], f"{parse_error['got']} is a malfomred header or the start of message content without a newline") + headers_list = [] for key in headers.keys(): 
headers_list.append({"type": key, "content": headers[key]})