File: //proc/1233/root/home/arjun/projects/buyercall/buyercall/blueprints/email/utils/mime_parser.py
import os
from email.parser import Parser
class MimeParser:
    """Walk a raw MIME e-mail, print its text parts and save its attachments.

    All output goes to stdout via ``print``; attachments are written next to
    this source file. None of the methods return a value.
    """

    def parse(self, content):
        """Parse a raw message given as an iterable of byte lines.

        Args:
            content: iterable of ``bytes`` objects, one per message line and
                without line terminators; the lines are joined with CRLF.

        Returns:
            Whatever ``parse_email_body`` returns (currently ``None``).
        """
        # Function-scope import so this block does not depend on a change to
        # the file-level import list.
        from email.parser import BytesParser

        # Parse the bytes directly: decoding to ``str`` first corrupts 8-bit
        # bodies and fails outright on mail that is not valid UTF-8.
        msg = BytesParser().parsebytes(b'\r\n'.join(content))
        return self.parse_email_body(msg)

    # Parse email header data.
    def parse_email_header(self, msg):
        """Print the From, To and Subject header values of ``msg``.

        A header that is absent is printed with an empty value.
        """
        print('********************************* start parse_email_header *********************************')
        # just parse from, to, subject header value.
        for header in ('From', 'To', 'Subject'):
            # f-string formatting also copes with header values that are not
            # plain ``str`` objects.
            print(f'{header} : {msg.get(header, "")}')

    # Parse email body data.
    def parse_email_body(self, msg):
        """Print every part of ``msg``, handling each top-level part once."""
        print('********************************* start parse_email_body *********************************')
        # a message that is not multipart is handled as a single part.
        if not msg.is_multipart():
            self.parse_email_content(msg)
            return

        for n, part in enumerate(msg.get_payload()):
            content_type = part.get_content_type()
            print('---------------------------Part ' + str(n) + ' content type : ' + content_type + '---------------------------------------')
            # Handle this part only; passing the whole message here would
            # repeat every body and attachment once per top-level part.
            self.parse_email_content(part)

    # Parse email message part data.
    def parse_email_content(self, msg):
        """Print or save a single message part, recursing into multiparts.

        * ``text/plain`` and ``text/html`` parts are decoded and printed.
        * ``multipart/*`` parts are walked recursively.
        * ``image/*`` and ``application/*`` parts are saved as attachments.
        * Anything else is printed verbatim via ``as_string()``.
        """
        content_type = msg.get_content_type().lower()
        print('---------------------------------' + content_type + '------------------------------------------')

        if content_type in ('text/plain', 'text/html'):
            print(self._decode_text(msg))
        # is_multipart() guards against a part that is labelled multipart but
        # whose payload is a plain string (malformed mail).
        elif content_type.startswith('multipart') and msg.is_multipart():
            for body_msg in msg.get_payload():
                self.parse_email_content(body_msg)
        elif content_type.startswith(('image', 'application')):
            self._save_attachment(msg)
        else:
            print(msg.as_string())

    def _decode_text(self, msg):
        """Return the transfer-decoded payload of a text part as ``str``."""
        raw = msg.get_payload(decode=True)
        if raw is None:
            return ''
        # get_content_charset() reads the charset parameter of the
        # Content-Type header and strips quoting; default to UTF-8 when the
        # header names no charset.
        charset = msg.get_content_charset() or 'utf-8'
        try:
            return raw.decode(charset, errors='replace')
        except LookupError:
            # The declared charset is unknown to Python.
            return raw.decode('utf-8', errors='replace')

    def _save_attachment(self, msg):
        """Write an attachment part to the directory containing this file."""
        file_name = msg.get_filename()
        # The file name comes from the e-mail itself, so keep only its final
        # path component to prevent writing outside the target directory.
        safe_name = os.path.basename(file_name.replace('\\', '/')) if file_name else ''
        if not safe_name or safe_name in ('.', '..'):
            print('attachment part has no usable file name, skipped')
            return

        attach_file_data = msg.get_payload(decode=True)
        if attach_file_data is None:
            print('attachment part has no payload, skipped')
            return

        # get current script execution directory path.
        current_path = os.path.dirname(os.path.abspath(__file__))
        attach_file_path = os.path.join(current_path, safe_name)
        with open(attach_file_path, 'wb') as f:
            f.write(attach_file_data)
        print('attached file is saved in path ' + attach_file_path)