HEX

File: //proc/1233/root/home/arjun/projects/buyercall/buyercall/blueprints/email/utils/mime_parser.py
import os
from email.parser import Parser


class MimeParser:
    """Walk a MIME email, print its text parts and save its attachments.

    All reporting is done with ``print``. Parts whose content type starts
    with ``image`` or ``application`` are treated as attachments and written
    into the directory that contains this module.
    """

    def parse(self, content):
        """Parse a raw email supplied as a sequence of byte strings.

        Args:
            content: Iterable of ``bytes`` chunks; they are joined with CRLF
                and the result is decoded as UTF-8 before parsing.

        Returns:
            The result of ``parse_email_body`` (which returns ``None``).

        Raises:
            UnicodeDecodeError: If the joined bytes are not valid UTF-8.
        """
        msg_content = b'\r\n'.join(content).decode('utf-8')
        msg = Parser().parsestr(msg_content)
        return self.parse_email_body(msg)

    def parse_email_header(self, msg):
        """Print the ``From``, ``To`` and ``Subject`` headers of *msg*.

        A header that is absent is printed with an empty value.
        """
        print('********************************* start parse_email_header *********************************')
        for header in ('From', 'To', 'Subject'):
            header_value = msg.get(header, '')
            print(header + ' : ' + header_value)

    def parse_email_body(self, msg):
        """Print every part of *msg* and save any attachments it carries.

        For a multipart message each top-level part is announced and then
        handed to ``parse_email_content``; a single-part message is handed
        over directly.
        """
        print('********************************* start parse_email_body *********************************')

        if not msg.is_multipart():
            self.parse_email_content(msg)
            return

        for n, part in enumerate(msg.get_payload()):
            content_type = part.get_content_type()
            print('---------------------------Part ' + str(n) + ' content type : ' + content_type + '---------------------------------------')
            # Process this part only. Passing the enclosing message here
            # would re-walk the whole tree once per part and duplicate
            # every printed body and every saved attachment.
            self.parse_email_content(part)

    def parse_email_content(self, msg):
        """Handle one message part according to its content type.

        * ``text/plain`` / ``text/html``: print the decoded text (the raw
          bytes are printed when the part declares no charset).
        * ``multipart/*``: recurse into every sub-part.
        * ``image*`` / ``application*``: save the payload as a file.
        * anything else: print the part serialized as a string.
        """
        content_type = msg.get_content_type().lower()

        print('---------------------------------' + content_type + '------------------------------------------')
        if content_type in ('text/plain', 'text/html'):
            print(self._decode_text(msg))
        elif content_type.startswith('multipart'):
            if msg.is_multipart():
                for body_msg in msg.get_payload():
                    self.parse_email_content(body_msg)
            else:
                # A malformed multipart part keeps its payload as a plain
                # string; there are no sub-parts to recurse into.
                print(msg.as_string())
        elif content_type.startswith(('image', 'application')):
            self._save_attachment(msg)
        else:
            print(msg.as_string())

    def _decode_text(self, msg):
        """Return the text payload of *msg*, decoded when a charset is declared."""
        content = msg.get_payload(decode=True)
        # get_content_charset() reads the charset parameter of the
        # Content-Type header and strips quoting, so charset="utf-8" works.
        charset = msg.get_content_charset()
        if not charset or content is None:
            return content
        try:
            return content.decode(charset, errors='replace')
        except LookupError:
            # The declared charset is unknown to Python; fall back to UTF-8
            # rather than aborting the whole parse.
            return content.decode('utf-8', errors='replace')

    def _save_attachment(self, msg):
        """Write the attachment carried by *msg* next to this module."""
        raw_name = msg.get_filename()
        # The file name comes from the email itself, so keep only its final
        # path component to stop names such as '../../x' escaping the
        # target directory.
        safe_name = os.path.basename(raw_name.replace('\\', '/')) if raw_name else ''
        if safe_name in ('', '.', '..'):
            print('attachment has no usable file name, skipped')
            return

        attach_file_data = msg.get_payload(decode=True)
        if attach_file_data is None:
            print('attachment ' + safe_name + ' has no payload, skipped')
            return

        current_path = os.path.dirname(os.path.abspath(__file__))
        attach_file_path = os.path.join(current_path, safe_name)
        with open(attach_file_path, 'wb') as f:
            f.write(attach_file_data)

        print('attached file is saved in path ' + attach_file_path)