using System;
using System.Collections.Generic;
using System.IO;
using System.Net.Mime;
using System.Text;
using MsgReader.Helpers;
using MsgReader.Localization;
using MsgReader.Mime.Decode;
using MsgReader.Mime.Header;
namespace MsgReader.Mime
{
/// <summary>
/// A MessagePart is a part of an email message used to describe the whole email parse tree.
/// <para>
/// <b>Email messages are tree structures</b>:<br/>
/// Email messages may contain large tree structures, and the MessageParts are the nodes of this structure.<br/>
/// A MessagePart may either be a leaf in the structure or an internal node with links to other MessageParts.<br/>
/// The root of the message tree is the <see cref="Message"/> class.
/// </para>
/// <para>
/// <b>Leafs</b>:<br/>
/// If a MessagePart is a leaf, the part is not a MultiPart message (see <see cref="IsMultiPart"/>).<br/>
/// Leafs are where the contents of an email are placed.<br/>
/// This includes, but is not limited to: attachments, text or images referenced from HTML.<br/>
/// The content of an attachment can be fetched by using the <see cref="Body"/> property.<br/>
/// If you want to have the text version of a MessagePart, use the <see cref="GetBodyAsText"/> method which will
/// convert the <see cref="Body"/> into a string using the encoding the message was sent with.
/// </para>
/// <para>
/// <b>Internal nodes</b>:<br/>
/// If a MessagePart is an internal node in the email tree structure, then the part is a MultiPart message.<br/>
/// The <see cref="MessageParts"/> property will then contain links to the parts it contains.<br/>
/// The <see cref="Body"/> property of the MessagePart will not be set.
/// </para>
/// <para>
/// See the example for a parsing example.<br/>
/// This class cannot be instantiated from outside the library.
/// </para>
/// </summary>
/// <example>
/// This example illustrates what the message parse tree looks like for a specific message.<br/>
/// <br/>
/// The message source in this example is:<br/>
/// <code>
/// MIME-Version: 1.0
/// Content-Type: multipart/mixed; boundary="frontier"
///
/// This is a message with multiple parts in MIME format.
/// --frontier
/// Content-Type: text/plain
///
/// This is the body of the message.
/// --frontier
/// Content-Type: application/octet-stream
/// Content-Transfer-Encoding: base64
///
/// PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg
/// Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==
/// --frontier--
/// </code>
/// The tree will look as follows, where the content-type media type of each part is listed:<br/>
/// <code>
/// - Message root
///   - multipart/mixed MessagePart
///     - text/plain MessagePart
///     - application/octet-stream MessagePart
/// </code>
/// It is possible to have more complex message trees like the following:<br/>
/// <code>
/// - Message root
///   - multipart/mixed MessagePart
///     - text/plain MessagePart
///     - text/plain MessagePart
///     - multipart/parallel
///       - audio/basic
///       - image/tiff
///     - text/enriched
///     - message/rfc822
/// </code>
/// But it is also possible to have very simple message trees like:<br/>
/// <code>
/// - Message root
///   - text/plain
/// </code>
/// </example>
public class MessagePart
{
#region Fields
// Backing store for IsInline. Stays null until the property is first read (the value is then
// derived from ContentDisposition) or until it is explicitly assigned through the internal setter.
private bool? _isInline;
#endregion
#region Properties
/// <summary>
/// The Content-Type header field. The value is taken from the headers handed to the constructor.<br/>
/// <br/>
/// If not set, the ContentType is created by the default "text/plain; charset=us-ascii" which is
/// defined in RFC 2045 section 5.2.<br/>
/// <br/>
/// If set, the default is overridden.
/// </summary>
public ContentType ContentType { get; }
/// <summary>
/// A human readable description of the body. The value is taken from the headers handed to the constructor.<br/>
/// <br/>
/// <see langword="null"/> if no Content-Description header was present in the message.
/// </summary>
public string ContentDescription { get; }
/// <summary>
/// This header describes the Content encoding during transfer. It decides how the raw body bytes
/// of a non MultiPart part are decoded into <see cref="Body"/>.<br/>
/// <br/>
/// If no Content-Transfer-Encoding header was present in the message, it is set
/// to the default of SevenBit in accordance to the RFC.
/// </summary>
/// <remarks>See RFC 2045 section 6 for details</remarks>
public ContentTransferEncoding ContentTransferEncoding { get; }
/// <summary>
/// ID of the content part (like an attached image). Used with MultiPart messages.<br/>
/// <br/>
/// <see langword="null"/> if no Content-ID header field was present in the message.
/// </summary>
public string ContentId { get; }
/// <summary>
/// Used to describe if a <see cref="MessagePart"/> is to be displayed or to be thought of as an attachment.<br/>
/// Also contains information about filename if such was sent.<br/>
/// <br/>
/// <see langword="null"/> if no Content-Disposition header field was present in the message
/// </summary>
public ContentDisposition ContentDisposition { get; }
/// <summary>
/// This is the encoding used to parse the message body if the <see cref="MessagePart"/>
/// is not a MultiPart message. It is derived from the character set of the <see cref="ContentType"/>
/// property; US-ASCII is used when no character set was given.
/// </summary>
public Encoding BodyEncoding { get; }
/// <summary>
/// This is the parsed body of this <see cref="MessagePart"/>.<br/>
/// It is parsed in that way, if the body was ContentTransferEncoded, it has been decoded to the
/// correct bytes.<br/>
/// <br/>
/// It will be <see langword="null"/> if this <see cref="MessagePart"/> is a MultiPart message.<br/>
/// Use <see cref="IsMultiPart"/> to check if this <see cref="MessagePart"/> is a MultiPart message.
/// </summary>
public byte[] Body { get; private set; }
/// <summary>
/// This will be set to true if this is the first found Text <see cref="MessagePart"/>. This way it
/// indicates that this is the text variant of the E-mail body.
/// </summary>
/// <remarks>
/// The flag is assigned from outside this class (internal setter). When it is set, <see cref="IsAttachment"/>
/// reports <see langword="false"/> for this part.
/// </remarks>
internal bool IsTextBody { get; set; }
/// <summary>
/// This will be set to true if this is the first found Html <see cref="MessagePart"/>. This way it
/// indicates that this is the html variant of the E-mail body.
/// </summary>
/// <remarks>
/// The flag is assigned from outside this class (internal setter). When it is set, <see cref="IsAttachment"/>
/// reports <see langword="false"/> for this part.
/// </remarks>
internal bool IsHtmlBody { get; set; }
/// <summary>
/// Tells whether this <see cref="MessagePart"/> is a MultiPart message.
/// </summary>
/// <remarks>
/// A part is MultiPart when the media type of its <see cref="ContentType"/> begins with
/// "multipart/"; the comparison ignores case.
/// </remarks>
public bool IsMultiPart
{
    get
    {
        var mediaType = ContentType.MediaType;
        return mediaType.StartsWith("multipart/", StringComparison.OrdinalIgnoreCase);
    }
}
/// <summary>
/// Tells whether the body of this <see cref="MessagePart"/> is regarded as text.
/// </summary>
/// <remarks>
/// That is the case when the media type of <see cref="ContentType"/> begins with "text/"
/// or equals "message/rfc822". Both checks ignore case.
/// </remarks>
public bool IsText
{
    get
    {
        var type = ContentType.MediaType;

        if (type.StartsWith("text/", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }

        return type.Equals("message/rfc822", StringComparison.OrdinalIgnoreCase);
    }
}
/// <summary>
/// Tells whether this <see cref="MessagePart"/> is an inline attachment.
/// </summary>
/// <remarks>
/// Unless a value was assigned through the internal setter, the answer is taken from the
/// Inline flag of <see cref="ContentDisposition"/> (<see langword="false"/> when there is no
/// Content-Disposition) on first read and remembered afterwards.
/// </remarks>
public bool IsInline
{
    get
    {
        if (!_isInline.HasValue)
        {
            // First read without an explicit override: derive the flag from the header
            _isInline = ContentDisposition != null && ContentDisposition.Inline;
        }

        return _isInline.Value;
    }
    internal set
    {
        _isInline = value;
    }
}
/// <summary>
/// Tells whether this <see cref="MessagePart"/> is treated as an attachment.
/// </summary>
/// <remarks>
/// A part is an attachment when all of the following hold:<br/>
/// - it has not been marked as the html body (<see cref="IsHtmlBody"/>)<br/>
/// - it has not been marked as the text body (<see cref="IsTextBody"/>)<br/>
/// - it is not a MultiPart message (<see cref="IsMultiPart"/>)
/// </remarks>
public bool IsAttachment => !IsHtmlBody && !IsTextBody && !IsMultiPart;
/// <summary>
/// This is a convenient-property for figuring out a FileName for this <see cref="MessagePart"/>.<br/>
/// If the <see cref="MessagePart"/> is a MultiPart message, then it makes no sense to try to find a FileName.<br/>
/// <br/>
/// The value is resolved once, in the constructor, from the first of these sources that yields a name:
/// the file name of the Content-Disposition header, the Content-Description header, the Subject header,
/// the subject of an embedded ".eml" message and the name parameter of the Content-Type header.<br/>
/// If none of these places tells about the FileName, a default is returned.
/// </summary>
public string FileName { get; }
/// <summary>
/// If this <see cref="MessagePart"/> is a MultiPart message, then this property
/// has a list of each of the Multiple parts that the message consists of.<br/>
/// <br/>
/// It is <see langword="null"/> if it is not a MultiPart message.<br/>
/// Use <see cref="IsMultiPart"/> to check if this <see cref="MessagePart"/> is a MultiPart message.
/// </summary>
public List<MessagePart> MessageParts { get; private set; }
#endregion
#region Constructors
/// <summary>
/// Used to construct the topmost message part
/// </summary>
/// <param name="rawBody">The body that needs to be parsed</param>
/// <param name="headers">The headers that should be used from the message</param>
/// <exception cref="ArgumentNullException">
/// If <paramref name="rawBody"/> or <paramref name="headers"/> is <see langword="null"/>
/// </exception>
internal MessagePart(byte[] rawBody, MessageHeader headers)
{
    if (rawBody == null)
    {
        throw new ArgumentNullException(nameof(rawBody));
    }

    if (headers == null)
    {
        throw new ArgumentNullException(nameof(headers));
    }

    // Take over the MIME header fields that describe this part
    ContentType = headers.ContentType;
    ContentTransferEncoding = headers.ContentTransferEncoding;
    ContentDisposition = headers.ContentDisposition;
    ContentDescription = headers.ContentDescription;
    ContentId = headers.ContentId;

    // Values derived from the headers (and, for embedded messages, from the raw body)
    FileName = FindFileName(rawBody, headers, LanguageConsts.NameLessFileName);
    BodyEncoding = ParseBodyEncoding(ContentType.CharSet);

    // Must run last: parsing relies on ContentType and ContentTransferEncoding being set
    ParseBody(rawBody);
}
#endregion
#region ParseBodyEncoding
/// <summary>
/// Parses a character set into an encoding
/// </summary>
/// <param name="characterSet">
/// The character set that needs to be parsed. <see langword="null"/> is allowed.
/// </param>
/// <returns>
/// The encoding specified by the <paramref name="characterSet"/> parameter, or ASCII if the character set
/// was <see langword="null"/> or empty
/// </returns>
private static Encoding ParseBodyEncoding(string characterSet)
{
    // US-ASCII is the default encoding of Mime messages; it is only replaced
    // when a character set was actually specified
    return string.IsNullOrEmpty(characterSet)
        ? Encoding.ASCII
        : EncodingFinder.FindEncoding(characterSet);
}
#endregion
#region FindFileName
/// <summary>
/// Figures out the filename of this message part.
/// </summary>
/// <remarks>
/// The sources are tried in this order and the first one that yields a name wins:<br/>
/// 1. the file name of the Content-Disposition header<br/>
/// 2. the Content-Description header (plus the extension that belongs to the media type)<br/>
/// 3. the Subject header (plus the extension)<br/>
/// 4. for parts whose extension is ".eml": the subject of the embedded message (plus the extension)<br/>
/// 5. the name parameter of the Content-Type header<br/>
/// 6. <paramref name="defaultName"/> (plus the extension)<br/>
/// Characters that are invalid in file names are removed from the result.
/// </remarks>
/// <param name="rawBody">The body that needs to be parsed</param>
/// <param name="headers">The headers that should be used from the message</param>
/// <param name="defaultName">The default filename to use, if no other could be found</param>
/// <returns>The filename found, or the default one if no such filename could be found in the headers</returns>
/// <exception cref="ArgumentNullException">if <paramref name="headers"/> is <see langword="null"/></exception>
private static string FindFileName(byte[] rawBody, MessageHeader headers, string defaultName)
{
    if (headers == null)
    {
        throw new ArgumentNullException(nameof(headers));
    }

    // An explicit Content-Disposition file name always wins
    var dispositionFileName = headers.ContentDisposition?.FileName;
    if (dispositionFileName != null)
    {
        return FileManager.RemoveInvalidFileNameChars(dispositionFileName);
    }

    // The Content-Type supplies the file extension and, as a late fallback, a name
    var contentType = headers.ContentType;
    var extension = contentType == null
        ? string.Empty
        : MimeType.GetExtensionFromMimeType(contentType.MediaType);
    var nameFromContentType = contentType?.Name;

    var description = headers.ContentDescription;
    if (!string.IsNullOrEmpty(description))
    {
        return FileManager.RemoveInvalidFileNameChars(description + extension);
    }

    var subject = headers.Subject;
    if (!string.IsNullOrEmpty(subject))
    {
        return FileManager.RemoveInvalidFileNameChars(subject) + extension;
    }

    if (extension.Equals(".eml", StringComparison.OrdinalIgnoreCase))
    {
        try
        {
            // The part is an embedded message: try to name it after that message's subject
            var embeddedSubject = new Message(rawBody).Headers?.Subject;
            if (!string.IsNullOrEmpty(embeddedSubject))
            {
                return FileManager.RemoveInvalidFileNameChars(embeddedSubject) + extension;
            }
        }
        // ReSharper disable once EmptyGeneralCatchClause
        catch
        {
            // Best effort only: when the embedded message cannot be used, the remaining fallbacks apply
        }
    }

    if (string.IsNullOrEmpty(nameFromContentType))
    {
        return FileManager.RemoveInvalidFileNameChars(defaultName + extension);
    }

    return FileManager.RemoveInvalidFileNameChars(nameFromContentType);
}
#endregion
#region ParseBody
/// <summary>
/// Parses a byte array as a body of an email message.
/// </summary>
/// <remarks>
/// A MultiPart body is split into its parts, which end up in <see cref="MessageParts"/>.
/// Any other body is decoded according to <see cref="ContentTransferEncoding"/> and stored in <see cref="Body"/>.
/// </remarks>
/// <param name="rawBody">
/// The byte array to parse as body of an email message. This array may not contain headers.
/// </param>
private void ParseBody(byte[] rawBody)
{
    if (!IsMultiPart)
    {
        // Leaf part: undo the transfer encoding and keep the resulting bytes
        Body = DecodeBody(rawBody, ContentTransferEncoding);
        return;
    }

    // Internal node: the body consists of further message parts
    ParseMultiPartBody(rawBody);
}
#endregion
#region ParseMultiPartBody
/// <summary>
/// Parses the <paramref name="rawBody"/> byte array as a MultiPart message.<br/>
/// It is not valid to call this method if <see cref="IsMultiPart"/> returned <see langword="false"/>.<br/>
/// Fills the <see cref="MessageParts"/> property of this <see cref="MessagePart"/>.
/// </summary>
/// <param name="rawBody">The byte array which is to be parsed as a MultiPart message</param>
private void ParseMultiPartBody(byte[] rawBody)
{
    // Fetch out the boundary used to delimit the messages within the body
    var multipartBoundary = ContentType.Boundary;

    // Fetch the individual MultiPart message parts using the MultiPart boundary
    var bodyParts = GetMultiPartParts(rawBody, multipartBoundary);

    // Initialize the MessageParts property, with room for as many parts as we have found
    MessageParts = new List<MessagePart>(bodyParts.Count);

    // Each byte array holds the headers and the body of one part
    foreach (var bodyPart in bodyParts)
    {
        MessageParts.Add(GetMessagePart(bodyPart));
    }
}
#endregion
#region GetMessagePart
/// <summary>
/// Given a byte array describing a full message.<br/>
/// Parses the byte array into a <see cref="MessagePart"/>.
/// </summary>
/// <param name="rawMessageContent">The byte array containing both headers and body of a message</param>
/// <returns>A <see cref="MessagePart"/> which was described by the <paramref name="rawMessageContent"/> byte array</returns>
private static MessagePart GetMessagePart(byte[] rawMessageContent)
{
    // Split the raw bytes into the header section and the body section
    HeaderExtractor.ExtractHeadersAndBody(rawMessageContent, out var partHeaders, out var partBody);

    // The two sections together describe one (possibly nested) message part
    var part = new MessagePart(partBody, partHeaders);
    return part;
}
#endregion
#region GetMultiPartParts
/// <summary>
/// Gets a list of byte arrays where each entry in the list is a full message of a message part
/// </summary>
/// <remarks>
/// The offsets are calculated under the assumption that every delimiter line has the exact form
/// "--" + boundary + CRLF and is preceded by a CRLF that belongs to the delimiter.<br/>
/// Empty parts are skipped. If the closing delimiter is missing, the remaining bytes (without the
/// last CRLF) are treated as the last part. If no delimiter line is found at all, no parts are returned.
/// </remarks>
/// <param name="rawBody">The raw byte array describing the body of a message which is a MultiPart message</param>
/// <param name="multiPartBoundary">The delimiter that splits the different MultiPart bodies from each other</param>
/// <returns>A list of byte arrays, each a full message (headers and body) of a <see cref="MessagePart"/></returns>
/// <exception cref="ArgumentNullException">If <paramref name="rawBody"/> is <see langword="null"/></exception>
private static List<byte[]> GetMultiPartParts(byte[] rawBody, string multiPartBoundary)
{
    if (rawBody == null)
    {
        throw new ArgumentNullException(nameof(rawBody));
    }

    // This is the list we want to return
    var messageBodies = new List<byte[]>();

    // Lengths of the fixed pieces of a delimiter; they do not change while scanning
    var crlfLength = "\r\n".Length;
    var delimiterLineLength = ("--" + multiPartBoundary + "\r\n").Length;

    // Create a stream from which we can find MultiPart boundaries
    using (var memoryStream = new MemoryStream(rawBody))
    {
        var firstBoundaryPosition = FindPositionOfNextMultiPartBoundary(memoryStream, multiPartBoundary,
            out var lastMultipartBoundaryEncountered);

        // Without an opening delimiter there is no position to start from. Deriving the start
        // offset from the -1 "not found" marker would cut an arbitrary slice out of the body.
        if (firstBoundaryPosition < 0)
        {
            return messageBodies;
        }

        // The position returned is the first character of the delimiter line,
        // so the first part starts right behind that line
        var startLocation = firstBoundaryPosition + delimiterLineLength;

        // When we have just parsed the last multipart entry, stop parsing on
        while (!lastMultipartBoundaryEncountered)
        {
            // The position returned is the first character of the next delimiter line. We go a CRLF
            // pair back, so that we do not get that into the body of the message part
            var stopLocation =
                FindPositionOfNextMultiPartBoundary(memoryStream, multiPartBoundary,
                    out lastMultipartBoundaryEncountered) - crlfLength;

            // If we could not find the next multipart boundary, but we had not yet discovered the last boundary,
            // then we will consider the rest of the bytes as contained in a last message part.
            if (stopLocation <= -1)
            {
                // Include everything except the last CRLF.
                stopLocation = (int) memoryStream.Length - crlfLength;

                // We consider this as the last part
                lastMultipartBoundaryEncountered = true;

                // Special case: when the last multipart delimiter is not ending with "--", but is indeed the
                // last one, then the next multipart would contain nothing, and we should not include such one.
                if (startLocation >= stopLocation)
                {
                    break;
                }
            }

            // Special case: empty part. It is skipped by moving the start location behind the delimiter
            if (startLocation >= stopLocation)
            {
                startLocation = stopLocation + crlfLength + delimiterLineLength;
                continue;
            }

            // We have now found the start and end of a message part. Copy its bytes into
            // an array of their own and add it to the list we want to return
            var length = stopLocation - startLocation;
            var messageBody = new byte[length];
            Array.Copy(rawBody, startLocation, messageBody, 0, length);
            messageBodies.Add(messageBody);

            // The next part starts behind the CRLF and the delimiter line that follow this part
            startLocation = stopLocation + crlfLength + delimiterLineLength;
        }
    }

    // We are done
    return messageBodies;
}
#endregion
#region FindPositionOfNextMultiPartBoundary
/// <summary>
/// Method that is able to find a specific MultiPart boundary in a Stream.<br/>
/// The Stream passed should not be used for anything else than looking for MultiPart boundaries
/// </summary>
/// <param name="stream">
/// The stream to find the next MultiPart boundary in. Do not use it for anything else than with this method.
/// </param>
/// <param name="multiPartBoundary">The MultiPart boundary to look for. This should be found in the Content-Type header</param>
/// <param name="lastMultipartBoundaryFound">
/// Is set to <see langword="true"/> if the next MultiPart boundary was indicated to be the last one, by having
/// -- appended to it. Otherwise set to <see langword="false"/>
/// </param>
/// <returns>
/// The position of the first character of the line that contained the MultiPart boundary, or -1 if no (more)
/// MultiPart boundaries were found
/// </returns>
private static int FindPositionOfNextMultiPartBoundary(Stream stream, string multiPartBoundary,
    out bool lastMultipartBoundaryFound)
{
    lastMultipartBoundaryFound = false;

    // A delimiter line starts with "--" followed by the boundary;
    // the closing delimiter has a further "--" appended
    var delimiter = "--" + multiPartBoundary;
    var closingDelimiter = delimiter + "--";

    for (;;)
    {
        // Remember where the line begins - nothing of it has been read yet
        var lineStart = (int) stream.Position;
        var currentLine = StreamUtility.ReadLineAsAscii(stream);

        // No more lines: the stream holds no further MultiPart boundary
        if (currentLine == null)
        {
            return -1;
        }

        // The delimiter has to be at the very start of a line
        if (currentLine.StartsWith(delimiter, StringComparison.Ordinal))
        {
            // Check if the found boundary was also the last one
            lastMultipartBoundaryFound =
                currentLine.StartsWith(closingDelimiter, StringComparison.OrdinalIgnoreCase);
            return lineStart;
        }
    }
}
#endregion
#region DecodeBody
/// <summary>
/// Decodes a byte array into another byte array based upon the Content Transfer encoding
/// </summary>
/// <param name="messageBody">The byte array to decode into another byte array</param>
/// <param name="contentTransferEncoding">The Content-Transfer-Encoding of the byte array</param>
/// <returns>
/// A byte array which comes from the <paramref name="contentTransferEncoding"/> being used on the
/// <paramref name="messageBody"/>
/// </returns>
/// <exception cref="ArgumentNullException">If <paramref name="messageBody"/> is <see langword="null"/></exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown if the <paramref name="contentTransferEncoding"/> is unsupported
/// </exception>
private static byte[] DecodeBody(byte[] messageBody, ContentTransferEncoding contentTransferEncoding)
{
    if (messageBody == null)
    {
        throw new ArgumentNullException(nameof(messageBody));
    }

    switch (contentTransferEncoding)
    {
        case ContentTransferEncoding.SevenBit:
        case ContentTransferEncoding.EightBit:
        case ContentTransferEncoding.Binary:
            // These encodings carry the bytes as they are
            return messageBody;

        case ContentTransferEncoding.Base64:
        {
            // A Base64 encoded body consists of US-ASCII characters only
            var base64Text = Encoding.ASCII.GetString(messageBody);
            return Base64.Decode(base64Text);
        }

        case ContentTransferEncoding.QuotedPrintable:
        {
            // A QuotedPrintable encoded body consists of US-ASCII characters only
            var quotedPrintableText = Encoding.ASCII.GetString(messageBody);
            return QuotedPrintable.DecodeContentTransferEncoding(quotedPrintableText);
        }

        default:
            throw new ArgumentOutOfRangeException(nameof(contentTransferEncoding));
    }
}
#endregion
#region GetBodyAsText
/// <summary>
/// Gets this MessagePart's <see cref="Body"/> as text.<br/>
/// This is simply the <see cref="BodyEncoding"/> being used on the raw bytes of the <see cref="Body"/> property.<br/>
/// This method is only valid to call if it is not a MultiPart message and therefore contains a body.
/// </summary>
/// <returns>The <see cref="Body"/> property as a string</returns>
public string GetBodyAsText() => BodyEncoding.GetString(Body);
#endregion
#region Save
/// <summary>
/// Save this <see cref="MessagePart"/>'s contents to a file.<br/>
/// There are no methods to reload the file.
/// </summary>
/// <param name="file">
/// The File location to save the <see cref="MessagePart"/> to. Existent files will be overwritten.
/// </param>
/// <exception cref="ArgumentNullException">If <paramref name="file"/> is <see langword="null"/></exception>
/// <exception>Other exceptions relevant to using a <see cref="FileStream"/> might be thrown as well</exception>
public void Save(FileInfo file)
{
    if (file == null)
    {
        throw new ArgumentNullException(nameof(file));
    }

    // FileMode.Create replaces a file that already exists at that location
    var targetPath = file.FullName;
    using (var output = new FileStream(targetPath, FileMode.Create))
    {
        Save(output);
    }
}
/// <summary>
/// Save this <see cref="MessagePart"/>'s contents to a stream.
/// </summary>
/// <remarks>
/// The bytes of <see cref="Body"/> are written as they are; the stream is neither flushed nor closed here.
/// </remarks>
/// <param name="messageStream">The stream to write to</param>
/// <exception cref="ArgumentNullException">If <paramref name="messageStream"/> is <see langword="null"/></exception>
/// <exception>Other exceptions relevant to <see cref="Stream.Write(byte[], int, int)"/> might be thrown as well</exception>
public void Save(Stream messageStream)
{
    if (messageStream == null)
    {
        throw new ArgumentNullException(nameof(messageStream));
    }

    var content = Body;
    messageStream.Write(content, 0, content.Length);
}
#endregion
}
}