using System;
using System.Collections.Generic;
using System.IO;
using System.Net.Mime;
using System.Text;
using MsgReader.Helpers;
using MsgReader.Localization;
using MsgReader.Mime.Decode;
using MsgReader.Mime.Header;

namespace MsgReader.Mime
{
    /// <summary>
    ///     A MessagePart is a part of an email message used to describe the whole email parse tree.<br />
    ///     <br />
    ///     <b>Email messages are tree structures</b>:<br />
    ///     Email messages may contain large tree structures, and the MessageParts are the nodes of this structure.<br />
    ///     A MessagePart may either be a leaf in the structure or an internal node with links to other MessageParts.<br />
    ///     The root of the message tree is the <see cref="Message" /> class.<br />
    ///     <br />
    ///     <b>Leafs</b>:<br />
    ///     If a MessagePart is a leaf, the part is not a <see cref="IsMultiPart">MultiPart</see> message.<br />
    ///     Leafs are where the contents of an email are placed.<br />
    ///     This includes, but is not limited to: attachments, text or images referenced from HTML.<br />
    ///     The content of an attachment can be fetched by using the <see cref="Body" /> property.<br />
    ///     If you want to have the text version of a MessagePart, use the <see cref="GetBodyAsText" /> method which
    ///     will convert the <see cref="Body" /> into a string using the encoding the message was sent with.<br />
    ///     <br />
    ///     <b>Internal nodes</b>:<br />
    ///     If a MessagePart is an internal node in the email tree structure, then the part is a
    ///     <see cref="IsMultiPart">MultiPart</see> message.<br />
    ///     The <see cref="MessageParts" /> property will then contain links to the parts it contains.<br />
    ///     The <see cref="Body" /> property of the MessagePart will not be set.<br />
    ///     <br />
    ///     See the example for a parsing example.<br />
    ///     This class cannot be instantiated from outside the library.
    /// </summary>
    /// <example>
    ///     This example illustrates how the message parse tree looks like given a specific message<br />
    ///     <br />
    ///     The message source in this example is:<br />
    ///     <code>
    /// MIME-Version: 1.0
    /// Content-Type: multipart/mixed; boundary="frontier"
    /// 
    /// This is a message with multiple parts in MIME format.
    /// --frontier
    /// Content-Type: text/plain
    /// 
    /// This is the body of the message.
    /// --frontier
    /// Content-Type: application/octet-stream
    /// Content-Transfer-Encoding: base64
    /// 
    /// PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg
    /// Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==
    /// --frontier--
    ///     </code>
    ///     The tree will look as follows, where the content-type media type of the message is listed<br />
    ///     <code>
    /// - Message root
    ///   - multipart/mixed MessagePart
    ///     - text/plain MessagePart
    ///     - application/octet-stream MessagePart
    ///     </code>
    ///     It is possible to have more complex message trees like the following:<br />
    ///     <code>
    /// - Message root
    ///   - multipart/mixed MessagePart
    ///     - text/plain MessagePart
    ///     - text/plain MessagePart
    ///     - multipart/parallel
    ///       - audio/basic
    ///       - image/tiff
    ///     - text/enriched
    ///     - message/rfc822
    ///     </code>
    ///     But it is also possible to have very simple message trees like:<br />
    ///     <code>
    /// - Message root
    ///   - text/plain
    ///     </code>
    /// </example>
    public class MessagePart
    {
        #region Fields
        /// <summary>
        ///     Cached value for <see cref="IsInline" />; <see langword="null" /> until it is first read or explicitly set.
        /// </summary>
        private bool? _isInline;
        #endregion

        #region Properties
        /// <summary>
        ///     The Content-Type header field.<br />
        ///     <br />
        ///     If not set, the ContentType is created by the default "text/plain; charset=us-ascii" which is
        ///     defined in <a href="http://tools.ietf.org/html/rfc2045#section-5.2">RFC 2045 section 5.2</a>.<br />
        ///     <br />
        ///     If set, the default is overridden.
        /// </summary>
        public ContentType ContentType { get; }

        /// <summary>
        ///     A human readable description of the body<br />
        ///     <br />
        ///     <see langword="null" /> if no Content-Description header was present in the message.
        /// </summary>
        public string ContentDescription { get; }

        /// <summary>
        ///     This header describes the Content encoding during transfer.<br />
        ///     <br />
        ///     If no Content-Transfer-Encoding header was present in the message, it is set
        ///     to the default of <see cref="Header.ContentTransferEncoding.SevenBit">SevenBit</see> in accordance to the RFC.
        /// </summary>
        /// <remarks>See <a href="http://tools.ietf.org/html/rfc2045#section-6">RFC 2045 section 6</a> for details</remarks>
        public ContentTransferEncoding ContentTransferEncoding { get; }

        /// <summary>
        ///     ID of the content part (like an attached image). Used with MultiPart messages.<br />
        ///     <br />
        ///     <see langword="null" /> if no Content-ID header field was present in the message.
        /// </summary>
        public string ContentId { get; }

        /// <summary>
        ///     Used to describe if a <see cref="MessagePart" /> is to be displayed or to be thought of as an attachment.<br />
        ///     Also contains information about filename if such was sent.<br />
        ///     <br />
        ///     <see langword="null" /> if no Content-Disposition header field was present in the message
        /// </summary>
        public ContentDisposition ContentDisposition { get; }

        /// <summary>
        ///     This is the encoding used to parse the message body if the <see cref="MessagePart" /><br />
        ///     is not a MultiPart message. It is derived from the <see cref="ContentType" /> character set property.
        /// </summary>
        public Encoding BodyEncoding { get; }

        /// <summary>
        ///     This is the parsed body of this <see cref="MessagePart" />.<br />
        ///     It is parsed in that way, if the body was ContentTransferEncoded, it has been decoded to the
        ///     correct bytes.<br />
        ///     <br />
        ///     It will be <see langword="null" /> if this <see cref="MessagePart" /> is a MultiPart message.<br />
        ///     Use <see cref="IsMultiPart" /> to check if this <see cref="MessagePart" /> is a MultiPart message.
        /// </summary>
        public byte[] Body { get; private set; }

        /// <summary>
        ///     This will be set to true if this is the first found Text <see cref="MessagePart" />. This way it
        ///     indicates that this is the text variant of the E-mail body.
        /// </summary>
        internal bool IsTextBody { get; set; }

        /// <summary>
        ///     This will be set to true if this is the first found Html <see cref="MessagePart" />. This way it
        ///     indicates that this is the html variant of the E-mail body.
        /// </summary>
        internal bool IsHtmlBody { get; set; }

        /// <summary>
        ///     Describes if this <see cref="MessagePart" /> is a MultiPart message<br />
        ///     <br />
        ///     The <see cref="MessagePart" /> is a MultiPart message if the <see cref="ContentType" /> media type
        ///     property starts with "multipart/"
        /// </summary>
        public bool IsMultiPart => ContentType.MediaType.StartsWith("multipart/", StringComparison.OrdinalIgnoreCase);

        /// <summary>
        ///     A <see cref="MessagePart" /> is considered to be holding text in its body if the MediaType
        ///     either starts with "text/" or is equal to "message/rfc822"
        /// </summary>
        public bool IsText
        {
            get
            {
                var mediaType = ContentType.MediaType;

                return mediaType.StartsWith("text/", StringComparison.OrdinalIgnoreCase) ||
                       mediaType.Equals("message/rfc822", StringComparison.OrdinalIgnoreCase);
            }
        }

        /// <summary>
        ///     A <see cref="MessagePart" /> is considered to be an inline attachment, if
        ///     its <see cref="ContentDisposition" /> has Inline set to <see langword="true" />.<br />
        ///     The value is computed on first read and cached; the library can override it through the internal setter.
        /// </summary>
        public bool IsInline
        {
            get
            {
                if (!_isInline.HasValue)
                {
                    _isInline = ContentDisposition != null && ContentDisposition.Inline;
                }

                return _isInline.Value;
            }
            internal set => _isInline = value;
        }

        /// <summary>
        ///     A <see cref="MessagePart" /> is considered to be an attachment, if<br />
        ///     - it has not been marked as the html body (<see cref="IsHtmlBody" />) or the
        ///     text body (<see cref="IsTextBody" />) of the E-mail<br />
        ///     and<br />
        ///     - it is not a MultiPart message
        /// </summary>
        public bool IsAttachment
        {
            get
            {
                // The parts that were selected as the E-mail body itself are never attachments
                if (IsHtmlBody || IsTextBody)
                {
                    return false;
                }

                return !IsMultiPart;
            }
        }

        /// <summary>
        ///     This is a convenient-property for figuring out a FileName for this <see cref="MessagePart" />.<br />
        ///     If the <see cref="MessagePart" /> is a MultiPart message, then it makes no sense to try to find a FileName.<br />
        ///     <br />
        ///     The FileName is taken from the first of these that is available: the Content-Disposition file name,
        ///     the Content-Description, the Subject header, the subject of an embedded message (only when the
        ///     media type maps to the ".eml" extension) or the Content-Type name.<br />
        ///     If none of these places tells about the FileName, a default is returned.
        /// </summary>
        public string FileName { get; }

        /// <summary>
        ///     If this <see cref="MessagePart" /> is a MultiPart message, then this property
        ///     has a list of each of the Multiple parts that the message consists of.<br />
        ///     <br />
        ///     It is <see langword="null" /> if it is not a MultiPart message.<br />
        ///     Use <see cref="IsMultiPart" /> to check if this <see cref="MessagePart" /> is a MultiPart message.
        /// </summary>
        public List<MessagePart> MessageParts { get; private set; }
        #endregion

        #region Constructors
        /// <summary>
        ///     Used to construct the topmost message part
        /// </summary>
        /// <param name="rawBody">The body that needs to be parsed</param>
        /// <param name="headers">The headers that should be used from the message</param>
        /// <exception cref="ArgumentNullException">
        ///     If <paramref name="rawBody" /> or <paramref name="headers" /> is <see langword="null" />
        /// </exception>
        internal MessagePart(byte[] rawBody, MessageHeader headers)
        {
            if (rawBody == null)
            {
                throw new ArgumentNullException(nameof(rawBody));
            }

            if (headers == null)
            {
                throw new ArgumentNullException(nameof(headers));
            }

            ContentType = headers.ContentType;
            ContentDescription = headers.ContentDescription;
            ContentTransferEncoding = headers.ContentTransferEncoding;
            ContentId = headers.ContentId;
            ContentDisposition = headers.ContentDisposition;

            FileName = FindFileName(rawBody, headers, LanguageConsts.NameLessFileName);
            BodyEncoding = ParseBodyEncoding(ContentType.CharSet);

            // Must run last: it relies on ContentType and ContentTransferEncoding being set
            ParseBody(rawBody);
        }
        #endregion

        #region ParseBodyEncoding
        /// <summary>
        ///     Parses a character set into an encoding
        /// </summary>
        /// <param name="characterSet">The character set that needs to be parsed. <see langword="null" /> is allowed.</param>
        /// <returns>
        ///     The encoding specified by the <paramref name="characterSet" /> parameter, or ASCII if the character set
        ///     was <see langword="null" /> or empty
        /// </returns>
        private static Encoding ParseBodyEncoding(string characterSet)
        {
            // Default encoding in Mime messages is US-ASCII; only look up an
            // encoding when a character set was actually specified
            return string.IsNullOrEmpty(characterSet)
                ? Encoding.ASCII
                : EncodingFinder.FindEncoding(characterSet);
        }
        #endregion

        #region FindFileName
        /// <summary>
        ///     Figures out the filename of this message part. The result is exposed through the
        ///     <see cref="FileName" /> property.
        /// </summary>
        /// <param name="rawBody">The body that needs to be parsed</param>
        /// <param name="headers">The headers that should be used from the message</param>
        /// <param name="defaultName">The default filename to use, if no other could be found</param>
        /// <returns>The filename found, or the default one if no such filename could be found in the headers</returns>
        /// <exception cref="ArgumentNullException">if <paramref name="headers" /> is <see langword="null" /></exception>
        private static string FindFileName(byte[] rawBody, MessageHeader headers, string defaultName)
        {
            if (headers == null)
            {
                throw new ArgumentNullException(nameof(headers));
            }

            // 1. An explicit file name in the Content-Disposition header wins
            if (headers.ContentDisposition?.FileName != null)
            {
                return FileManager.RemoveInvalidFileNameChars(headers.ContentDisposition.FileName);
            }

            var extensionFromContentType = string.Empty;
            string contentTypeName = null;

            if (headers.ContentType != null)
            {
                extensionFromContentType = MimeType.GetExtensionFromMimeType(headers.ContentType.MediaType);
                contentTypeName = headers.ContentType.Name;
            }

            // 2. Content-Description (the extension is sanitized together with the description)
            if (!string.IsNullOrEmpty(headers.ContentDescription))
            {
                return FileManager.RemoveInvalidFileNameChars(headers.ContentDescription + extensionFromContentType);
            }

            // 3. Subject header of this part
            if (!string.IsNullOrEmpty(headers.Subject))
            {
                return FileManager.RemoveInvalidFileNameChars(headers.Subject) + extensionFromContentType;
            }

            // 4. For an embedded message, try the subject of that message
            if (extensionFromContentType.Equals(".eml", StringComparison.OrdinalIgnoreCase))
            {
                try
                {
                    var message = new Message(rawBody);
                    if (!string.IsNullOrEmpty(message.Headers?.Subject))
                    {
                        return FileManager.RemoveInvalidFileNameChars(message.Headers.Subject) + extensionFromContentType;
                    }
                }
                // Best effort only: when the embedded message can not be parsed we fall through to the next source
                // ReSharper disable once EmptyGeneralCatchClause
                catch
                {
                }
            }

            // 5. The Content-Type name, or as a last resort the default name
            return !string.IsNullOrEmpty(contentTypeName)
                ? FileManager.RemoveInvalidFileNameChars(contentTypeName)
                : FileManager.RemoveInvalidFileNameChars(defaultName + extensionFromContentType);
        }
        #endregion

        #region ParseBody
        /// <summary>
        ///     Parses a byte array as a body of an email message.
        /// </summary>
        /// <param name="rawBody">The byte array to parse as body of an email message. This array may not contain headers.</param>
        private void ParseBody(byte[] rawBody)
        {
            if (IsMultiPart)
            {
                // Parses a MultiPart message; fills MessageParts and leaves Body unset
                ParseMultiPartBody(rawBody);
            }
            else
            {
                // Parses a non MultiPart message
                // Decode the body accordingly and set the Body property
                Body = DecodeBody(rawBody, ContentTransferEncoding);
            }
        }
        #endregion

        #region ParseMultiPartBody
        /// <summary>
        ///     Parses the <paramref name="rawBody" /> byte array as a MultiPart message.<br />
        ///     It is not valid to call this method if <see cref="IsMultiPart" /> returned <see langword="false" />.<br />
        ///     Fills the <see cref="MessageParts" /> property of this <see cref="MessagePart" />.
        /// </summary>
        /// <param name="rawBody">The byte array which is to be parsed as a MultiPart message</param>
        private void ParseMultiPartBody(byte[] rawBody)
        {
            // Fetch the individual MultiPart message parts using the MultiPart boundary
            // that delimits the messages within the body
            var bodyParts = GetMultiPartParts(rawBody, ContentType.Boundary);

            // Initialize the MessageParts property, with room for as many bodies as we have found
            MessageParts = new List<MessagePart>(bodyParts.Count);

            // Now parse each byte array as a message body and add it to the MessageParts property
            foreach (var bodyPart in bodyParts)
            {
                MessageParts.Add(GetMessagePart(bodyPart));
            }
        }
        #endregion

        #region GetMessagePart
        /// <summary>
        ///     Given a byte array describing a full message.<br />
        ///     Parses the byte array into a <see cref="MessagePart" />.
        /// </summary>
        /// <param name="rawMessageContent">The byte array containing both headers and body of a message</param>
        /// <returns>A <see cref="MessagePart" /> which was described by the <paramref name="rawMessageContent" /> byte array</returns>
        private static MessagePart GetMessagePart(byte[] rawMessageContent)
        {
            // Find the headers and the body parts of the byte array
            HeaderExtractor.ExtractHeadersAndBody(rawMessageContent, out var headers, out var body);

            // Create a new MessagePart from the headers and the body
            return new MessagePart(body, headers);
        }
        #endregion

        #region GetMultiPartParts
        /// <summary>
        ///     Gets a list of byte arrays where each entry in the list is a full message of a message part
        /// </summary>
        /// <param name="rawBody">The raw byte array describing the body of a message which is a MultiPart message</param>
        /// <param name="multiPartBoundary">The delimiter that splits the different MultiPart bodies from each other</param>
        /// <returns>A list of byte arrays, each a full message of a <see cref="MessagePart" /></returns>
        /// <exception cref="ArgumentNullException">If <paramref name="rawBody" /> is <see langword="null" /></exception>
        private static List<byte[]> GetMultiPartParts(byte[] rawBody, string multiPartBoundary)
        {
            if (rawBody == null)
            {
                throw new ArgumentNullException(nameof(rawBody));
            }

            // All offsets below assume that boundary lines are terminated by a CRLF pair.
            const string crLf = "\r\n";

            // Length of a boundary line at the start of a part: "--boundary" + CRLF
            var boundaryLineLength = ("--" + multiPartBoundary + crLf).Length;

            // Distance from the end of one part to the start of the next: CRLF + "--boundary" + CRLF
            var partSeparatorLength = (crLf + "--" + multiPartBoundary + crLf).Length;

            // This is the list we want to return
            var messageBodies = new List<byte[]>();

            // Create a stream from which we can find MultiPart boundaries
            using (var memoryStream = new MemoryStream(rawBody))
            {
                // Find the start of the first message in this multipart
                // Since the method returns the first character on the line containing the MultiPart boundary, we
                // need to add the MultiPart boundary with prepended "--" and appended CRLF pair to the position returned.
                // NOTE(review): when no boundary line exists at all the search returns -1, so startLocation becomes
                // an offset unrelated to the content - confirm whether such bodies can reach this method.
                var startLocation =
                    FindPositionOfNextMultiPartBoundary(memoryStream, multiPartBoundary,
                        out var lastMultipartBoundaryEncountered) + boundaryLineLength;

                // When we have just parsed the last multipart entry, stop parsing on
                while (!lastMultipartBoundaryEncountered)
                {
                    // Find the end location of the current multipart
                    // Since the method returns the first character on the line containing the MultiPart boundary, we
                    // need to go a CRLF pair back, so that we do not get that into the body of the message part
                    var stopLocation =
                        FindPositionOfNextMultiPartBoundary(memoryStream, multiPartBoundary,
                            out lastMultipartBoundaryEncountered) - crLf.Length;

                    // If we could not find the next multipart boundary, but we had not yet discovered the last boundary,
                    // then we will consider the rest of the bytes as contained in a last message part.
                    if (stopLocation <= -1)
                    {
                        // Include everything except the last CRLF.
                        stopLocation = (int) memoryStream.Length - crLf.Length;

                        // We consider this as the last part
                        lastMultipartBoundaryEncountered = true;

                        // Special case: when the last multipart delimiter is not ending with "--", but is indeed the
                        // last one, then the next multipart would contain nothing, and we should not include such one.
                        if (startLocation >= stopLocation)
                        {
                            break;
                        }
                    }

                    // Special case: empty part.
                    // Skip it by moving the start location past the boundary line
                    if (startLocation >= stopLocation)
                    {
                        startLocation = stopLocation + partSeparatorLength;
                        continue;
                    }

                    // We have now found the start and end of a message part
                    // Now we create a byte array with the correct length and put the message part's bytes into
                    // it and add it to our list we want to return
                    var length = stopLocation - startLocation;
                    var messageBody = new byte[length];
                    Array.Copy(rawBody, startLocation, messageBody, 0, length);
                    messageBodies.Add(messageBody);

                    // We want to advance to the next message part's start.
                    // We can find this by jumping forward the MultiPart boundary from the last
                    // message part's end position
                    startLocation = stopLocation + partSeparatorLength;
                }
            }

            // We are done
            return messageBodies;
        }
        #endregion

        #region FindPositionOfNextMultiPartBoundary
        /// <summary>
        ///     Method that is able to find a specific MultiPart boundary in a Stream.<br />
        ///     The Stream passed should not be used for anything else than for looking for MultiPart boundaries
        /// </summary>
        /// <param name="stream">The stream to find the next MultiPart boundary in. Do not use it for anything else than with this method.</param>
        /// <param name="multiPartBoundary">The MultiPart boundary to look for. This should be found in the <see cref="ContentType" /> header</param>
        /// <param name="lastMultipartBoundaryFound">
        ///     Is set to <see langword="true" /> if the next MultiPart boundary was indicated to be the last one, by
        ///     having -- appended to it. Otherwise set to <see langword="false" />
        /// </param>
        /// <returns>
        ///     The position of the first character of the line that contained MultiPartBoundary or -1 if no (more)
        ///     MultiPart boundaries was found
        /// </returns>
        private static int FindPositionOfNextMultiPartBoundary(Stream stream, string multiPartBoundary,
            out bool lastMultipartBoundaryFound)
        {
            lastMultipartBoundaryFound = false;

            // The MultiPart boundary is the MultiPartBoundary with "--" in front of it
            // which is to be at the very start of a line. The closing boundary has an extra "--" appended.
            // Both are loop-invariant, so they are built once instead of once per line.
            var delimiter = "--" + multiPartBoundary;
            var closingDelimiter = delimiter + "--";

            while (true)
            {
                // Get the current position. This is the first position on the line - no characters of the line will
                // have been read yet
                var currentPos = (int) stream.Position;

                // Read the line
                var line = StreamUtility.ReadLineAsAscii(stream);

                // If we kept reading until there were no more lines, we did not meet
                // the MultiPart boundary. -1 is then returned to describe this.
                if (line == null)
                {
                    return -1;
                }

                if (!line.StartsWith(delimiter, StringComparison.Ordinal))
                {
                    continue;
                }

                // Check if the found boundary was also the last one.
                // The boundary itself already matched ordinally and '-' has no case variants, so an ordinal
                // comparison gives the same answer as a case-insensitive one.
                lastMultipartBoundaryFound = line.StartsWith(closingDelimiter, StringComparison.Ordinal);
                return currentPos;
            }
        }
        #endregion

        #region DecodeBody
        /// <summary>
        ///     Decodes a byte array into another byte array based upon the Content Transfer encoding
        /// </summary>
        /// <param name="messageBody">The byte array to decode into another byte array</param>
        /// <param name="contentTransferEncoding">The <see cref="ContentTransferEncoding" /> of the byte array</param>
        /// <returns>
        ///     A byte array which comes from the <paramref name="contentTransferEncoding" /> being used on the
        ///     <paramref name="messageBody" />
        /// </returns>
        /// <exception cref="ArgumentNullException">If <paramref name="messageBody" /> is <see langword="null" /></exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if the <paramref name="contentTransferEncoding" /> is unsupported</exception>
        private static byte[] DecodeBody(byte[] messageBody, ContentTransferEncoding contentTransferEncoding)
        {
            if (messageBody == null)
            {
                throw new ArgumentNullException(nameof(messageBody));
            }

            switch (contentTransferEncoding)
            {
                case ContentTransferEncoding.QuotedPrintable:
                    // If encoded in QuotedPrintable, everything in the body is in US-ASCII
                    return QuotedPrintable.DecodeContentTransferEncoding(Encoding.ASCII.GetString(messageBody));

                case ContentTransferEncoding.Base64:
                    // If encoded in Base64, everything in the body is in US-ASCII
                    return Base64.Decode(Encoding.ASCII.GetString(messageBody));

                case ContentTransferEncoding.SevenBit:
                case ContentTransferEncoding.Binary:
                case ContentTransferEncoding.EightBit:
                    // We do not have to do anything
                    return messageBody;

                default:
                    throw new ArgumentOutOfRangeException(nameof(contentTransferEncoding));
            }
        }
        #endregion

        #region GetBodyAsText
        /// <summary>
        ///     Gets this MessagePart's <see cref="Body" /> as text.<br />
        ///     This is simply the <see cref="BodyEncoding" /> being used on the raw bytes of the <see cref="Body" /> property.<br />
        ///     This method is only valid to call if it is not a MultiPart message and therefore contains a body.<br />
        /// </summary>
        /// <returns>The <see cref="Body" /> property as a string</returns>
        public string GetBodyAsText()
        {
            return BodyEncoding.GetString(Body);
        }
        #endregion

        #region Save
        /// <summary>
        ///     Save this <see cref="MessagePart" />'s contents to a file.<br />
        ///     There are no methods to reload the file.
        /// </summary>
        /// <param name="file">The File location to save the <see cref="MessagePart" /> to. Existent files will be overwritten.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="file" /> is <see langword="null" /></exception>
        /// <exception cref="InvalidOperationException">
        ///     If this <see cref="MessagePart" /> has no <see cref="Body" /> (it is a MultiPart message)
        /// </exception>
        /// <exception>Other exceptions relevant to using a <see cref="FileStream" /> might be thrown as well</exception>
        public void Save(FileInfo file)
        {
            if (file == null)
            {
                throw new ArgumentNullException(nameof(file));
            }

            // Check before FileMode.Create runs, otherwise an existing file would be
            // truncated even though there is nothing to write
            EnsureBodyAvailable();

            using (var fileStream = new FileStream(file.FullName, FileMode.Create))
            {
                Save(fileStream);
            }
        }

        /// <summary>
        ///     Save this <see cref="MessagePart" />'s contents to a stream.<br />
        /// </summary>
        /// <param name="messageStream">The stream to write to</param>
        /// <exception cref="ArgumentNullException">If <paramref name="messageStream" /> is <see langword="null" /></exception>
        /// <exception cref="InvalidOperationException">
        ///     If this <see cref="MessagePart" /> has no <see cref="Body" /> (it is a MultiPart message)
        /// </exception>
        /// <exception>Other exceptions relevant to <see cref="Stream.Write(byte[], int, int)" /> might be thrown as well</exception>
        public void Save(Stream messageStream)
        {
            if (messageStream == null)
            {
                throw new ArgumentNullException(nameof(messageStream));
            }

            EnsureBodyAvailable();

            messageStream.Write(Body, 0, Body.Length);
        }

        /// <summary>
        ///     Throws when there is no <see cref="Body" /> to save. The body is only set for
        ///     non MultiPart messages.
        /// </summary>
        /// <exception cref="InvalidOperationException">If <see cref="Body" /> is <see langword="null" /></exception>
        private void EnsureBodyAvailable()
        {
            if (Body == null)
            {
                throw new InvalidOperationException(
                    "This MessagePart has no body to save; MultiPart messages only contain other MessageParts.");
            }
        }
        #endregion
    }
}