You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
218 lines
9.6 KiB
218 lines
9.6 KiB
using System;
|
|
using System.Collections.Generic;
|
|
using System.Collections.Specialized;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using MsgReader.Helpers;
|
|
|
|
namespace MsgReader.Mime.Header
|
|
{
|
|
///<summary>
|
|
/// Utility class that divides a message into a body and a header.<br/>
|
|
/// The header is then parsed to a strongly typed <see cref="MessageHeader"/> object.
|
|
///</summary>
|
|
public static class HeaderExtractor
|
|
{
|
|
#region GetHeaders
|
|
/// <summary>
/// Turns the raw text of a header section into a <see cref="MessageHeader"/> object.
/// </summary>
/// <param name="headersString">The raw text that holds the header lines</param>
/// <returns>A <see cref="MessageHeader"/> constructed from the name/value pairs found in <paramref name="headersString"/></returns>
/// <exception cref="ArgumentNullException">If <paramref name="headersString"/> is <see langword="null"/> (raised by <see cref="ExtractHeaders"/>)</exception>
public static MessageHeader GetHeaders(string headersString)
{
    // ExtractHeaders splits the text into name/value pairs; the pairs are then
    // handed straight to the MessageHeader constructor
    return new MessageHeader(ExtractHeaders(headersString));
}
|
|
#endregion
|
|
|
|
#region FindHeaderEndPosition
|
|
/// <summary>
/// Locates the offset at which the header section of a raw message ends.<br/>
/// Lines are consumed one by one until an empty line (or no line at all) is returned.
/// </summary>
/// <param name="messageContent">The complete raw message as a byte array</param>
/// <returns>The stream position right after the line that terminated the header section</returns>
/// <exception cref="ArgumentNullException">If <paramref name="messageContent"/> is <see langword="null"/></exception>
private static int FindHeaderEndPosition(byte[] messageContent)
{
    if (messageContent == null)
        throw new ArgumentNullException(nameof(messageContent));

    // Wrap the bytes in a stream so they can be consumed line by line
    using (Stream content = new MemoryStream(messageContent))
    {
        // Headers are expected to be US-ASCII, so reading them as ASCII is safe.
        // Stop at the first blank line. A null line also stops the scan: the
        // message then consists of headers only and has no body.
        string currentLine;
        do
        {
            currentLine = StreamUtility.ReadLineAsAscii(content);
        } while (!string.IsNullOrEmpty(currentLine));

        return (int) content.Position;
    }
}
|
|
#endregion
|
|
|
|
#region ExtractHeadersAndBody
|
|
/// <summary>
/// Splits a raw message into its header section and its body.<br/>
/// The header section is parsed into a strongly typed <see cref="MessageHeader"/> object,
/// the body is returned as the remaining raw bytes.
/// </summary>
/// <param name="fullRawMessage">The complete raw message from which header and body are extracted</param>
/// <param name="headers">Receives the parsed headers of the message</param>
/// <param name="body">Receives a copy of all bytes that follow the header section</param>
/// <exception cref="ArgumentNullException">If <paramref name="fullRawMessage"/> is <see langword="null"/></exception>
public static void ExtractHeadersAndBody(byte[] fullRawMessage, out MessageHeader headers, out byte[] body)
{
    if (fullRawMessage == null)
        throw new ArgumentNullException(nameof(fullRawMessage));

    // Everything before this offset is header data, everything from it onwards is body
    var headerLength = FindHeaderEndPosition(fullRawMessage);

    // MIME headers should always be ASCII encoded, but in practice some are not.
    // Decoding as UTF-8 gives the same result for pure ASCII input, because
    // UTF-8 is a superset of ASCII.
    var headerText = Encoding.UTF8.GetString(fullRawMessage, 0, headerLength);

    // Parse the text into name/value pairs and build the strongly typed header from them
    headers = new MessageHeader(ExtractHeaders(headerText));

    // The body is simply whatever remains after the header section
    var bodyLength = fullRawMessage.Length - headerLength;
    body = new byte[bodyLength];
    Array.Copy(fullRawMessage, headerLength, body, 0, bodyLength);
}
|
|
#endregion
|
|
|
|
#region ExtractHeaders
|
|
/// <summary>
/// Method that takes a full message and extracts the headers from it.<br/>
/// Folded (multi line) header values are unfolded into a single value. When the same
/// header name occurs more than once, the values are combined into one comma separated value.
/// Header names are matched case-insensitively, in line with the default behavior of
/// <see cref="NameValueCollection"/>.
/// </summary>
/// <param name="messageContent">The message to extract headers from. Does not need the body part. Needs the empty headers end line.</param>
/// <returns>A collection of Name and Value pairs of headers</returns>
/// <exception cref="ArgumentNullException">If <paramref name="messageContent"/> is <see langword="null"/></exception>
public static NameValueCollection ExtractHeaders(string messageContent)
{
    if (messageContent == null)
        throw new ArgumentNullException(nameof(messageContent));

    var headers = new NameValueCollection();

    using (var messageReader = new StringReader(messageContent))
    {
        // Read until all headers have ended.
        // The headers end when an empty line is encountered.
        // An empty message might actually not have an empty line, in which
        // case the headers end with a null value.
        string line;
        while (!string.IsNullOrEmpty(line = messageReader.ReadLine()))
        {
            // Split into name and value
            var header = SeparateHeaderNameAndValue(line);
            var headerName = header.Key;

            // Use a StringBuilder since the header value may be continued on the next line
            var headerValue = new StringBuilder(header.Value);

            // Keep reading until we would hit the next header.
            // This is for handling multi line headers.
            while (IsMoreLinesInHeaderValue(messageReader))
            {
                // Unfolding is accomplished by simply removing any CRLF
                // that is immediately followed by WSP.
                // ReadLine does this for us because it discards the CRLF.
                // See http://tools.ietf.org/html/rfc822#section-3.1.1 for more information
                var moreHeaderValue = messageReader.ReadLine();

                // IsMoreLinesInHeaderValue does not return true if the next line does not exist,
                // so this check only exists to satisfy "possibly null" code analysis
                if (moreHeaderValue == null)
                    throw new ArgumentException("This will never happen");

                headerValue.Append(moreHeaderValue);
            }

            var completeValue = headerValue.ToString();

            // A single lookup through the indexer replaces the former
            // AllKeys.Contains check, which copied the complete key array for
            // every header line and compared names case-sensitively while the
            // collection itself is case-insensitive. The indexer only returns
            // null when the name has not been stored yet, because stored
            // values are never null.
            var existingValue = headers[headerName];
            if (existingValue != null)
                headers[headerName] = existingValue + "," + completeValue;
            else
                headers.Add(headerName, completeValue);
        }
    }

    return headers;
}
|
|
#endregion
|
|
|
|
#region IsMoreLinesInHeaderValue
|
|
/// <summary>
/// Tells whether the line that follows in the <see cref="TextReader"/> continues the
/// header value that is currently being parsed.<br/>
/// Only the next character is peeked at; nothing is consumed from the reader.
/// This should only be called while parsing headers.
/// </summary>
/// <param name="reader">The reader from which the header is read</param>
/// <returns><see langword="true"/> if the next line starts with a space or a tab. <see langword="false"/> otherwise, also when there is nothing left to read</returns>
private static bool IsMoreLinesInHeaderValue(TextReader reader)
{
    // Peek returns -1 when no more characters are available, which falls
    // through to the default case just like any non-whitespace character
    switch (reader.Peek())
    {
        // A continued (folded) header line must start with whitespace
        case ' ':
        case '\t':
            return true;

        default:
            return false;
    }
}
|
|
#endregion
|
|
|
|
#region SeparateHeaderNameAndValue
|
|
/// <summary>
/// Separate a full header line into a header name and a header value.<br/>
/// The line is split on the first colon; both parts are trimmed of surrounding whitespace.
/// </summary>
/// <param name="rawHeader">The raw header line to be separated</param>
/// <returns>
/// A pair with the header name as key and the header value as value.
/// Both are empty strings when <paramref name="rawHeader"/> does not contain a colon.
/// </returns>
/// <exception cref="ArgumentNullException">If <paramref name="rawHeader"/> is <see langword="null"/></exception>
internal static KeyValuePair<string, string> SeparateHeaderNameAndValue(string rawHeader)
{
    if (rawHeader == null)
        throw new ArgumentNullException(nameof(rawHeader));

    var indexOfColon = rawHeader.IndexOf(':');

    // Without a colon there is no name/value separation to perform.
    // No further length check is needed: a found index is always smaller than
    // the string length, so Substring(indexOfColon + 1) is always a valid call
    // (it yields an empty string when the colon is the last character).
    if (indexOfColon < 0)
        return new KeyValuePair<string, string>(string.Empty, string.Empty);

    var key = rawHeader.Substring(0, indexOfColon).Trim();
    var value = rawHeader.Substring(indexOfColon + 1).Trim();

    return new KeyValuePair<string, string>(key, value);
}
|
|
#endregion
|
|
}
|
|
} |