using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.IO;
using System.Linq;
using System.Text;
using MsgReader.Helpers;
namespace MsgReader.Mime.Header
{
/// <summary>
///     Utility class that divides a message into a body and a header.
///     The header is then parsed to a strongly typed object.
/// </summary>
public static class HeaderExtractor
{
    #region GetHeaders
    /// <summary>
    ///     Extracts the headers from the given headers string and returns them
    ///     as a <see cref="MessageHeader" /> object
    /// </summary>
    /// <param name="headersString">The string with the header information</param>
    /// <returns>A <see cref="MessageHeader" /> constructed from the parsed name/value pairs</returns>
    /// <exception cref="ArgumentNullException">If <paramref name="headersString" /> is <c>null</c></exception>
    public static MessageHeader GetHeaders(string headersString)
    {
        var headersUnparsedCollection = ExtractHeaders(headersString);
        return new MessageHeader(headersUnparsedCollection);
    }
    #endregion

    #region FindHeaderEndPosition
    /// <summary>
    ///     Find the end of the header section in a byte array.
    ///     The headers have ended when a blank line is found
    /// </summary>
    /// <param name="messageContent">The full message stored as a byte array</param>
    /// <returns>The position of the line just after the header end blank line</returns>
    /// <exception cref="ArgumentNullException">If <paramref name="messageContent" /> is <c>null</c></exception>
    private static int FindHeaderEndPosition(byte[] messageContent)
    {
        if (messageContent == null)
            throw new ArgumentNullException(nameof(messageContent));

        // Wrap the byte array in a stream so it can be consumed line by line
        using (Stream stream = new MemoryStream(messageContent))
        {
            while (true)
            {
                // Read a line from the stream. Headers are expected to be US-ASCII,
                // therefore it is not a problem to read them as such
                var line = StreamUtility.ReadLineAsAscii(stream);

                // The end of the headers is signaled by a blank line. A null line is
                // treated the same way - in that case the message consists of
                // headers only and has no body
                if (string.IsNullOrEmpty(line))
                    return (int) stream.Position;
            }
        }
    }
    #endregion

    #region ExtractHeadersAndBody
    /// <summary>
    ///     Extract the header part and body part of a message.
    ///     The headers are then parsed to a strongly typed object.
    /// </summary>
    /// <param name="fullRawMessage">The full message in bytes where header and body needs to be extracted from</param>
    /// <param name="headers">The extracted header parts of the message</param>
    /// <param name="body">The body part of the message</param>
    /// <exception cref="ArgumentNullException">If <paramref name="fullRawMessage" /> is <c>null</c></exception>
    public static void ExtractHeadersAndBody(byte[] fullRawMessage, out MessageHeader headers, out byte[] body)
    {
        if (fullRawMessage == null)
            throw new ArgumentNullException(nameof(fullRawMessage));

        // Find the end location of the headers
        var endOfHeaderLocation = FindHeaderEndPosition(fullRawMessage);

        // MIME headers should always be ASCII encoded, but sometimes they are not, so we
        // read them as UTF-8. For pure ASCII input this makes no difference because
        // UTF-8 is a superset of ASCII
        var headersString = Encoding.UTF8.GetString(fullRawMessage, 0, endOfHeaderLocation);

        // Now parse the headers to a NameValueCollection
        var headersUnparsedCollection = ExtractHeaders(headersString);

        // Use the NameValueCollection to parse it into a strongly-typed MessageHeader header
        headers = new MessageHeader(headersUnparsedCollection);

        // Since we know where the headers end, we also know where the body starts.
        // Copy everything after the headers into the body parameter
        body = new byte[fullRawMessage.Length - endOfHeaderLocation];
        Array.Copy(fullRawMessage, endOfHeaderLocation, body, 0, body.Length);
    }
    #endregion

    #region ExtractHeaders
    /// <summary>
    ///     Method that takes a full message and extracts the headers from it.
    /// </summary>
    /// <param name="messageContent">
    ///     The message to extract headers from. Does not need the body part. Parsing stops
    ///     at the first empty line or at the end of the string, whichever comes first.
    /// </param>
    /// <returns>
    ///     A collection of Name and Value pairs of headers. When the same header name occurs
    ///     more than once, the values are combined into one comma separated value.
    /// </returns>
    /// <exception cref="ArgumentNullException">If <paramref name="messageContent" /> is <c>null</c></exception>
    public static NameValueCollection ExtractHeaders(string messageContent)
    {
        if (messageContent == null)
            throw new ArgumentNullException(nameof(messageContent));

        var headers = new NameValueCollection();

        using (var messageReader = new StringReader(messageContent))
        {
            // Read until all headers have ended.
            // The headers end when an empty line is encountered.
            // An empty message might actually not have an empty line, in which
            // case the headers end with a null value.
            string line;
            while (!string.IsNullOrEmpty(line = messageReader.ReadLine()))
            {
                // Split into name and value
                var header = SeparateHeaderNameAndValue(line);
                var headerName = header.Key;

                // Use a StringBuilder since the header value may be continued on the next line
                var headerValue = new StringBuilder(header.Value);

                // Keep reading until we would hit the next header.
                // This is for handling multi line (folded) headers
                while (IsMoreLinesInHeaderValue(messageReader))
                {
                    // Unfolding is accomplished by simply removing any CRLF
                    // that is immediately followed by WSP.
                    // ReadLine does this for us because it discards the line terminator.
                    // See http://tools.ietf.org/html/rfc822#section-3.1.1 for more information
                    var moreHeaderValue = messageReader.ReadLine();

                    // IsMoreLinesInHeaderValue only returns true when a next character exists,
                    // so ReadLine cannot return null here. This check is only included to
                    // satisfy "possibly null" code analysis
                    if (moreHeaderValue == null)
                        throw new ArgumentException("This will never happen");

                    // Simply append the line just read to the header value
                    headerValue.Append(moreHeaderValue);
                }

                // Now we have the name and the full, unfolded value
                var fullValue = headerValue.ToString();

                // NOTE: AllKeys.Contains compares names ordinally, while a default
                // NameValueCollection matches keys case-insensitively. A repeated name with
                // the exact same casing is merged into one comma separated value below; a
                // repeated name with different casing goes through Add, which stores it as
                // an additional value under the already existing key.
                if (headers.AllKeys.Contains(headerName))
                    headers[headerName] = headers[headerName] + "," + fullValue;
                else
                    headers.Add(headerName, fullValue);
            }
        }

        return headers;
    }
    #endregion

    #region IsMoreLinesInHeaderValue
    /// <summary>
    ///     Check if the next line is part of the current header value we are parsing by
    ///     peeking on the next character of the <see cref="TextReader" />.
    ///     This should only be called while parsing headers.
    /// </summary>
    /// <param name="reader">The reader from which the header is read from</param>
    /// <returns>
    ///     <c>true</c> if the next character is a space or a tab (multi-line header),
    ///     <c>false</c> otherwise, including when there is nothing left to read
    /// </returns>
    private static bool IsMoreLinesInHeaderValue(TextReader reader)
    {
        var peek = reader.Peek();

        // -1 means there are no more characters available
        if (peek == -1)
            return false;

        var peekChar = (char) peek;

        // A multi line header must have a whitespace character
        // on the next line if it is to be continued
        return peekChar == ' ' || peekChar == '\t';
    }
    #endregion

    #region SeparateHeaderNameAndValue
    /// <summary>
    ///     Separate a full header line into a header name and a header value.
    /// </summary>
    /// <param name="rawHeader">The raw header line to be separated</param>
    /// <returns>
    ///     A pair with the trimmed text before the first colon as key and the trimmed text
    ///     after it as value. When the line contains no colon both key and value are empty.
    /// </returns>
    /// <exception cref="ArgumentNullException">If <paramref name="rawHeader" /> is <c>null</c></exception>
    internal static KeyValuePair<string, string> SeparateHeaderNameAndValue(string rawHeader)
    {
        if (rawHeader == null)
            throw new ArgumentNullException(nameof(rawHeader));

        var indexOfColon = rawHeader.IndexOf(':');

        // Without a colon there is nothing to split on. When a colon is found its index is
        // always smaller than the string length, so the Substring calls below are safe
        if (indexOfColon < 0)
            return new KeyValuePair<string, string>(string.Empty, string.Empty);

        var key = rawHeader.Substring(0, indexOfColon).Trim();
        var value = rawHeader.Substring(indexOfColon + 1).Trim();
        return new KeyValuePair<string, string>(key, value);
    }
    #endregion
}
}