using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using MsgReader.Mime.Decode;
namespace MsgReader.Mime.Header
{
/// <summary>
/// Class that holds information about one "Received:" header line.<br/>
/// <br/>
/// Visit these RFCs for more information:<br/>
/// RFC 5321 section 4.4<br/>
/// RFC 4021 section 3.6.7<br/>
/// RFC 2822 section 3.6.7<br/>
/// RFC 2821 section 4.4
/// </summary>
public class Received
{
    #region Fields
    /// <summary>
    /// Matches any run of whitespace characters so that it can be collapsed into a single space.
    /// </summary>
    private static readonly Regex WhitespaceRegex = new Regex(@"\s+", RegexOptions.Compiled);

    /// <summary>
    /// Matches one name-value pair of a Received header.
    /// The name consists of non-whitespace characters followed by a whitespace and then the value follows.
    /// There are multiple cases for the value part:
    /// 1: Value is just some characters not including any whitespace
    /// 2: Value is some characters, a whitespace followed by an unlimited number of
    ///    parenthesized values which can contain whitespaces, each delimited by whitespace
    /// </summary>
    /// <remarks>
    /// Cheat sheet for regex:
    /// \s means every whitespace character
    /// [^\s] means every character except whitespace characters
    /// +? is a non-greedy equivalent of +
    /// </remarks>
    private static readonly Regex NameValueRegex =
        new Regex(@"(?<name>[^\s]+)\s(?<value>[^\s]+(\s\(.+?\))*)", RegexOptions.Compiled);
    #endregion

    #region Properties
    /// <summary>
    /// The date of this received line.
    /// Is <see cref="DateTime.MinValue"/> if no date part (introduced by a semicolon) is present
    /// in the received header line.
    /// </summary>
    public DateTime Date { get; }

    /// <summary>
    /// A dictionary that contains the names and values of the
    /// received header line.<br/>
    /// If the received header is invalid and contained one name
    /// multiple times, the first one is used and the rest is ignored.
    /// </summary>
    /// <example>
    /// If the header lines looks like:
    /// <code>
    /// from sending.com (localMachine [127.0.0.1]) by test.net (Postfix)
    /// </code>
    /// then the dictionary will contain two keys: "from" and "by" with the values
    /// "sending.com (localMachine [127.0.0.1])" and "test.net (Postfix)".
    /// </example>
    public Dictionary<string, string> Names { get; }

    /// <summary>
    /// The raw input string that was parsed into this class.
    /// </summary>
    public string Raw { get; }
    #endregion

    #region Received
    /// <summary>
    /// Parses a Received header value.
    /// </summary>
    /// <param name="headerValue">The value for the header to be parsed</param>
    /// <exception cref="ArgumentNullException">If <paramref name="headerValue"/> is <see langword="null"/></exception>
    public Received(string headerValue)
    {
        if (headerValue == null)
            throw new ArgumentNullException(nameof(headerValue));

        // Remember the raw input if someone wishes to use it
        Raw = headerValue;

        // Default Date value, used when the header carries no date part
        Date = DateTime.MinValue;

        // The date part is the last part of the string, and is preceded by a semicolon.
        // Some emails forget to specify the date, therefore we need to check if it is there.
        var dateSeparatorIndex = headerValue.LastIndexOf(';');
        if (dateSeparatorIndex >= 0)
        {
            var datePart = headerValue.Substring(dateSeparatorIndex + 1);
            Date = Rfc2822DateTime.StringToDate(datePart);
        }

        Names = ParseDictionary(headerValue);
    }
    #endregion

    #region ParseDictionary
    /// <summary>
    /// Parses the Received header name-value-list into a dictionary.
    /// </summary>
    /// <param name="headerValue">The full header value for the Received header</param>
    /// <returns>A dictionary where the name-value-list has been parsed into</returns>
    private static Dictionary<string, string> ParseDictionary(string headerValue)
    {
        var dictionary = new Dictionary<string, string>();

        // Remove the date part (everything from the last semicolon on) if it is present
        var dateSeparatorIndex = headerValue.LastIndexOf(';');
        var headerValueWithoutDate = dateSeparatorIndex >= 0
            ? headerValue.Substring(0, dateSeparatorIndex)
            : headerValue;

        // Reduce any whitespace character to one space only
        headerValueWithoutDate = WhitespaceRegex.Replace(headerValueWithoutDate, " ");

        // Find each name-value pair in the string
        foreach (Match match in NameValueRegex.Matches(headerValueWithoutDate))
        {
            var name = match.Groups["name"].Value;
            var value = match.Groups["value"].Value;

            // Check if the name is really a comment.
            // In this case, the first entry in the header value is a comment.
            // Ordinal comparison: "(" is a protocol character, not linguistic text.
            if (name.StartsWith("(", StringComparison.Ordinal))
                continue;

            // Only add the first name pair.
            // All subsequent pairs are ignored, as they are invalid anyway.
            if (!dictionary.ContainsKey(name))
                dictionary.Add(name, value);
        }

        return dictionary;
    }
    #endregion
}
}