using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using MsgReader.Mime.Decode;

namespace MsgReader.Mime.Header
{
    /// <summary>
    ///     Class that holds information about one "Received:" header line.<br />
    ///     <br />
    ///     Visit these RFCs for more information:<br />
    ///     RFC 5321 section 4.4<br />
    ///     RFC 4021 section 3.6.7<br />
    ///     RFC 2822 section 3.6.7<br />
    ///     RFC 2821 section 4.4
    /// </summary>
    public class Received
    {
        #region Fields
        /// <summary>
        ///     Matches one name-value pair of a Received header.<br />
        ///     The name consists of non-whitespace characters followed by a single whitespace,
        ///     after which the value follows. There are two cases for the value part:<br />
        ///     1: The value is just some characters not including any whitespace<br />
        ///     2: The value is some characters followed by any number of parenthesized
        ///     values which can contain whitespace, each preceded by a whitespace<br />
        ///     <br />
        ///     Cheat sheet for the regex:<br />
        ///     \s means every whitespace character<br />
        ///     [^\s] means every character except whitespace characters<br />
        ///     +? is a non-greedy equivalent of +
        /// </summary>
        private static readonly Regex NameValueRegex =
            new Regex(@"(?<name>[^\s]+)\s(?<value>[^\s]+(\s\(.+?\))*)");

        /// <summary>
        ///     Matches a run of one or more whitespace characters.
        /// </summary>
        private static readonly Regex WhitespaceRegex = new Regex(@"\s+");
        #endregion

        #region Properties
        /// <summary>
        ///     The date of this received line.
        ///     Is <see cref="DateTime.MinValue" /> if not present in the received header line.
        /// </summary>
        public DateTime Date { get; }

        /// <summary>
        ///     A dictionary that contains the names and values of the
        ///     received header line.<br />
        ///     If the received header is invalid and contained one name
        ///     multiple times, the first one is used and the rest is ignored.
        /// </summary>
        /// <example>
        ///     If the header line looks like:
        ///     <code>
        ///     from sending.com (localMachine [127.0.0.1]) by test.net (Postfix)
        ///     </code>
        ///     then the dictionary will contain two keys: "from" and "by" with the values
        ///     "sending.com (localMachine [127.0.0.1])" and "test.net (Postfix)".
        /// </example>
        public Dictionary<string, string> Names { get; }

        /// <summary>
        ///     The raw input string that was parsed into this class.
        /// </summary>
        public string Raw { get; }
        #endregion

        #region Received
        /// <summary>
        ///     Parses a Received header value.
        /// </summary>
        /// <param name="headerValue">The value for the header to be parsed</param>
        /// <exception cref="ArgumentNullException">
        ///     If <paramref name="headerValue" /> is <see langword="null" />
        /// </exception>
        public Received(string headerValue)
        {
            if (headerValue == null)
                throw new ArgumentNullException(nameof(headerValue));

            // Remember the raw input if someone wishes to use it
            Raw = headerValue;

            // Default Date value
            Date = DateTime.MinValue;

            // The date part is the last part of the string, and is preceded by a semicolon.
            // Some emails forget to specify the date, therefore we need to check if it is there.
            var dateSeparatorIndex = headerValue.LastIndexOf(';');
            if (dateSeparatorIndex >= 0)
            {
                var datePart = headerValue.Substring(dateSeparatorIndex + 1);
                Date = Rfc2822DateTime.StringToDate(datePart);
            }

            Names = ParseDictionary(headerValue);
        }
        #endregion

        #region ParseDictionary
        /// <summary>
        ///     Parses the Received header name-value-list into a dictionary.
        /// </summary>
        /// <param name="headerValue">The full header value for the Received header</param>
        /// <returns>A dictionary where the name-value-list has been parsed into</returns>
        private static Dictionary<string, string> ParseDictionary(string headerValue)
        {
            var dictionary = new Dictionary<string, string>();

            // Remove the date part (everything from the last semicolon on) if it is present
            var dateSeparatorIndex = headerValue.LastIndexOf(';');
            var headerValueWithoutDate = dateSeparatorIndex >= 0
                ? headerValue.Substring(0, dateSeparatorIndex)
                : headerValue;

            // Reduce any run of whitespace characters to one space only, so that the
            // single \s in the name-value pattern is enough to separate the parts
            headerValueWithoutDate = WhitespaceRegex.Replace(headerValueWithoutDate, " ");

            // Find each name-value pair in the string
            foreach (Match match in NameValueRegex.Matches(headerValueWithoutDate))
            {
                var name = match.Groups["name"].Value;
                var value = match.Groups["value"].Value;

                // Check if the name is really a comment.
                // In this case, the first entry in the header value is a comment.
                // Ordinal comparison: this is a syntax check, not a linguistic one.
                if (name.StartsWith("(", StringComparison.Ordinal))
                    continue;

                // Only add the first name pair.
                // All subsequent pairs are ignored, as they are invalid anyway.
                if (!dictionary.ContainsKey(name))
                    dictionary.Add(name, value);
            }

            return dictionary;
        }
        #endregion
    }
}