You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
444 lines
18 KiB
444 lines
18 KiB
using System;
|
|
using System.Globalization;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace MsgReader.Mime.Decode
|
|
{
|
|
/// <summary>
|
|
/// Class used to decode RFC 2822 Date header fields.
|
|
/// </summary>
|
|
internal static class Rfc2822DateTime
|
|
{
|
|
#region Consts
|
|
//Constants
|
|
/// <summary>
|
|
/// Timezone formats that aren't +-hhmm, e.g. UTC, or K. See MatchEvaluator method for conversions
|
|
/// </summary>
|
|
private const string RegexOldTimezoneFormats = @"\b((UT|GMT|EST|EDT|CST|CDT|MST|MDT|PST|MSK|PDT)|([A-IK-Za-ik-z]))\b";
|
|
|
|
/// <summary>
|
|
/// Matches any +-hhmm timezone offset, e.g. +0100 or -0600
|
|
/// </summary>
|
|
private const string RegexNewTimezoneFormats = @"[\+-](?<hours>\d\d)(?<minutes>\d\d)";
|
|
#endregion
|
|
|
|
#region Fields
|
|
/// <summary>
|
|
/// Custom DateTime formats - will be tried if cannot parse the dateInput string using the default method
|
|
/// Specified using formats at http://msdn.microsoft.com/en-us/library/8kb3ddd4%28v=vs.110%29.aspx
|
|
/// One format per string in the array
|
|
/// </summary>
|
|
public static string[] CustomDateTimeFormats { private get; set; }
|
|
#endregion
|
|
|
|
#region StringToDate
|
|
/// <summary>
/// Parses a date header value in RFC 2822 format and returns it as a <see cref="DateTime"/>
/// of kind <see cref="DateTimeKind.Utc"/>
/// </summary>
/// <param name="inputDate">The raw date text to parse</param>
/// <returns>
/// The parsed date and time, shifted by the timezone offset found in <paramref name="inputDate"/>.
/// When no date can be recognized in <paramref name="inputDate"/>, <see cref="DateTime.MinValue"/> is returned.
/// </returns>
/// <exception cref="ArgumentNullException">If <paramref name="inputDate"/> is <see langword="null"/></exception>
/// <exception cref="ArgumentException">
/// If a <see cref="FormatException"/> or <see cref="ArgumentException"/> is raised while parsing;
/// the original exception is attached as the inner exception
/// </exception>
public static DateTime StringToDate(string inputDate)
{
    if (inputDate == null)
        throw new ArgumentNullException(nameof(inputDate));

    // Normalize the text first: rewrite the "Tue Feb 18 10:23:30 2014 (MSK)" layout,
    // then remove the comments and surplus whitespace the old date specification allows
    inputDate = StripCommentsAndExcessWhitespace(FixSpecialCases(inputDate));

    try
    {
        var extracted = ExtractDateTime(inputDate);

        // Nothing recognizable was found, hand the sentinel value back untouched
        if (extracted == DateTime.MinValue)
            return extracted;

        // Mark the value as UTC and then apply the offset stated in the input
        var asUtc = new DateTime(extracted.Ticks, DateTimeKind.Utc);
        return AdjustTimezone(asUtc, inputDate);
    }
    catch (Exception e) when (e is FormatException || e is ArgumentException)
    {
        throw new ArgumentException(
            "Could not parse date: " + e.Message + ". Input was: \"" + inputDate + "\"", e);
    }
}
|
|
#endregion
|
|
|
|
#region AdjustTimezone
|
|
/// <summary>
/// Shifts <paramref name="dateTime"/> by the timezone offset found in the last
/// space separated part of <paramref name="dateInput"/>
/// </summary>
/// <param name="dateTime">The date to shift</param>
/// <param name="dateInput">The date text in which the timezone is looked up</param>
/// <returns>
/// The shifted date, or <paramref name="dateTime"/> unchanged when no [+-]hhmm offset
/// is found in the last part of <paramref name="dateInput"/>
/// </returns>
private static DateTime AdjustTimezone(DateTime dateTime, string dateInput)
{
    // The timezone, when present, is the final space separated token of the input
    var zoneToken = dateInput.Substring(dateInput.LastIndexOf(' ') + 1);

    // Rewrite zone names in the older formats (e.g. GMT, EST, Z) to the [+-]hhmm form
    zoneToken = Regex.Replace(zoneToken, RegexOldTimezoneFormats, MatchEvaluator);

    // Example: Fri, 21 Nov 1997 09:55:06 -0600
    // finds -0600
    var offset = Regex.Match(zoneToken, RegexNewTimezoneFormats);

    if (offset.Success)
    {
        var hours = int.Parse(offset.Groups["hours"].Value);
        var minutes = int.Parse(offset.Groups["minutes"].Value);

        // A "+hhmm" offset is subtracted and a "-hhmm" offset is added.
        // An offset of -0000 therefore is the same as doing nothing.
        if (offset.Value[0] == '+')
        {
            hours = -hours;
            minutes = -minutes;
        }

        dateTime = dateTime.AddHours(hours).AddMinutes(minutes);
    }

    return dateTime;
}
|
|
#endregion
|
|
|
|
#region MatchEvaluator
|
|
/// <summary>
/// Translates a timezone written in one of the older formats into the [+-]hhmm format
/// </summary>
/// <param name="match">The match holding the old style timezone</param>
/// <returns>The [+-]hhmm text that replaces the matched timezone</returns>
/// <exception cref="ArgumentException">
/// If <paramref name="match"/> is not successful or its value is not one of the handled timezones
/// </exception>
/// <remarks>
/// RFC 2822: http://www.rfc-base.org/rfc-2822.html
///
/// 4.3. Obsolete Date and Time
///
/// obs-zone        =       "UT" / "GMT" /          ; Universal Time
///                                                 ; North American UT offsets
///                         "EST" / "EDT" /         ; Eastern:  - 5/ - 4
///                         "CST" / "CDT" /         ; Central:  - 6/ - 5
///                         "MST" / "MDT" /         ; Mountain: - 7/ - 6
///                         "PST" / "PDT" /         ; Pacific:  - 8/ - 7
///
///                         %d65-73 /               ; Military zones - "A"
///                         %d75-90 /               ; through "I" and "K"
///                         %d97-105 /              ; through "Z", both
///                         %d107-122               ; upper and lower case
///
/// Handled values:
///   "A".."I" / "a".."i"   => "+0100" .. "+0900"
///   "K".."M" / "k".."m"   => "+1000" .. "+1200"
///   "N".."Y" / "n".."y"   => "-0100" .. "-1200"
///   "Z", "z", "UT", "GMT" => "+0000"
///   "MSK"                 => "+0400"
/// </remarks>
private static string MatchEvaluator(Match match)
{
    if (!match.Success)
        throw new ArgumentException("Match success are always true");

    var zone = match.Value;

    if (zone.Length == 1)
    {
        // Single letter (military) zone; fold ASCII lower case onto upper case only
        var letter = zone[0];
        if (letter >= 'a' && letter <= 'z')
            letter = (char) (letter - 'a' + 'A');

        if (letter == 'Z')
            return "+0000";

        // The position in the list is the number of hours minus one; "J" is not in either list
        var plusIndex = "ABCDEFGHIKLM".IndexOf(letter);
        if (plusIndex >= 0)
            return "+" + (plusIndex + 1).ToString("00", CultureInfo.InvariantCulture) + "00";

        var minusIndex = "NOPQRSTUVWXY".IndexOf(letter);
        if (minusIndex >= 0)
            return "-" + (minusIndex + 1).ToString("00", CultureInfo.InvariantCulture) + "00";

        throw new ArgumentException("Unexpected input");
    }

    switch (zone)
    {
        case "UT":
        case "GMT":
            return "+0000";

        // US time zones
        case "EDT":
            return "-0400";
        case "EST":
        case "CDT":
            return "-0500";
        case "CST":
        case "MDT":
            return "-0600";
        case "MST":
        case "PDT":
            return "-0700";
        case "PST":
            return "-0800";

        // EU time zones
        case "MSK":
            return "+0400";

        default:
            throw new ArgumentException("Unexpected input");
    }
}
|
|
#endregion
|
|
|
|
#region ExtractDateTime
|
|
/// <summary>
/// Extracts the date and time parts from the <paramref name="dateInput"/>
/// </summary>
/// <param name="dateInput">The date input string, from which to extract the date and time parts</param>
/// <returns>
/// The extracted date and time (without any timezone adjustment), or <see cref="DateTime.MinValue"/>
/// if <paramref name="dateInput"/> is not recognized as a valid date.
/// </returns>
/// <remarks>
/// The built-in formats are always parsed with <see cref="CultureInfo.InvariantCulture"/>.
/// When they do not apply and <c>CustomDateTimeFormats</c> is set, those formats are tried with the
/// invariant culture first, so the result does not depend on the culture or calendar of the machine,
/// and with the current culture afterwards, so culture specific custom formats keep working.
/// </remarks>
/// <exception cref="ArgumentNullException">If <paramref name="dateInput"/> is <see langword="null"/></exception>
private static DateTime ExtractDateTime(string dateInput)
{
    if (dateInput == null)
        throw new ArgumentNullException(nameof(dateInput));

    // Matches the date and time part of a string
    // Given string example: Fri, 21 Nov 1997 09:55:06 -0600
    // Needs to find: 21 Nov 1997 09:55:06

    // Seconds do not need to be specified
    // Even though it is illegal, sometimes hours, minutes or seconds are only specified with one digit

    // Year with 2 or 4 digits (1922 or 22)
    const string year = @"(\d\d\d\d|\d\d)";

    // Time with one or two digits for hour and minute and optional seconds (06:04:06 or 6:4:6 or 06:04 or 6:4)
    const string time = @"\d?\d:\d?\d(:\d?\d)?";

    // Correct format is 21 Nov 1997 09:55:06
    const string correctFormat = @"\d\d? .+ " + year + " " + time;

    // Some use the incorrect format: 2012-1-1 12:30
    const string incorrectFormat = year + @"-\d?\d-\d?\d " + time;

    // Some use the incorrect format: 08-May-2012 16:52:30 +0100
    const string correctFormatButWithDashes = @"\d\d?-[A-Za-z]{3}-" + year + " " + time;

    // We allow both correct and incorrect formats
    const string joinedFormat = @"(" + correctFormat + ")|(" + incorrectFormat + ")|(" + correctFormatButWithDashes + ")";

    DateTime parsed;

    // A failed parse is an expected outcome here, so use TryParse instead of catching a FormatException
    var match = Regex.Match(dateInput, joinedFormat);
    if (match.Success &&
        DateTime.TryParse(match.Value, CultureInfo.InvariantCulture, DateTimeStyles.None, out parsed))
        return parsed;

    // Read the property once so the null check and the parse below see the same array
    var customFormats = CustomDateTimeFormats;
    if (customFormats == null) return DateTime.MinValue;

    // If there is a timezone at the end, remove it
    var strDate = dateInput.Trim();
    var lastSpace = strDate.LastIndexOf(' ');

    // Only look for a timezone when there is more than one space separated part
    if (lastSpace >= 0)
    {
        var lastPart = strDate.Substring(lastSpace + 1);

        // Convert timezones in older formats to [+-]dddd format.
        var normalizedLastPart = Regex.Replace(lastPart, RegexOldTimezoneFormats, MatchEvaluator);

        // Example: Fri, 21 Nov 1997 09:55:06 -0600
        // finds -0600
        if (Regex.IsMatch(normalizedLastPart, RegexNewTimezoneFormats))
        {
            // This last part is a timezone, cut the original text off in front of it
            strDate = strDate.Substring(0, lastSpace).Trim();
        }
    }

    // Try to parse it as one of the custom formats; invariant culture first, current culture as fallback
    if (DateTime.TryParseExact(strDate, customFormats, CultureInfo.InvariantCulture, DateTimeStyles.None, out parsed) ||
        DateTime.TryParseExact(strDate, customFormats, CultureInfo.CurrentCulture, DateTimeStyles.None, out parsed))
        return parsed;

    return DateTime.MinValue;
}
|
|
#endregion
|
|
|
|
#region StripCommentsAndExcessWhitespace
|
|
/// <summary>
/// Removes all comments (text between parentheses, nested ones included) from the string
/// and normalizes its whitespace
/// </summary>
/// <param name="input">The text to clean up</param>
/// <returns>
/// The text without comments, with every run of whitespace reduced to one space, without
/// leading or trailing whitespace and without spaces around colons
/// </returns>
/// <exception cref="ArgumentNullException">If <paramref name="input"/> is <see langword="null"/></exception>
private static string StripCommentsAndExcessWhitespace(string input)
{
    if (input == null)
        throw new ArgumentNullException(nameof(input));

    // Drop the comments; the balancing group "C" keeps track of nested parentheses
    var withoutComments = Regex.Replace(input, @"(\((?>\((?<C>)|\)(?<-C>)|.?)*(?(C)(?!))\))", "");

    // Every run of whitespace becomes one space, so all that can be left
    // at the start and the end is a single space, which is trimmed off
    var singleSpaced = Regex.Replace(withoutComments, @"\s+", " ").Trim(' ');

    // Remove spaces at colons
    // Example: 22: 33 : 44 => 22:33:44
    return Regex.Replace(singleSpaced, @" ?: ?", ":");
}
|
|
#endregion
|
|
|
|
#region FixSpecialCases
|
|
/// <summary>
/// Rewrites a date time string in the very wrong date time format:
/// Tue Feb 18 10:23:30 2014 (MSK)
/// to
/// 18 Feb 2014 10:23:30 MSK
/// </summary>
/// <param name="inputDate">The date to convert</param>
/// <returns>
/// The rewritten string, or <paramref name="inputDate"/> unchanged when it does not contain
/// a date in the format described above
/// </returns>
private static string FixSpecialCases(string inputDate)
{
    const string oneOrMoreSpaces = " +";

    // week day, month, day, time, year and a three letter timezone between parentheses
    const string wrongFormat =
        "(?<weekDay>Mon|Tue|Wed|Thu|Fri|Sat|Sun)" + oneOrMoreSpaces +
        @"(?<month>[A-Za-z]+)" + oneOrMoreSpaces +
        @"(?<day>\d?\d)" + oneOrMoreSpaces +
        @"(?<time>\d?\d:\d?\d(:\d?\d)?)" + oneOrMoreSpaces +
        @"(?<year>\d\d\d\d)" + oneOrMoreSpaces +
        @"\((?<timeZone>[A-Z]{3})\)";

    var found = Regex.Match(inputDate, wrongFormat);
    if (!found.Success)
        return inputDate;

    // Put the captured parts in the order day, month, year, time, timezone; the week day is dropped
    var groups = found.Groups;
    return string.Join(" ",
        groups["day"].Value,
        groups["month"].Value,
        groups["year"].Value,
        groups["time"].Value,
        groups["timeZone"].Value);
}
|
|
#endregion
|
|
}
|
|
} |