You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
328 lines
15 KiB
328 lines
15 KiB
using System;
|
|
using System.Collections.Generic;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace MsgReader.Mime.Decode
|
|
{
|
|
/// <summary>
|
|
/// This class is responsible for decoding parameters that have been encoded with:<br/>
|
|
/// <list type="bullet">
|
|
/// <item>
|
|
/// <b>Continuation</b><br/>
|
|
/// This is where a single parameter has such a long value that it could
|
|
/// be wrapped while in transit. Instead, multiple parameters are used, one on each line.<br/>
|
|
/// <br/>
|
|
/// <b>Example</b><br/>
|
|
/// From: <c>Content-Type: text/html; boundary="someVeryLongStringHereWhichCouldBeWrappedInTransit"</c><br/>
|
|
/// To: <c>Content-Type: text/html; boundary*0="someVeryLongStringHere" boundary*1="WhichCouldBeWrappedInTransit"</c><br/>
|
|
/// </item>
|
|
/// <item>
|
|
/// <b>Encoding</b><br/>
|
|
/// Sometimes characters other than ASCII characters are needed in parameters.<br/>
|
|
/// The parameter is then given a different name to specify that it is encoded.<br/>
|
|
/// <br/>
|
|
/// <b>Example</b><br/>
|
|
/// From: <c>Content-Disposition attachment; filename="specialCharsÆØÅ"</c><br/>
|
|
/// To: <c>Content-Disposition attachment; filename*="ISO-8859-1'en-us'specialChars%C6%D8%C5"</c><br/>
|
|
/// This encoding is almost the same as <see cref="EncodedWord"/> encoding, and is used to decode the value.<br/>
|
|
/// </item>
|
|
/// <item>
|
|
/// <b>Continuation and Encoding</b><br/>
|
|
/// Both Continuation and Encoding can be used at the same time.<br/>
|
|
/// <br/>
|
|
/// <b>Example</b><br/>
|
|
/// From: <c>Content-Disposition attachment; filename="specialCharsÆØÅWhichIsSoLong"</c><br/>
|
|
/// To: <c>Content-Disposition attachment; filename*0*="ISO-8859-1'en-us'specialChars%C6%D8%C5"; filename*1*="WhichIsSoLong"</c><br/>
|
|
/// This could also be encoded as:<br/>
|
|
/// To: <c>Content-Disposition attachment; filename*0*="ISO-8859-1'en-us'specialChars%C6%D8%C5"; filename*1="WhichIsSoLong"</c><br/>
|
|
/// Notice that <c>filename*1</c> does not have an <c>*</c> after it - denoting it IS NOT encoded.<br/>
|
|
/// There are some rules about this:<br/>
|
|
/// <list type="number">
|
|
/// <item>The encoding must be mentioned in the first part (filename*0*), which has to be encoded.</item>
|
|
/// <item>No other part must specify an encoding, but if encoded it uses the encoding mentioned in the first part.</item>
|
|
/// <item>Parts may be encoded or not in any order.</item>
|
|
/// </list>
|
|
/// <br/>
|
|
/// </item>
|
|
/// </list>
|
|
/// More information and the specification is available in <see href="http://tools.ietf.org/html/rfc2231">RFC 2231</see>.
|
|
/// </summary>
|
|
internal static class Rfc2231Decoder
|
|
{
|
|
#region Decode
|
|
/// <summary>
/// Parses a header value of the form:<br/>
/// <c>value0; key1=value1; key2=value2; key3=value3</c><br/>
/// into key value pairs and hands those pairs to <c>DecodePairs</c> for decoding.<br/>
/// The leading <c>value0</c>, which has no key, is stored under the key <see cref="string.Empty"/>.<br/>
/// <br/>
/// When continuation is used, the individual parts are merged into one pair whose value
/// is the concatenation of the decoded parts.<br/>
/// Example:<br/>
/// <code>
/// title*0=part1
/// title*1=part2
/// </code>
/// results in a single pair:<br/>
/// <c>title=decode(part1)decode(part2)</c>
/// </summary>
/// <param name="toDecode">The string to decode.</param>
/// <returns>A list of decoded key value pairs.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="toDecode"/> is <see langword="null"/></exception>
public static List<KeyValuePair<string, string>> Decode(string toDecode)
{
    if (toDecode == null)
    {
        throw new ArgumentNullException(nameof(toDecode));
    }

    // First normalization: a quoted parameter value that is followed by whitespace
    // instead of a semicolon gets the missing semicolon inserted, e.g.
    //   text/plain; charset="iso-8859-1" name="somefile.txt"
    //   text/plain;\tcharset="iso-8859-1"\tname="somefile.txt"
    // both become
    //   text/plain; charset="iso-8859-1"; name="somefile.txt"
    // The pattern only matches values enclosed in quotes.
    const string quotedValueThenWhitespace = "=\\s*\"(?<value>[^\"]*)\"\\s";
    var normalized = Regex.Replace(toDecode, quotedValueThenWhitespace, "=\"${value}\"; ");

    // Second normalization: the leading, key-less value is not quoted, so it is
    // handled separately, e.g.
    //   attachment filename="foo"
    // becomes
    //   attachment; filename="foo"
    const string leadingValueThenWhitespace = @"^(?<first>[^;\s]+)\s(?<second>[^;\s]+)";
    normalized = Regex.Replace(normalized, leadingValueThenWhitespace, "${first}; ${second}");

    // Split on semicolons that are not inside quotes
    var segments = Utility.SplitStringWithCharNotInsideQuotes(normalized.Trim(), ';');

    var rawPairs = new List<KeyValuePair<string, string>>(segments.Count);

    foreach (var segment in segments)
    {
        var trimmedSegment = segment.Trim();

        // Nothing to do for empty segments (e.g. produced by a doubled or trailing semicolon)
        if (trimmedSegment.Length == 0)
        {
            continue;
        }

        // Split on the first '=' only, so that '=' characters inside the value are kept
        var pieces = trimmedSegment.Split(new[] {'='}, 2);

        if (pieces.Length == 1)
        {
            // No '=' present: this is a bare value and gets the empty key
            rawPairs.Add(new KeyValuePair<string, string>("", pieces[0]));
        }
        else if (pieces.Length == 2)
        {
            rawPairs.Add(new KeyValuePair<string, string>(pieces[0], pieces[1]));
        }
        else
        {
            throw new ArgumentException("When splitting the part \"" + segment + "\" by = there was " +
                                        pieces.Length +
                                        " parts. Only 1 and 2 are supported");
        }
    }

    return DecodePairs(rawPairs);
}
|
|
#endregion
|
|
|
|
#region DecodePairs
|
|
/// <summary>
/// Decodes the list of key value pairs into a decoded list of key value pairs.<br/>
/// Three kinds of keys are recognised:<br/>
/// <list type="bullet">
/// <item>
/// <c>name*0</c> or <c>name*0*</c>: the first part of a continuation. The directly following
/// pairs named <c>name*1</c>, <c>name*2</c>, ... (each optionally with a trailing <c>*</c>)
/// are merged into it and a single pair with the key <c>name</c> is produced.
/// </item>
/// <item><c>name*</c>: a single encoded value. It is decoded and stored under <c>name</c>.</item>
/// <item>Anything else: the pair is passed through unchanged.</item>
/// </list>
/// There may therefore be fewer keys in the decoded list than in the input; the values of the
/// merged keys have been appended to the value of the key they belong to.<br/>
/// Continuation parts are only merged when they directly follow the first part, in ascending order.
/// </summary>
/// <param name="pairs">The pairs to decode</param>
/// <returns>A decoded list of pairs</returns>
/// <exception cref="ArgumentNullException">If <paramref name="pairs"/> is <see langword="null"/></exception>
private static List<KeyValuePair<string, string>> DecodePairs(IList<KeyValuePair<string, string>> pairs)
{
    if (pairs == null)
        throw new ArgumentNullException(nameof(pairs));

    const string encodedFirstPartSuffix = "*0*";
    const string plainFirstPartSuffix = "*0";
    const string encodedSuffix = "*";

    var resultPairs = new List<KeyValuePair<string, string>>(pairs.Count);

    var pairsCount = pairs.Count;
    for (var i = 0; i < pairsCount; i++)
    {
        var currentPair = pairs[i];
        var key = currentPair.Key;
        var value = Utility.RemoveQuotesIfAny(currentPair.Value);

        var isEncodedFirstPart = key.EndsWith(encodedFirstPartSuffix, StringComparison.Ordinal);
        var isPlainFirstPart = key.EndsWith(plainFirstPartSuffix, StringComparison.Ordinal);

        if (isEncodedFirstPart || isPlainFirstPart)
        {
            // This is the first part of a continuation (encoded or not).

            // The charset announced by the first part. It stays null when the first part is
            // not encoded, or when it claims to be encoded but carries no charset. A null
            // charset means that later parts flagged as encoded cannot be decoded and are
            // appended as they are, instead of being decoded with a bogus charset name.
            string encoding = null;

            if (isEncodedFirstPart)
            {
                // Decode the value and remember the charset for the following parts
                value = DecodeSingleValue(value, out encoding);

                // Strip only the trailing "*0*" marker to get the real key
                key = key.Substring(0, key.Length - encodedFirstPartSuffix.Length);
            }
            else
            {
                // Strip only the trailing "*0" marker to get the real key
                key = key.Substring(0, key.Length - plainFirstPartSuffix.Length);
            }

            // Collects the full value from all continuation parts, starting with the first one
            var builder = new StringBuilder();
            builder.Append(value);

            // Walk the following pairs for as long as they are the next part of this continuation
            for (int j = i + 1, continuationCount = 1; j < pairsCount; j++, continuationCount++)
            {
                var jKey = pairs[j].Key;
                var valueJKey = Utility.RemoveQuotesIfAny(pairs[j].Value);
                var expectedPlainKey = key + "*" + continuationCount;

                if (jKey.Equals(expectedPlainKey))
                {
                    // This part is not encoded: append it as it is
                    builder.Append(valueJKey);
                }
                else if (jKey.Equals(expectedPlainKey + "*"))
                {
                    // This part is flagged as encoded. It carries no charset of its own, so it
                    // can only be decoded with the charset remembered from the first part.
                    if (encoding != null)
                        valueJKey = DecodeSingleValue(valueJKey, encoding);

                    builder.Append(valueJKey);
                }
                else
                {
                    // No more parts for this continuation
                    break;
                }

                // The pair at index j has been consumed, so the outer loop must skip it
                i++;
            }

            // Add the key and the full value as a pair
            value = builder.ToString();
            resultPairs.Add(new KeyValuePair<string, string>(key, value));
        }
        else if (key.EndsWith(encodedSuffix, StringComparison.Ordinal))
        {
            // This parameter is only encoded - it is not part of a continuation.
            // Strip only the trailing "*" marker to get the real key
            key = key.Substring(0, key.Length - encodedSuffix.Length);

            // Decode the value; the charset is not needed afterwards
            string throwAway;
            value = DecodeSingleValue(value, out throwAway);

            resultPairs.Add(new KeyValuePair<string, string>(key, value));
        }
        else
        {
            // Normal key: the pair is passed on exactly as it was received
            // (note that this means surrounding quotes are NOT removed from its value)
            resultPairs.Add(currentPair);
        }
    }

    return resultPairs;
}
|
|
#endregion
|
|
|
|
#region DecodeSingleValue
|
|
/// <summary>
/// Decodes a single value of the form: <c>ISO-8859-1'en-us'%3D%3DIamHere</c><br/>
/// The text before the first apostrophe is taken as the encoding and the text after the
/// last apostrophe is taken as the encoded value. Whatever is between them
/// (the <c>en-us</c> language part) is ignored.<br/>
/// <br/>
/// If the value contains no apostrophe at all it is returned without being decoded
/// and <paramref name="encodingUsed"/> is set to <see langword="null"/>.
/// </summary>
/// <param name="toDecode">The value to decode</param>
/// <param name="encodingUsed">
/// The encoding name found in <paramref name="toDecode"/> - it is given back for later use.<br/>
/// <see langword="null"/> if the input contained no apostrophe.
/// </param>
/// <returns>
/// The decoded value that corresponds to <paramref name="toDecode"/>, or
/// <paramref name="toDecode"/> itself when it contains no apostrophe.
/// </returns>
/// <exception cref="ArgumentNullException">If <paramref name="toDecode"/> is <see langword="null"/></exception>
private static string DecodeSingleValue(string toDecode, out string encodingUsed)
{
    if (toDecode == null)
    {
        throw new ArgumentNullException(nameof(toDecode));
    }

    var firstApostrophe = toDecode.IndexOf('\'');
    if (firstApostrophe < 0)
    {
        // No encoding description present: hand the input back untouched
        encodingUsed = null;
        return toDecode;
    }

    var lastApostrophe = toDecode.LastIndexOf('\'');

    // Everything up to the first apostrophe names the encoding ...
    encodingUsed = toDecode.Substring(0, firstApostrophe);

    // ... and everything after the last apostrophe is the encoded payload
    var payload = toDecode.Substring(lastApostrophe + 1);

    return DecodeSingleValue(payload, encodingUsed);
}
|
|
#endregion
|
|
|
|
#region DecodeSingleValue
|
|
/// <summary>
/// Decodes a single percent-encoded value of the form: <c>%3D%3DIamHere</c><br/>
/// The value is rewritten into an <see cref="EncodedWord"/> using the Q encoding
/// (<c>=?encoding?Q?value?=</c>, with <c>%</c> replaced by <c>=</c>) and decoded by
/// <see cref="EncodedWord"/>.
/// </summary>
/// <param name="valueToDecode">The value to decode</param>
/// <param name="encoding">The name of the encoding (charset) used to decode with</param>
/// <returns>The decoded value that corresponds to <paramref name="valueToDecode"/></returns>
/// <exception cref="ArgumentNullException">If <paramref name="valueToDecode"/> is <see langword="null"/></exception>
/// <exception cref="ArgumentNullException">If <paramref name="encoding"/> is <see langword="null"/></exception>
private static string DecodeSingleValue(string valueToDecode, string encoding)
{
    if (valueToDecode == null)
        throw new ArgumentNullException(nameof(valueToDecode));

    if (encoding == null)
        throw new ArgumentNullException(nameof(encoding));

    // In the Q encoding of RFC 2047 an underscore stands for a space, whereas in an
    // RFC 2231 value an underscore is just an underscore. Escape it as =5F so that a
    // value such as "my_file.txt" is not turned into "my file.txt" by the decoder.
    // This must happen before % is replaced, so that no real escape sequence is touched.
    var qEncoded = valueToDecode.Replace("_", "=5F");

    // Apart from that the encoding is the same as QuotedPrintable with % in place of =
    qEncoded = qEncoded.Replace("%", "=");

    // Make it look like a correct EncodedWord and let EncodedWord do the decoding
    valueToDecode = "=?" + encoding + "?Q?" + qEncoded + "?=";
    return EncodedWord.Decode(valueToDecode);
}
|
|
#endregion
|
|
}
|
|
} |