using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace MsgReader.Mime.Decode
{
/// <summary>
/// This class is responsible for decoding parameters that have been encoded with:<br/>
/// <list type="bullet">
/// <item>
///    <b>Continuation</b><br/>
///    This is where a single parameter has such a long value that it could
///    be wrapped while in transit. Instead, multiple parameters are used, one on each line.<br/>
///    <br/>
///    <b>Example</b><br/>
///    From: <c>Content-Type: text/html; boundary="someVeryLongStringHereWhichCouldBeWrappedInTransit"</c><br/>
///    To: <c>Content-Type: text/html; boundary*0="someVeryLongStringHere" boundary*1="WhichCouldBeWrappedInTransit"</c><br/>
/// </item>
/// <item>
///    <b>Encoding</b><br/>
///    Sometimes characters other than ASCII characters are needed in parameters.<br/>
///    The parameter is then given a different name to specify that it is encoded.<br/>
///    <br/>
///    <b>Example</b><br/>
///    From: <c>Content-Disposition attachment; filename="specialCharsÆØÅ"</c><br/>
///    To: <c>Content-Disposition attachment; filename*="ISO-8859-1'en-us'specialChars%C6%D8%C5"</c><br/>
///    This encoding is almost the same as <see cref="EncodedWord"/> encoding, and that decoder is used to decode the value.<br/>
/// </item>
/// <item>
///    <b>Continuation and Encoding</b><br/>
///    Both Continuation and Encoding can be used at the same time.<br/>
///    <br/>
///    <b>Example</b><br/>
///    From: <c>Content-Disposition attachment; filename="specialCharsÆØÅWhichIsSoLong"</c><br/>
///    To: <c>Content-Disposition attachment; filename*0*="ISO-8859-1'en-us'specialChars%C6%D8%C5"; filename*1*="WhichIsSoLong"</c><br/>
///    This could also be encoded as:<br/>
///    To: <c>Content-Disposition attachment; filename*0*="ISO-8859-1'en-us'specialChars%C6%D8%C5"; filename*1="WhichIsSoLong"</c><br/>
///    Notice that <c>filename*1</c> does not have an <c>*</c> after it - denoting it IS NOT encoded.<br/>
///    There are some rules about this:<br/>
///    <list type="number">
///      <item>The encoding must be mentioned in the first part (<c>filename*0*</c>), which has to be encoded.</item>
///      <item>No other part must specify an encoding, but if encoded it uses the encoding mentioned in the first part.</item>
///      <item>Parts may be encoded or not in any order.</item>
///    </list>
///    <br/>
/// </item>
/// </list>
/// More information and the specification is available in <see href="http://tools.ietf.org/html/rfc2231">RFC 2231</see>.
/// </summary>
internal static class Rfc2231Decoder
{
#region Decode
/// <summary>
/// Decodes a string of the form:<br/>
/// <c>value0; key1=value1; key2=value2; key3=value3</c><br/>
/// <br/>
/// The returned list of key value pairs has the parameter name as key and the decoded value as value.<br/>
/// The first <c>value0</c> gets a key of <see cref="string.Empty"/>.<br/>
/// <br/>
/// If continuation is used, multiple keys are merged into one key with the different values
/// decoded into one big value for that key.<br/>
/// Example:<br/>
/// <code>
/// title*0=part1
/// title*1=part2
/// </code>
/// will have key and value of:<br/>
/// <c>title=decode(part1)decode(part2)</c>
/// </summary>
/// <param name="toDecode">The string to decode.</param>
/// <returns>A list of decoded key value pairs.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="toDecode"/> is <see langword="null"/></exception>
public static List<KeyValuePair<string, string>> Decode(string toDecode)
{
    if (toDecode == null)
        throw new ArgumentNullException(nameof(toDecode));

    // Normalize the input to account for missing semicolons after quoted parameters.
    // Example:
    //   text/plain; charset="iso-8859-1" name="somefile.txt"      or
    //   text/plain;<TAB>charset="iso-8859-1"<TAB>name="somefile.txt"
    // is normalized to
    //   text/plain; charset="iso-8859-1"; name="somefile.txt"
    // This only works for values inside quotes. The named group "value" captures the
    // quoted content so the replacement can re-emit it followed by "; ".
    toDecode = Regex.Replace(toDecode, "=\\s*\"(?<value>[^\"]*)\"\\s", "=\"${value}\"; ");

    // The step above only handles quoted values, so the leading (unquoted) value needs
    // its own normalization.
    // Example:
    //   attachment filename="foo"
    // is normalized to
    //   attachment; filename="foo"
    // The pattern is anchored with ^, so only the very first token pair is affected.
    toDecode = Regex.Replace(toDecode, @"^(?<first>[^;\s]+)\s(?<second>[^;\s]+)", "${first}; ${second}");

    // Split by semicolon, but only where the semicolon is not inside quotes
    var splitted = Utility.SplitStringWithCharNotInsideQuotes(toDecode.Trim(), ';');

    var collection = new List<KeyValuePair<string, string>>(splitted.Count);

    foreach (var part in splitted)
    {
        var trimmedPart = part.Trim();

        // Empty strings should not be processed
        if (trimmedPart.Length == 0)
            continue;

        // Splitting a non-empty string with a limit of 2 yields either one item
        // (no '=' present) or two items (key and value); no other count is possible.
        var keyValue = trimmedPart.Split(new[] {'='}, 2);

        if (keyValue.Length == 1)
        {
            // A bare value without a key, such as the leading "text/plain"
            collection.Add(new KeyValuePair<string, string>("", keyValue[0]));
        }
        else
        {
            collection.Add(new KeyValuePair<string, string>(keyValue[0], keyValue[1]));
        }
    }

    return DecodePairs(collection);
}
#endregion
#region DecodePairs
/// <summary>
/// Decodes the list of key value pairs into a decoded list of key value pairs.<br/>
/// There may be fewer keys in the decoded list, but then the values of the lost keys
/// will have been appended to the value of the merged key.
/// </summary>
/// <param name="pairs">The pairs to decode</param>
/// <returns>A decoded list of pairs</returns>
/// <exception cref="ArgumentNullException">If <paramref name="pairs"/> is <see langword="null"/></exception>
private static List<KeyValuePair<string, string>> DecodePairs(IList<KeyValuePair<string, string>> pairs)
{
    if (pairs == null)
        throw new ArgumentNullException(nameof(pairs));

    const string encodedFirstPartSuffix = "*0*";
    const string plainFirstPartSuffix = "*0";
    const string encodedSuffix = "*";

    var resultPairs = new List<KeyValuePair<string, string>>(pairs.Count);
    var pairsCount = pairs.Count;

    for (var i = 0; i < pairsCount; i++)
    {
        var currentPair = pairs[i];
        var key = currentPair.Key;
        var value = Utility.RemoveQuotesIfAny(currentPair.Value);

        var firstPartIsEncoded = key.EndsWith(encodedFirstPartSuffix, StringComparison.Ordinal);

        // Is it the first part of a continuation parameter? (encoded or not)
        if (firstPartIsEncoded || key.EndsWith(plainFirstPartSuffix, StringComparison.Ordinal))
        {
            // Charset announced by the first part. It stays null when the first part is
            // not encoded, or when it claimed to be encoded but carried no charset.
            // A null charset means that later "encoded" parts are appended as they are
            // instead of being decoded with a made-up charset name.
            string encoding = null;

            if (firstPartIsEncoded)
            {
                // Decode the value and remember the charset for the following parts
                value = DecodeSingleValue(value, out encoding);

                // Strip only the trailing "*0*" marker to get the real parameter name
                key = key.Substring(0, key.Length - encodedFirstPartSuffix.Length);
            }
            else
            {
                // Strip only the trailing "*0" marker to get the real parameter name
                key = key.Substring(0, key.Length - plainFirstPartSuffix.Length);
            }

            // The StringBuilder holds the full decoded value of all continuation parts
            var builder = new StringBuilder();
            builder.Append(value);

            // Consume the directly following pairs for as long as they are the next
            // expected part (name*1, name*2, ...) of this continuation.
            var continuationCount = 1;
            while (i + 1 < pairsCount)
            {
                var nextKey = pairs[i + 1].Key;
                var nextValue = Utility.RemoveQuotesIfAny(pairs[i + 1].Value);
                var expectedPlainKey = key + "*" + continuationCount;

                // Parameter names are case-insensitive in MIME, hence OrdinalIgnoreCase
                if (nextKey.Equals(expectedPlainKey, StringComparison.OrdinalIgnoreCase))
                {
                    // This part of the continuation is not encoded
                    builder.Append(nextValue);
                }
                else if (nextKey.Equals(expectedPlainKey + encodedSuffix, StringComparison.OrdinalIgnoreCase))
                {
                    // This part is marked as encoded. The charset is never given in the
                    // part itself, only in the first part. Sometimes an email creator
                    // marks a value as encoded although no charset was given; in that
                    // case the value is appended unchanged.
                    if (encoding != null)
                        nextValue = DecodeSingleValue(nextValue, encoding);

                    builder.Append(nextValue);
                }
                else
                {
                    // No more parts for this continuation
                    break;
                }

                // One more pair has been consumed by this continuation
                i++;
                continuationCount++;
            }

            // Add the key and the full value as a pair
            resultPairs.Add(new KeyValuePair<string, string>(key, builder.ToString()));
        }
        else if (key.EndsWith(encodedSuffix, StringComparison.Ordinal))
        {
            // This parameter is only encoded - it is not part of a continuation.
            // Strip only the trailing "*" that denotes the value was encoded.
            key = key.Substring(0, key.Length - encodedSuffix.Length);

            // Decode the value; the charset is not needed afterwards
            string ignoredEncoding;
            value = DecodeSingleValue(value, out ignoredEncoding);

            resultPairs.Add(new KeyValuePair<string, string>(key, value));
        }
        else
        {
            // Fully normal key - the value is not encoded, so the pair is passed on
            // exactly as it was received (surrounding quotes, if any, are kept).
            resultPairs.Add(currentPair);
        }
    }

    return resultPairs;
}
#endregion
#region DecodeSingleValue
/// <summary>
/// Decodes a single value of the form: <c>ISO-8859-1'en-us'%3D%3DIamHere</c><br/>
/// The text before the first apostrophe names the charset, the text after the last
/// apostrophe is the encoded payload. Whatever sits between them (the <c>en-us</c>
/// language tag in the example) is not used for anything.<br/>
/// <br/>
/// If the value contains no apostrophe at all it is returned without being decoded
/// and <paramref name="encodingUsed"/> is set to <see langword="null"/>.
/// </summary>
/// <param name="toDecode">The value to decode</param>
/// <param name="encodingUsed">
/// The charset name that was used for decoding - it is handed back for later use.<br/>
/// <see langword="null"/> if the input was not in the expected form.
/// </param>
/// <returns>
/// The decoded value that corresponds to <paramref name="toDecode"/>, or
/// <paramref name="toDecode"/> itself when it was not in the expected form.
/// </returns>
/// <exception cref="ArgumentNullException">If <paramref name="toDecode"/> is <see langword="null"/></exception>
private static string DecodeSingleValue(string toDecode, out string encodingUsed)
{
    if (toDecode == null)
        throw new ArgumentNullException(nameof(toDecode));

    var charsetEnd = toDecode.IndexOf('\'');

    // Without any apostrophe there is no charset part: hand the input back untouched
    if (charsetEnd == -1)
    {
        encodingUsed = null;
        return toDecode;
    }

    var payloadStart = toDecode.LastIndexOf('\'') + 1;

    // Everything in front of the first apostrophe is the charset name
    encodingUsed = toDecode.Substring(0, charsetEnd);

    // Everything behind the last apostrophe is the percent-encoded payload
    var payload = toDecode.Substring(payloadStart);

    return DecodeSingleValue(payload, encodingUsed);
}
#endregion
#region DecodeSingleValue
/// <summary>
/// Decodes a single value of the form: <c>%3D%3DIamHere</c><br/>
/// This is basically quoted-printable using <c>%</c> instead of <c>=</c>. The value is
/// rewritten as a Q-encoded word (<c>=?charset?Q?text?=</c>) and handed to
/// <see cref="EncodedWord.Decode"/>.
/// </summary>
/// <param name="valueToDecode">The value to decode</param>
/// <param name="encoding">The name of the charset to decode with</param>
/// <returns>The decoded value that corresponds to <paramref name="valueToDecode"/></returns>
/// <exception cref="ArgumentNullException">If <paramref name="valueToDecode"/> is <see langword="null"/></exception>
/// <exception cref="ArgumentNullException">If <paramref name="encoding"/> is <see langword="null"/></exception>
private static string DecodeSingleValue(string valueToDecode, string encoding)
{
    if (valueToDecode == null)
        throw new ArgumentNullException(nameof(valueToDecode));

    if (encoding == null)
        throw new ArgumentNullException(nameof(encoding));

    // In the Q encoding of RFC 2047 an underscore stands for an encoded space, whereas
    // RFC 2231 gives the underscore no special meaning. Escaping it as =5F keeps a
    // literal underscore (common in file names) from being turned into a space.
    // NOTE(review): assumes EncodedWord.Decode follows RFC 2047 for "_" - the escape is
    // harmless if it does not, since =5F decodes to "_" either way.
    var qEncodedText = valueToDecode.Replace("_", "=5F");

    // Apart from that, the only difference to the Q encoding is the escape character
    qEncodedText = qEncodedText.Replace("%", "=");

    // Wrap it so that it looks like a regular encoded word and let that decoder do the work
    var encodedWord = "=?" + encoding + "?Q?" + qEncodedText + "?=";
    return EncodedWord.Decode(encodedWord);
}
#endregion
}
}