using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
namespace MsgReader.Mime.Decode
{
/// <summary>
/// Utility class used by OpenPop for mapping from a characterSet to an <see cref="Encoding"/>.<br/>
/// <br/>
/// The functionality of the class can be altered by adding mappings
/// using <see cref="AddMapping"/> and by adding a <see cref="FallbackDecoder"/>.<br/>
/// <br/>
/// Given a characterSet, it will try to find the Encoding as follows:
/// <list type="number">
/// <item>
/// <description>If a mapping for the characterSet was added, use the specified Encoding from there. Mappings can be added using <see cref="AddMapping"/>.</description>
/// </item>
/// <item>
/// <description>Try to parse the characterSet and look it up using <see cref="Encoding.GetEncoding(int)"/> for codepages or <see cref="Encoding.GetEncoding(string)"/> for named encodings.</description>
/// </item>
/// <item>
/// <description>If an encoding is not found yet, use the <see cref="FallbackDecoder"/> if defined. The <see cref="FallbackDecoder"/> is user defined.</description>
/// </item>
/// </list>
/// </summary>
public static class EncodingFinder
{
    #region Properties
    /// <summary>
    /// Delegate that is used when the EncodingFinder is unable to find an encoding by
    /// using the <see cref="EncodingMap"/> or general code.<br/>
    /// This is used as a last resort and can be used for setting a default encoding or
    /// for finding an encoding on runtime for some <paramref name="characterSet"/>.
    /// </summary>
    /// <param name="characterSet">The character set to find an encoding for.</param>
    /// <returns>An encoding for the <paramref name="characterSet"/> or <see langword="null"/> if none could be found.</returns>
    public delegate Encoding FallbackDecoderDelegate(string characterSet);

    /// <summary>
    /// Last resort decoder. It is only consulted by <see cref="FindEncoding"/> after both the
    /// explicit mappings and the general lookup have failed.
    /// </summary>
    public static FallbackDecoderDelegate FallbackDecoder { private get; set; }

    /// <summary>
    /// Mapping from charactersets to encodings. Keys are stored upper-cased (invariant culture).
    /// </summary>
    private static Dictionary<string, Encoding> EncodingMap { get; set; }
    #endregion

    #region Constructor
    /// <summary>
    /// Initialize the EncodingFinder
    /// </summary>
    static EncodingFinder()
    {
        Reset();
    }
    #endregion

    #region Reset
    /// <summary>
    /// Used to reset this static class to facilitate isolated unit testing.
    /// Discards every mapping, clears the <see cref="FallbackDecoder"/> and
    /// re-registers the built-in mappings.
    /// </summary>
    internal static void Reset()
    {
        EncodingMap = new Dictionary<string, Encoding>();
        FallbackDecoder = null;

        // Some emails incorrectly specify the encoding as utf8, but it should have been utf-8.
        AddMapping("utf8", Encoding.UTF8);
        AddMapping("binary", Encoding.ASCII);
    }
    #endregion

    #region FindEncoding
    /// <summary>
    /// Parses a character set into an encoding.
    /// </summary>
    /// <param name="characterSet">The character set to parse</param>
    /// <returns>An encoding which corresponds to the character set</returns>
    /// <exception cref="ArgumentNullException">If <paramref name="characterSet"/> is <see langword="null"/></exception>
    /// <exception cref="ArgumentException">
    /// If no encoding could be found and the <see cref="FallbackDecoder"/> is not defined
    /// or returned <see langword="null"/>
    /// </exception>
    /// <exception cref="NotSupportedException">
    /// If the code page is not supported by the platform and the <see cref="FallbackDecoder"/>
    /// is not defined or returned <see langword="null"/>
    /// </exception>
    internal static Encoding FindEncoding(string characterSet)
    {
        if (characterSet == null)
            throw new ArgumentNullException(nameof(characterSet));

        var charSetUpper = characterSet.ToUpperInvariant();

        // An explicitly mapped characterSet always wins over the general lookup
        Encoding mappedEncoding;
        if (EncodingMap.TryGetValue(charSetUpper, out mappedEncoding))
            return mappedEncoding;

        // Try to generally find the encoding
        try
        {
            int codepageNumber;
            if (TryExtractCodepage(charSetUpper, out codepageNumber))
                return Encoding.GetEncoding(codepageNumber);

            // There is no usable codepage value in the characterSet. Treat it as a named encoding.
            // This also covers names that merely contain "CP" or "WINDOWS" without being a plain
            // codepage number, which previously escaped as a FormatException.
            return Encoding.GetEncoding(characterSet);
        }
        catch (Exception exception) when (exception is ArgumentException || exception is NotSupportedException)
        {
            // The encoding could not be found generally (unknown name, codepage out of range
            // or codepage not supported on this platform).
            // Read the property once so the null check and the invocation see the same delegate.
            var fallbackDecoder = FallbackDecoder;

            if (fallbackDecoder == null)
                throw; // It was not defined - rethrow the caught exception

            var fallbackDecoderResult = fallbackDecoder(characterSet);

            if (fallbackDecoderResult != null)
                return fallbackDecoderResult;

            // The FallbackDecoder had no solution either - rethrow the caught exception
            throw;
        }
    }

    /// <summary>
    /// Tries to read a codepage number out of an upper-cased character set such as
    /// "WINDOWS-1252", "CP1252" or "CP-1252".
    /// </summary>
    /// <param name="charSetUpper">The upper-cased character set</param>
    /// <param name="codepageNumber">The parsed codepage number, or 0 when parsing failed</param>
    /// <returns>
    /// <see langword="true"/> if the character set mentions "WINDOWS" or "CP" and what remains after
    /// removing those markers and any dashes is an integer; otherwise <see langword="false"/>
    /// </returns>
    private static bool TryExtractCodepage(string charSetUpper, out int codepageNumber)
    {
        codepageNumber = 0;

        if (!charSetUpper.Contains("WINDOWS") && !charSetUpper.Contains("CP"))
            return false;

        var remainder = charSetUpper
            .Replace("CP", "")       // Remove cp
            .Replace("WINDOWS", "")  // Remove windows
            .Replace("-", "");       // Remove - which could be used as cp-1554

        // Now we hope the only thing left is numbers. If not, the caller falls back to a named lookup.
        return int.TryParse(remainder, NumberStyles.Integer, CultureInfo.InvariantCulture, out codepageNumber);
    }
    #endregion

    #region AddMapping
    /// <summary>
    /// Puts a mapping from <paramref name="characterSet"/> to <paramref name="encoding"/>
    /// into the <see cref="EncodingFinder"/>'s internal mapping Dictionary.
    /// The lookup is case-insensitive because the key is stored upper-cased.
    /// An existing mapping for the same character set (including the built-in ones) is replaced.
    /// </summary>
    /// <param name="characterSet">The string that maps to the <paramref name="encoding"/></param>
    /// <param name="encoding">The <see cref="Encoding"/> that should be mapped from <paramref name="characterSet"/></param>
    /// <exception cref="ArgumentNullException">If <paramref name="characterSet"/> is <see langword="null"/></exception>
    /// <exception cref="ArgumentNullException">If <paramref name="encoding"/> is <see langword="null"/></exception>
    public static void AddMapping(string characterSet, Encoding encoding)
    {
        if (characterSet == null)
            throw new ArgumentNullException(nameof(characterSet));

        if (encoding == null)
            throw new ArgumentNullException(nameof(encoding));

        // Store the mapping using uppercase. The indexer is used instead of Add so that
        // callers can override an existing mapping without getting a duplicate-key exception.
        EncodingMap[characterSet.ToUpperInvariant()] = encoding;
    }
    #endregion
}
}