You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

263 lines
9.7 KiB

/*
* 2006 - 2018 Ted Spence, http://tedspence.com
* License: http://www.apache.org/licenses/LICENSE-2.0
* Home page: https://github.com/tspence/csharp-csv-reader
*/
using System;
using System.Collections.Generic;
using System.Text;
namespace CSVNET
{
/// <summary>
/// Defines the behavior of CSV serialization when a nested array is encountered
/// </summary>
public enum ArrayOptions
{
/// <summary>
/// Use built-in string conversion, which renders arrays as `MyNamespace.MyObject[]`
/// </summary>
ToString,
/// <summary>
/// Convert any array columns that are array types into nulls (either blanks or null tokens)
/// </summary>
TreatAsNull,
/// <summary>
/// Render the number of items in the array
/// </summary>
CountItems,
/// <summary>
/// Serialize child arrays recursively using the same settings
/// </summary>
RecursiveSerialization,
}
/// <summary>
/// Defines the behavior of CSV Serialization when a nested object (class) is encountered
/// </summary>
public enum ObjectOptions
{
/// <summary>
/// Use built-in string conversion, which renders as `MyNamespace.MyObject`
/// </summary>
ToString,
/// <summary>
/// Serialize child objects recursively using the same settings
/// </summary>
RecursiveSerialization,
}
/// <summary>
/// Settings to configure how a CSV file is parsed
/// </summary>
public class CSVSettings
{
/// <summary>
/// The character used to delimit individual fields in the CSV.
/// </summary>
public char FieldDelimiter { get; set; } = ',';
/// <summary>
/// The character used to enclose fields that contain the delimiter character.
/// </summary>
public char TextQualifier { get; set; } = '"';
/// <summary>
/// The separator used to indicate the end of a line in the CSV file.
/// </summary>
public string LineSeparator { get; set; } = Environment.NewLine;
/// <summary>
/// Set this value to true to enclose all fields in the text qualifier character.
/// </summary>
public bool ForceQualifiers { get; set; }
/// <summary>
/// Set this value to true to allow nulls to be rendered in CSV files when serializing
/// and deserializing.
///
/// CSV files by default do not have a mechanism for differentiating between null fields
/// and empty fields. If this field is set to false, both `null` and empty string will
/// render as an empty string.
///
/// If this field is set to true, all non-null fields will be enclosed by the text qualifier,
/// and fields that are null will be represented as `NullToken`.
/// </summary>
public bool AllowNull { get; set; }
/// <summary>
/// If AllowNull is set to true, this token will be used to represent NULL values.
/// </summary>
public string NullToken { get; set; }
/// <summary>
/// The first line of the CSV file will include the names of each field.
/// </summary>
public bool HeaderRowIncluded { get; set; } = true;
/// <summary>
/// When reading a CSV file, if the first line contains the instruction `sep=`, use this
/// to determine the separator for the file.
///
/// The "sep" line is a feature exclusive to Microsoft Excel, which permits CSV files to
/// more easily handle European files where the comma character is often a separator between
/// numeric values rather than a field delimiter.
///
/// If this flag is set to true, when you parse a CSV, the first line of the parsing can override
/// the field separator logic for each individual instance of parsing, but it will not change
/// the `FieldDelimiter` in your settings object.
///
/// More information:
/// * [Original Issue Report](https://github.com/tspence/csharp-csv-reader/issues/28)
/// * [Superuser Article](https://superuser.com/questions/773644/what-is-the-sep-metadata-you-can-add-to-csvs)
/// * [Tjitjing Blog](https://blog.tjitjing.com/index.php/2016/07/set-separator-delimiter-in-csv-file-to-open-correctly-in-excel.html)
/// </summary>
public bool AllowSepLine { get; set; } = true;
/// <summary>
/// If `HeaderRowIncluded` is false, use these values for the headers
/// </summary>
public string[] AssumedHeaders { get; set; }
/// <summary>
/// Set this value to true to allow parsing for files where each row has a different number of fields
/// </summary>
public bool IgnoreDimensionErrors { get; set; } = true;
/// <summary>
/// Set this value to true to ignore header errors when deserializing
/// </summary>
public bool IgnoreHeaderErrors { get; set; }
/// <summary>
/// Set this flag to true to ignore read-only properties during serialization
/// </summary>
public bool IgnoreReadOnlyProperties { get; set; }
/// <summary>
/// Expect headers to be case sensitive during deserialization
/// </summary>
public bool HeadersCaseSensitive { get; set; }
/// <summary>
/// Exclude these columns during serialization and deserialization
/// </summary>
public string[] ExcludedColumns { get; set; }
/// <summary>
/// A list of data types that require text qualifiers during serialization.
/// </summary>
public Type[] ForceQualifierTypes { get; set; }
/// <summary>
/// Some CSV files contain an empty line at the end. If you set this flag to true, deserialization will
/// not throw an error for empty lines and will instead ignore it.
/// </summary>
public bool IgnoreEmptyLineForDeserialization { get; set; }
/// <summary>
/// When reading data from a stream, this is the block size to read at once.
/// </summary>
public int BufferSize { get; set; } = DEFAULT_BUFFER_SIZE;
internal static readonly int DEFAULT_BUFFER_SIZE = 65536;
/// <summary>
/// The encoding for converting streams of bytes to strings
/// </summary>
public Encoding Encoding { get; set; } = Encoding.UTF8;
/// <summary>
/// The format to use for serializing date time objects, by default, ISO 8601
/// </summary>
public string DateTimeFormat { get; set; } = "o";
/// <summary>
/// The behavior to use when serializing a column that is an array or enumerable type
/// </summary>
public ArrayOptions NestedArrayBehavior { get; set; } = ArrayOptions.ToString;
/// <summary>
/// The behavior to use when serializing a column that is a class
/// </summary>
public ObjectOptions NestedObjectBehavior { get; set; } = ObjectOptions.ToString;
/// <summary>
/// Standard comma-separated value (CSV) file settings
/// </summary>
public static readonly CSVSettings CSV = new CSVSettings();
/// <summary>
/// Standard comma-separated value (CSV) file settings that permit rendering of NULL values
/// </summary>
public static readonly CSVSettings CSV_PERMIT_NULL = new CSVSettings()
{
AllowNull = true,
NullToken = "NULL"
};
/// <summary>
/// Standard tab-separated value (TSV) file settings
/// </summary>
public static readonly CSVSettings TSV = new CSVSettings()
{
FieldDelimiter = '\t'
};
/// <summary>
/// Clone the existing settings, but with a different field delimiter.
///
/// Used for parsing of "sep=" lines so that the original object is immutable.
/// </summary>
/// <param name="newDelimiter">The new delimiter for the cloned settings</param>
/// <returns>The newly cloned settings with the updated delimiter</returns>
public CSVSettings CloneWithNewDelimiter(char newDelimiter)
{
var newSettings = (CSVSettings)this.MemberwiseClone();
newSettings.FieldDelimiter = newDelimiter;
return newSettings;
}
/// <summary>
/// Retrieve the list of risky characters according to this settings definition
/// </summary>
/// <returns></returns>
public char[] GetRiskyChars()
{
var riskyChars = new List<char>();
riskyChars.Add(FieldDelimiter);
riskyChars.Add(TextQualifier);
foreach (var c in LineSeparator)
{
riskyChars.Add(c);
}
// CRLF is always considered risky
riskyChars.Add('\n');
riskyChars.Add('\r');
return riskyChars.ToArray();
}
/// <summary>
/// Retrieve a hashset containing the list of types that require text qualifiers, or null if this
/// feature is not used in this settings definition
/// </summary>
/// <returns></returns>
/// <exception cref="NotImplementedException"></exception>
public Dictionary<Type, int> GetForceQualifierTypes()
{
if (ForceQualifierTypes == null) return null;
var hashSet = new Dictionary<Type, int>();
foreach (var type in ForceQualifierTypes)
{
hashSet.Add(type, 1);
}
return hashSet;
}
}
}