update 20241125
This commit is contained in:
528
CSVNET/CSV.cs
Normal file
528
CSVNET/CSV.cs
Normal file
@@ -0,0 +1,528 @@
|
||||
/*
|
||||
* 2006 - 2018 Ted Spence, http://tedspence.com
|
||||
* License: http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Home page: https://github.com/tspence/csharp-csv-reader
|
||||
*/
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
#if HAS_ASYNC
|
||||
using System.Threading.Tasks;
|
||||
#endif
|
||||
|
||||
// These suggestions from Resharper apply because we don't want it to recommend fixing things needed for Net20:
|
||||
// ReSharper disable LoopCanBeConvertedToQuery
|
||||
// ReSharper disable ConvertIfStatementToNullCoalescingAssignment
|
||||
// ReSharper disable ReplaceSubstringWithRangeIndexer
|
||||
// ReSharper disable InvertIf
|
||||
// ReSharper disable ConvertIfStatementToSwitchExpression
|
||||
// ReSharper disable ConvertIfStatementToSwitchStatement
|
||||
|
||||
namespace CSVNET
|
||||
{
|
||||
|
||||
/// <summary>
|
||||
/// Root class that contains static functions for straightforward CSV parsing
|
||||
/// </summary>
|
||||
public static class CSV
|
||||
{
|
||||
/// <summary>
|
||||
/// Use this to determine what version of DotNet was used to build this library.
/// The value is selected at compile time by whichever of the conditional-compilation
/// symbols below is defined, and is "UNKNOWN" when none of them is defined.
|
||||
/// </summary>
|
||||
#if NET2_0
|
||||
public const string VERSION = "NET20";
|
||||
#elif NET4_0
|
||||
public const string VERSION = "NET40";
|
||||
#elif NET4_5
|
||||
public const string VERSION = "NET45";
|
||||
#elif NET5_0
|
||||
public const string VERSION = "NET50";
|
||||
#elif NET6_0
|
||||
public const string VERSION = "NET60";
|
||||
#elif NETSTANDARD1_0
|
||||
public const string VERSION = "NETSTANDARD10";
|
||||
#elif NETSTANDARD2_0
|
||||
public const string VERSION = "NETSTANDARD20";
|
||||
#else
|
||||
// No recognized target-framework symbol was defined for this build
|
||||
public const string VERSION = "UNKNOWN";
|
||||
#endif
|
||||
|
||||
/// <summary>
/// Lazily parse a CSV stream into <![CDATA[ IEnumerable<string[]> ]]>, while permitting embedded newlines.
/// Text is pulled from the reader in blocks of <c>settings.BufferSize</c> characters
/// (<c>CSVSettings.DEFAULT_BUFFER_SIZE</c> when no settings are supplied) and only when the
/// state machine reports that it needs more text.
/// </summary>
/// <param name="inStream">The stream to read</param>
/// <param name="settings">The CSV settings to use for this parsing operation (Default: CSV)</param>
/// <returns>An enumerable that yields one array of fields per row as the stream is consumed.</returns>
public static IEnumerable<string[]> ParseStream(StreamReader inStream, CSVSettings settings = null)
{
    int chunkSize = settings?.BufferSize ?? CSVSettings.DEFAULT_BUFFER_SIZE;
    var chunk = new char[chunkSize];
    var parser = new CSVStateMachine(settings);
    while (parser.State == CSVState.CanKeepGoing)
    {
        // Feed an empty chunk unless the parser wants text and the stream still has some
        string text;
        if (parser.NeedsMoreText() && !inStream.EndOfStream)
        {
            var charsRead = inStream.ReadBlock(chunk, 0, chunkSize);
            text = new string(chunk, 0, charsRead);
        }
        else
        {
            text = string.Empty;
        }

        var fields = parser.ParseChunk(text, inStream.EndOfStream);
        if (fields != null)
        {
            yield return fields;
            continue;
        }

        // No row came out and nothing is left to read: we are finished
        if (inStream.EndOfStream)
        {
            yield break;
        }
    }
}
|
||||
|
||||
#if HAS_ASYNC_IENUM
|
||||
/// <summary>
/// Asynchronously parse a CSV stream into <![CDATA[ IAsyncEnumerable<string[]> ]]>, while permitting embedded newlines.
/// Text is pulled from the reader in blocks of <c>settings.BufferSize</c> characters
/// (<c>CSVSettings.DEFAULT_BUFFER_SIZE</c> when no settings are supplied) and only when the
/// state machine reports that it needs more text.
/// </summary>
/// <param name="inStream">The stream to read</param>
/// <param name="settings">The CSV settings to use for this parsing operation (Default: CSV)</param>
/// <returns>An asynchronous enumerable that yields one array of fields per row as the stream is consumed.</returns>
public static async IAsyncEnumerable<string[]> ParseStreamAsync(StreamReader inStream, CSVSettings settings = null)
{
    int chunkSize = settings?.BufferSize ?? CSVSettings.DEFAULT_BUFFER_SIZE;
    var chunk = new char[chunkSize];
    var parser = new CSVStateMachine(settings);
    while (parser.State == CSVState.CanKeepGoing)
    {
        // Feed an empty chunk unless the parser wants text and the stream still has some
        string text;
        if (parser.NeedsMoreText() && !inStream.EndOfStream)
        {
            var charsRead = await inStream.ReadBlockAsync(chunk, 0, chunkSize);
            text = new string(chunk, 0, charsRead);
        }
        else
        {
            text = string.Empty;
        }

        var fields = parser.ParseChunk(text, inStream.EndOfStream);
        if (fields != null)
        {
            yield return fields;
            continue;
        }

        // No row came out and nothing is left to read: we are finished
        if (inStream.EndOfStream)
        {
            yield break;
        }
    }
}
|
||||
#endif
|
||||
|
||||
/// <summary>
/// Parse a line from a CSV file and return an array of fields.
/// </summary>
/// <param name="line">One line of text from a CSV file</param>
/// <param name="settings">The CSV settings to use for this parsing operation (Default: CSV)</param>
/// <param name="throwOnFailure">
/// Controls what happens when the state machine does not finish in the <c>Done</c> state.
/// When <c>null</c> (the default) or <c>true</c>, an exception is thrown; when explicitly
/// <c>false</c>, the method returns <c>null</c> instead.
/// </param>
/// <returns>An array containing all fields in the row, or null if it could not be parsed and
/// <paramref name="throwOnFailure"/> is false.</returns>
/// <exception cref="Exception">The line is malformed and <paramref name="throwOnFailure"/> is not false.</exception>
public static string[] ParseLine(string line, CSVSettings settings = null, bool? throwOnFailure = null)
{
    string[] row = null;
    var machine = new CSVStateMachine(settings);
    while (machine.State == CSVState.CanKeepGoing)
    {
        // The whole line is handed over on the first pass; later passes only drain the machine
        row = machine.ParseChunk(line, true);
        line = string.Empty;
    }

    if (machine.State == CSVState.Done)
    {
        return row;
    }

    // Honor an explicit opt-out of exceptions; the default (null) keeps the historical throwing behavior
    if (throwOnFailure == false)
    {
        return null;
    }

    throw new Exception($"Malformed CSV structure: {machine.State}");
}
|
||||
|
||||
/// <summary>
/// Attempt to parse one line of CSV data without throwing on malformed input.
/// Can only return false if an unterminated text qualifier is encountered.
///
/// This function cannot recognize 'sep=' lines because it does not know whether it is parsing the first line
/// in the overall CSV stream.
/// </summary>
/// <returns>True when the state machine ends in the <c>Done</c> state; false if there was an
/// unterminated text qualifier in the <paramref name="line"/></returns>
/// <param name="line">The line of text to parse</param>
/// <param name="row">Receives the result of the last parsing pass (the array of fields found in the line)</param>
/// <param name="settings">The CSV settings to use for this parsing operation (Default: CSV)</param>
public static bool TryParseLine(string line, out string[] row, CSVSettings settings = null)
{
    row = null;
    var parser = new CSVStateMachine(settings);

    // First pass consumes the line; every later pass feeds an empty string to drain the parser
    for (var text = line; parser.State == CSVState.CanKeepGoing; text = string.Empty)
    {
        row = parser.ParseChunk(text, true);
    }

    return parser.State == CSVState.Done;
}
|
||||
|
||||
/// <summary>
/// Deserialize a CSV string into a sequence of typed objects
/// </summary>
/// <typeparam name="T">The type of objects to deserialize</typeparam>
/// <param name="source">The source CSV to deserialize</param>
/// <param name="settings">The CSV settings to use when parsing the source (Default: CSV)</param>
/// <returns>The objects produced by a <c>CSVReader</c> created over <paramref name="source"/></returns>
public static IEnumerable<T> Deserialize<T>(string source, CSVSettings settings = null) where T : class, new()
{
    var reader = CSVReader.FromString(source, settings);
    return reader.Deserialize<T>();
}
|
||||
|
||||
#if HAS_ASYNC_IENUM
|
||||
/// <summary>
/// Deserialize a CSV string into an asynchronous sequence of typed objects
/// </summary>
/// <typeparam name="T">The type of objects to deserialize</typeparam>
/// <param name="source">The source CSV to deserialize</param>
/// <param name="settings">The CSV settings to use when parsing the source (Default: CSV)</param>
/// <returns>The objects produced by a <c>CSVReader</c> created over <paramref name="source"/></returns>
public static IAsyncEnumerable<T> DeserializeAsync<T>(string source, CSVSettings settings = null) where T : class, new()
{
    var reader = CSVReader.FromString(source, settings);
    return reader.DeserializeAsync<T>();
}
|
||||
#endif
|
||||
|
||||
/// <summary>
/// Serialize a sequence of objects into a single line of CSV text
/// </summary>
/// <returns>A single line of CSV encoded data containing these values</returns>
/// <param name="row">A list or array of objects to serialize</param>
/// <param name="settings">The CSV settings to use when encoding the values (Default: CSV)</param>
#if NET2_0
public static string ToCSVString(IEnumerable<object> row, CSVSettings settings = null)
#else
public static string ToCSVString(this IEnumerable<object> row, CSVSettings settings = null)
#endif
{
    var effective = settings ?? CSVSettings.CSV;

    // Both lookups are derived from the settings that will actually be used for encoding
    var risky = effective.GetRiskyChars();
    var forced = effective.GetForceQualifierTypes();
    return ItemsToCsv(row, effective, risky, forced);
}
|
||||
|
||||
/// <summary>
/// Serialize a sequence of objects to CSV text held in memory
/// </summary>
/// <typeparam name="T">The type of objects to serialize</typeparam>
/// <param name="list">The objects to serialize</param>
/// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
/// <returns>The CSV text written for <paramref name="list"/>, decoded with the settings' encoding
/// and with any leading UTF-8 byte order mark removed</returns>
public static string Serialize<T>(IEnumerable<T> list, CSVSettings settings = null) where T : class, new()
{
    var effective = settings ?? CSVSettings.CSV;

    byte[] encoded;
    using (var buffer = new MemoryStream())
    {
        // The writer is disposed before the bytes are captured
        using (var writer = new CSVWriter(buffer, effective))
        {
            writer.Serialize(list);
        }

        encoded = buffer.ToArray();
    }

    return RemoveByteOrderMarker(effective.Encoding.GetString(encoded));
}
|
||||
|
||||
/// <summary>
/// The UTF-8 preamble decoded as a string; computed once and never reassigned.
/// </summary>
private static readonly string _byteOrderMarkUtf8 =
    Encoding.UTF8.GetString(Encoding.UTF8.GetPreamble());

/// <summary>
/// Strip a leading UTF-8 byte order mark from a decoded string, if one is present
/// </summary>
/// <param name="rawString">The decoded text; may be null or empty</param>
/// <returns><paramref name="rawString"/> without its leading byte order mark, or the input unchanged
/// when it is null, empty, or does not start with one</returns>
internal static string RemoveByteOrderMarker(string rawString)
{
    // Nothing to strip from null/empty input; avoids a NullReferenceException on null
    if (string.IsNullOrEmpty(rawString))
    {
        return rawString;
    }

    // Ordinal comparison: the mark must match exactly, independent of culture
    return rawString.StartsWith(_byteOrderMarkUtf8, StringComparison.Ordinal)
        ? rawString.Substring(_byteOrderMarkUtf8.Length)
        : rawString;
}
|
||||
|
||||
/// <summary>
/// Serialize a sequence of objects in CSV format directly onto a stream
/// </summary>
/// <typeparam name="T">The type of objects to serialize</typeparam>
/// <param name="list">The objects to serialize</param>
/// <param name="stream">The stream to which we will send this CSV text</param>
/// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
public static void Serialize<T>(IEnumerable<T> list, Stream stream, CSVSettings settings = null) where T : class, new()
{
    // The writer lives only for this call and is disposed once the list has been written
    using (var writer = new CSVWriter(stream, settings))
    {
        writer.Serialize(list);
    }
}
|
||||
|
||||
#if HAS_ASYNC
|
||||
/// <summary>
/// Asynchronously serialize a sequence of objects in CSV format onto a stream
/// </summary>
/// <typeparam name="T">The type of objects to serialize</typeparam>
/// <param name="list">The objects to serialize</param>
/// <param name="stream">The stream to which we will send this CSV text</param>
/// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
/// <returns>A task that completes once the list has been written and the writer has been disposed</returns>
public static async Task SerializeAsync<T>(IEnumerable<T> list, Stream stream, CSVSettings settings = null) where T : class, new()
{
    using (var cw = new CSVWriter(stream, settings))
    {
        // Await inside the using block: returning the pending task directly would dispose
        // the writer while the serialization is still running.
        await cw.SerializeAsync(list).ConfigureAwait(false);
    }
}
|
||||
#endif
|
||||
|
||||
#if HAS_ASYNC_IENUM
|
||||
/// <summary>
/// Asynchronously serialize an asynchronous sequence of objects in CSV format onto a stream
/// </summary>
/// <typeparam name="T">The type of objects to serialize</typeparam>
/// <param name="list">The asynchronous sequence of objects to serialize</param>
/// <param name="stream">The stream to which we will send this CSV text</param>
/// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
/// <returns>A task that completes once the sequence has been written and the writer has been disposed</returns>
public static async Task SerializeAsync<T>(IAsyncEnumerable<T> list, Stream stream, CSVSettings settings = null) where T : class, new()
{
    using (var cw = new CSVWriter(stream, settings))
    {
        // Await inside the using block: returning the pending task directly would dispose
        // the writer while the serialization is still running.
        await cw.SerializeAsync(list).ConfigureAwait(false);
    }
}
|
||||
#endif
|
||||
|
||||
/// <summary>
/// Add a CSV Header line to a StringBuilder for a specific type.
/// The text appended is whatever <c>Serialize</c> produces for an empty array of
/// <typeparamref name="T"/> with the given settings.
/// </summary>
/// <typeparam name="T">The type whose header is appended</typeparam>
/// <param name="sb">The StringBuilder to append data</param>
/// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
#if NET2_0
public static void AppendCSVHeader<T>(StringBuilder sb, CSVSettings settings = null) where T: class, new()
#else
public static void AppendCSVHeader<T>(this StringBuilder sb, CSVSettings settings = null) where T : class, new()
#endif
{
    // Serializing zero rows leaves only what the writer emits before the first row
    var noRows = new T[0];
    sb.Append(Serialize<T>(noRows, settings));
}
|
||||
|
||||
/// <summary>
/// Appends a single object to a StringBuilder in CSV format as a single line, without a header row
/// </summary>
/// <param name="sb">The StringBuilder to append data</param>
/// <param name="obj">The single object to append in CSV-line format</param>
/// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
/// <typeparam name="T">The type of the object being appended</typeparam>
#if NET2_0
public static void AppendCSVLine<T>(StringBuilder sb, T obj, CSVSettings settings = null) where T : class, new()
#else
public static void AppendCSVLine<T>(this StringBuilder sb, T obj, CSVSettings settings = null) where T : class, new()
#endif
{
    var source = settings ?? CSVSettings.CSV;

    // Work on a copy (same delimiter) so the caller's settings are not modified
    // when the header row is switched off.
    var headerless = source.CloneWithNewDelimiter(source.FieldDelimiter);
    headerless.HeaderRowIncluded = false;

    var singleRow = new T[] { obj };
    sb.Append(Serialize<T>(singleRow, headerless));
}
|
||||
|
||||
/// <summary>
/// Internal method to convert a list of things into a CSV line using the specified settings object
///
/// This function assumes:
///  * That the list of items is not null, but it may contain nulls
///  * That settings is not null
///  * That RiskyChars and ForceQualifierTypes have been set up correctly to match the CSV settings
/// </summary>
/// <param name="items">The values to encode, one per field</param>
/// <param name="settings">The CSV settings that control delimiters, qualifiers, null handling and nesting</param>
/// <param name="riskyChars">Characters whose presence in a value forces it to be wrapped in text qualifiers</param>
/// <param name="forceQualifierTypes">Types whose values are always wrapped in text qualifiers; may be null</param>
/// <returns>The encoded fields joined by the field delimiter, with no trailing delimiter</returns>
internal static string ItemsToCsv(IEnumerable items, CSVSettings settings, char[] riskyChars, Dictionary<Type, int> forceQualifierTypes)
{
    var sb = new StringBuilder();
    foreach (var item in items)
    {
        // If this is null, check our settings for what they want us to do
        if (item == null)
        {
            if (settings.AllowNull)
            {
                sb.Append(settings.NullToken);
            }
            sb.Append(settings.FieldDelimiter);
            continue;
        }

        // Special cases for other types of serialization
        string s;
        if (item is string)
        {
            s = (string)item;
        }
        else if (item is DateTime)
        {
            s = ((DateTime)item).ToString(settings.DateTimeFormat);
        }
        else if (item is IEnumerable)
        {
            // Treat enumerables as a simple class of objects that can be unrolled
            s = NestedEnumerableToCsv((IEnumerable)item, settings, riskyChars, forceQualifierTypes);
        }
        else if (item.GetType().IsClass && settings.NestedObjectBehavior == ObjectOptions.RecursiveSerialization)
        {
            // Flatten the object: public fields first, then public properties
            var itemType = item.GetType();
            var nestedItems = new List<object>();
            foreach (var field in itemType.GetFields())
            {
                nestedItems.Add(field.GetValue(item));
            }
            foreach (var prop in itemType.GetProperties())
            {
                nestedItems.Add(prop.GetValue(item, null));
            }
            s = ItemsToCsv(nestedItems, settings, riskyChars, forceQualifierTypes);
        }
        else
        {
            s = item.ToString();
        }

        // ToString() overrides and the null token are allowed to be null; treat that as an
        // empty value rather than failing on the IndexOfAny call below.
        if (s == null)
        {
            s = string.Empty;
        }

        // Check if this item requires qualifiers
        var requiresQualifiers = settings.ForceQualifiers
            || s.IndexOfAny(riskyChars) >= 0
            || (forceQualifierTypes != null && forceQualifierTypes.ContainsKey(item.GetType()));

        // Okay, let's handle this value normally
        if (requiresQualifiers)
        {
            sb.Append(settings.TextQualifier);
        }

        if (s.Length > 0)
        {
            // Only go character-by-character if necessary
            if (s.IndexOf(settings.TextQualifier) >= 0)
            {
                foreach (var c in s)
                {
                    // Double up text qualifiers
                    if (c == settings.TextQualifier)
                    {
                        sb.Append(c);
                    }

                    sb.Append(c);
                }
            }
            else
            {
                sb.Append(s);
            }
        }

        // Move to the next cell
        if (requiresQualifiers)
        {
            sb.Append(settings.TextQualifier);
        }
        sb.Append(settings.FieldDelimiter);
    }

    // Subtract the trailing delimiter so we don't inadvertently add an empty column at the end
    if (sb.Length > 0)
    {
        sb.Length -= 1;
    }
    return sb.ToString();
}

/// <summary>
/// Converts a nested enumerable value into field text according to <c>settings.NestedArrayBehavior</c>.
/// </summary>
private static string NestedEnumerableToCsv(IEnumerable enumerable, CSVSettings settings, char[] riskyChars, Dictionary<Type, int> forceQualifierTypes)
{
    switch (settings.NestedArrayBehavior)
    {
        case ArrayOptions.ToString:
            return enumerable.ToString();

        case ArrayOptions.CountItems:
        {
            var enumerableCount = 0;
            var iter = enumerable.GetEnumerator();

            // Non-generic enumerators are not required to be disposable; dispose when they are
            using (iter as IDisposable)
            {
                while (iter.MoveNext())
                {
                    enumerableCount++;
                }
            }
            return enumerableCount.ToString();
        }

        case ArrayOptions.TreatAsNull:
            return settings.AllowNull ? settings.NullToken : string.Empty;

        case ArrayOptions.RecursiveSerialization:
            return ItemsToCsv(enumerable, settings, riskyChars, forceQualifierTypes);

        default:
            // Unrecognized behaviors produce an empty field
            return string.Empty;
    }
}
|
||||
|
||||
/// <summary>
/// Parse a 'sep=' separator line and determine which separator character it declares.
/// The "sep" prefix is matched case-insensitively and whitespace around the '=' and around the
/// separator is trimmed.
/// </summary>
/// <param name="line">The candidate line; may be null</param>
/// <returns>The separator, or null when <paramref name="line"/> is null, is not a 'sep=' line,
/// or declares no separator character</returns>
/// <exception cref="Exception">The text after '=' is longer than one character once trimmed.</exception>
public static char? ParseSepLine(string line)
{
    // A missing line cannot be a separator line
    if (line == null || !line.StartsWith("sep", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    // Ordinal comparison: '=' is a literal marker, not linguistic text
    var remainder = line.Substring(3).Trim();
    if (!remainder.StartsWith("=", StringComparison.Ordinal))
    {
        return null;
    }

    // NOTE(review): Trim() also removes a tab or space, so such separators cannot be declared
    // through this method - confirm whether that is intended.
    var separator = remainder.Substring(1).Trim();
    if (separator.Length > 1)
    {
        throw new Exception("Separator in 'sep=' line must be a single character");
    }

    if (separator.Length == 1)
    {
        return separator[0];
    }

    // This wasn't a sep line
    return null;
}
|
||||
}
|
||||
}
|
||||
135
CSVNET/CSVDataTable.cs
Normal file
135
CSVNET/CSVDataTable.cs
Normal file
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* 2006 - 2018 Ted Spence, http://tedspence.com
|
||||
* License: http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Home page: https://github.com/tspence/csharp-csv-reader
|
||||
*/
|
||||
using System.Data;
|
||||
using System.IO;
|
||||
|
||||
namespace CSVNET
|
||||
{
|
||||
/// <summary>
|
||||
/// Code to assist in working with DataTable objects and CSV sources
|
||||
/// </summary>
|
||||
public static class CSVDataTable
|
||||
{
|
||||
/// <summary>
/// Load a single CSV file from disk into an in-memory DataTable
/// </summary>
/// <param name="filename">Path of the CSV file to open</param>
/// <param name="settings">The CSV settings to use when reading this file (Default: CSV)</param>
/// <returns>A data table of strings that were retrieved from the CSV file.</returns>
public static DataTable FromFile(string filename, CSVSettings settings = null)
{
    // The reader is opened here and disposed when the table has been built
    using (var fileReader = new StreamReader(filename))
    {
        var table = FromStream(fileReader, settings);
        return table;
    }
}
|
||||
|
||||
/// <summary>
/// Read CSV text from an already-open reader into an in-memory DataTable
/// </summary>
/// <param name="stream">The stream source from which to load the datatable.</param>
/// <param name="settings">The CSV settings to use when reading this stream (Default: CSV)</param>
/// <returns>A data table of strings that were retrieved from the CSV source.</returns>
public static DataTable FromStream(StreamReader stream, CSVSettings settings = null)
{
    // The CSVReader wrapper is disposed as soon as the table has been produced
    using (var csvReader = new CSVReader(stream, settings))
    {
        var table = csvReader.ReadAsDataTable();
        return table;
    }
}
|
||||
|
||||
/// <summary>
/// Convert CSV text held in a string into a data table
/// </summary>
/// <param name="source">The CSV text to parse</param>
/// <param name="settings">The CSV settings to use when reading this text (Default: CSV)</param>
/// <returns>A data table built from <paramref name="source"/></returns>
public static DataTable FromString(string source, CSVSettings settings = null)
{
    var effective = settings ?? CSVSettings.CSV;

    // Encode with the same encoding the reader is configured to decode with
    var encoded = effective.Encoding.GetBytes(source);
    using (var memory = new MemoryStream(encoded))
    using (var csvReader = new CSVReader(memory, effective))
    {
        return csvReader.ReadAsDataTable();
    }
}
|
||||
|
||||
/// <summary>
/// Write a data table to disk at the designated file name in CSV format.
/// The file is opened with <c>FileMode.CreateNew</c>, so an existing file is never overwritten;
/// the open fails instead.
/// </summary>
/// <param name="dt">The data table to write</param>
/// <param name="filename">Path of the file to create</param>
/// <param name="settings">The CSV settings to use when exporting this DataTable (Default: CSV)</param>
#if NET2_0
public static void WriteToFile(DataTable dt, string filename, CSVSettings settings = null)
#else
public static void WriteToFile(this DataTable dt, string filename, CSVSettings settings = null)
#endif
{
    var effective = settings ?? CSVSettings.CSV;

    using (var file = new FileStream(filename, FileMode.CreateNew))
    using (var textWriter = new StreamWriter(file, effective.Encoding))
    {
        WriteToStream(dt, textWriter, effective);
    }
}
|
||||
|
||||
/// <summary>
/// Write the data table to an already-open stream writer in CSV format
/// </summary>
/// <param name="dt">The data table to write</param>
/// <param name="sw">The stream where the CSV text will be written</param>
/// <param name="settings">The CSV settings to use when exporting this DataTable (Default: CSV)</param>
#if NET2_0
public static void WriteToStream(DataTable dt, StreamWriter sw, CSVSettings settings = null)
#else
public static void WriteToStream(this DataTable dt, StreamWriter sw, CSVSettings settings = null)
#endif
{
    // The CSVWriter wrapper is disposed once the table has been written
    using (var csvWriter = new CSVWriter(sw, settings))
    {
        csvWriter.Write(dt);
    }
}
|
||||
|
||||
/// <summary>
/// Write a DataTable to a string in CSV format
/// </summary>
/// <param name="dt">The datatable to write</param>
/// <param name="settings">The CSV settings to use when exporting this DataTable (Default: CSV)</param>
/// <returns>The CSV text for the table, decoded with the settings' encoding and with any leading
/// UTF-8 byte order mark removed.</returns>
#if NET2_0
public static string WriteToString(DataTable dt, CSVSettings settings = null)
#else
public static string WriteToString(this DataTable dt, CSVSettings settings = null)
#endif
{
    if (settings == null)
    {
        settings = CSVSettings.CSV;
    }

    using (var ms = new MemoryStream())
    {
        // Dispose the writer before reading the buffer, exactly as CSV.Serialize does, so the
        // writer is released and anything it still holds has reached the stream.
        using (var cw = new CSVWriter(ms, settings))
        {
            cw.Write(dt);
        }

        // MemoryStream.ToArray is still valid after the stream has been closed
        var rawString = settings.Encoding.GetString(ms.ToArray());
        return CSV.RemoveByteOrderMarker(rawString);
    }
}
|
||||
}
|
||||
}
|
||||
53
CSVNET/CSVNET.csproj
Normal file
53
CSVNET/CSVNET.csproj
Normal file
@@ -0,0 +1,53 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{417A61BC-206D-4895-BAAE-E6155AFBDF2A}</ProjectGuid>
|
||||
<OutputType>Library</OutputType>
|
||||
<AppDesignerFolder>Properties</AppDesignerFolder>
|
||||
<RootNamespace>CSVNET</RootNamespace>
|
||||
<AssemblyName>CSVNET</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.8</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<Deterministic>true</Deterministic>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Net.Http" />
|
||||
<Reference Include="System.Xml" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="CSV.cs" />
|
||||
<Compile Include="CSVDataTable.cs" />
|
||||
<Compile Include="CSVReader.cs" />
|
||||
<Compile Include="CSVSettings.cs" />
|
||||
<Compile Include="CSVStateMachine.cs" />
|
||||
<Compile Include="CSVWriter.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
</Project>
|
||||
635
CSVNET/CSVReader.cs
Normal file
635
CSVNET/CSVReader.cs
Normal file
@@ -0,0 +1,635 @@
|
||||
/*
|
||||
* 2006 - 2018 Ted Spence, http://tedspence.com
|
||||
* License: http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Home page: https://github.com/tspence/csharp-csv-reader
|
||||
*/
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Data;
|
||||
using System.Reflection;
|
||||
using System.ComponentModel;
|
||||
using System.Text;
|
||||
#if HAS_ASYNC
|
||||
using System.Threading;
|
||||
#endif
|
||||
|
||||
// These suggestions from Resharper apply because we don't want it to recommend fixing things needed for Net20:
|
||||
// ReSharper disable LoopCanBeConvertedToQuery
|
||||
// ReSharper disable ConvertIfStatementToNullCoalescingAssignment
|
||||
// ReSharper disable ReplaceSubstringWithRangeIndexer
|
||||
// ReSharper disable InvertIf
|
||||
// ReSharper disable ConvertIfStatementToNullCoalescingExpression
|
||||
|
||||
namespace CSVNET
|
||||
{
|
||||
/// <summary>
/// Keeps track of which columns are excluded from CSV serialization / deserialization
/// </summary>
public class ExcludedColumnHelper
{
    /// <summary>
    /// The set of excluded names, stored as dictionary keys; null when nothing is excluded.
    /// Note that Dot Net Framework 2.0 does not support HashSet, but it does support Dictionary.
    /// </summary>
    private readonly Dictionary<string, int> _excluded;

    /// <summary>
    /// The settings the exclusions came from; only assigned when at least one column is excluded.
    /// </summary>
    private readonly CSVSettings _settings;

    /// <summary>
    /// Construct a helper object to track which columns are excluded from serialization
    /// </summary>
    /// <param name="settings">The settings whose <c>ExcludedColumns</c> list is tracked; may be null,
    /// in which case no column is excluded</param>
    public ExcludedColumnHelper(CSVSettings settings)
    {
        if (settings?.ExcludedColumns == null || settings.ExcludedColumns.Length == 0)
        {
            _excluded = null;
            return;
        }

        _settings = settings;
        _excluded = new Dictionary<string, int>();
        foreach (var name in _settings.ExcludedColumns)
        {
            // A null entry cannot match any header
            if (name == null)
            {
                continue;
            }

            // Indexer assignment instead of Add: a name listed twice (or two names differing
            // only by case when headers are case-insensitive) must not throw.
            _excluded[Normalize(name)] = 1;
        }
    }

    /// <summary>
    /// True if this column should be excluded
    /// </summary>
    /// <param name="name">The column name to look up; a null name is never excluded</param>
    /// <returns>True when <paramref name="name"/> matches one of the excluded column names</returns>
    public bool IsExcluded(string name)
    {
        if (_excluded == null || name == null)
        {
            return false;
        }

        return _excluded.ContainsKey(Normalize(name));
    }

    /// <summary>
    /// Upper-cases the name when headers are not case sensitive, so stored keys and lookups compare equal.
    /// </summary>
    private string Normalize(string name)
    {
        return _settings.HeadersCaseSensitive ? name : name.ToUpperInvariant();
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// A helper object to deserialize a class based on CSV strings
|
||||
/// </summary>
|
||||
public class DeserializationHelper<T> where T : class, new()
{
    // Number of columns declared by the header row.
    private readonly int _numColumns;

    // Per-column target type (null when the column is excluded or unmatched).
    private readonly Type[] _columnTypes;

    // Per-column converter used to turn the CSV text into the target type.
    private readonly TypeConverter[] _converters;

    // Per-column member handlers; at most one of these three is non-null for a column.
    private readonly PropertyInfo[] _properties;
    private readonly FieldInfo[] _fields;
    private readonly MethodInfo[] _methods;

    /// <summary>
    /// Construct a new deserialization helper for a specific class containing all the information necessary
    /// for optimized deserialization. Each header is matched, in order, against a public instance property,
    /// then a public instance field, then a public instance method taking exactly one parameter.
    /// </summary>
    /// <param name="settings">The CSV settings to use; when null, <c>CSVSettings.CSV</c> is used</param>
    /// <param name="headers">The column headers, one per column of the CSV source</param>
    /// <exception cref="Exception">
    /// If headers is null, a header matches a read-only property that may not be ignored, or
    /// (unless IgnoreHeaderErrors is set) a header cannot be matched to a usable member
    /// </exception>
    public DeserializationHelper(CSVSettings settings, string[] headers)
    {
        // Default to the standard CSV settings, consistent with the defaults used by CSVReader
        var settings1 = settings ?? CSVSettings.CSV;
        if (headers == null) throw new Exception("CSV must have headers to be deserialized");
        var return_type = typeof(T);
        _numColumns = headers.Length;

        // Set binding flags correctly
        var bindings = BindingFlags.Public | BindingFlags.Instance;
        if (!settings1.HeadersCaseSensitive)
        {
            bindings |= BindingFlags.IgnoreCase;
        }

        // Set up the list of excluded columns
        var excluded = new ExcludedColumnHelper(settings1);

        // Determine how to handle each column in the file - check properties, fields, and methods
        _columnTypes = new Type[_numColumns];
        _converters = new TypeConverter[_numColumns];
        _properties = new PropertyInfo[_numColumns];
        _fields = new FieldInfo[_numColumns];
        _methods = new MethodInfo[_numColumns];
        for (var i = 0; i < _numColumns; i++)
        {
            // Is this column excluded? Excluded columns keep a null converter and are skipped on every row
            if (excluded.IsExcluded(headers[i])) continue;

            // Check if this is a property
            _properties[i] = return_type.GetProperty(headers[i], bindings);
            if (_properties[i] != null && !_properties[i].CanWrite)
            {
                // A read-only property is skipped only when BOTH flags are enabled
                if (settings1.IgnoreReadOnlyProperties && settings1.IgnoreHeaderErrors)
                {
                    _properties[i] = null;
                    continue;
                }

                throw new Exception($"The column header '{headers[i]}' matches a read-only property. To ignore this exception, enable IgnoreReadOnlyProperties and IgnoreHeaderErrors.");
            }

            if (_properties[i] != null)
            {
                _columnTypes[i] = _properties[i].PropertyType;
            }
            else
            {
                // If we failed to get a property handler, let's try a field handler
                _fields[i] = return_type.GetField(headers[i], bindings);
                if (_fields[i] != null)
                {
                    _columnTypes[i] = _fields[i].FieldType;
                }
                else
                {
                    // Methods must be treated differently - we have to ensure that the method has a single parameter
                    var mi = return_type.GetMethod(headers[i], bindings);
                    if (mi != null)
                    {
                        var parameters = mi.GetParameters();
                        if (parameters.Length == 1)
                        {
                            _methods[i] = mi;
                            _columnTypes[i] = parameters[0].ParameterType;
                        }
                        else if (!settings1.IgnoreHeaderErrors)
                        {
                            throw new Exception(
                                $"The column header '{headers[i]}' matched a method with more than one parameter.");
                        }
                    }
                    else if (!settings1.IgnoreHeaderErrors)
                    {
                        throw new Exception(
                            $"The column header '{headers[i]}' was not found in the class '{return_type.FullName}'.");
                    }
                }
            }

            // Only columns that resolved to a member get a converter; the rest are ignored when reading rows
            if (_columnTypes[i] != null)
            {
                _converters[i] = TypeDescriptor.GetConverter(_columnTypes[i]);
                if (_converters[i] == null && !settings1.IgnoreHeaderErrors)
                {
                    throw new Exception(
                        $"The column {headers[i]} (type {_columnTypes[i]}) does not have a type converter.");
                }
            }
        }
    }

    /// <summary>
    /// Deserialize a single row using precomputed converters
    /// </summary>
    /// <param name="line">The column values of the row</param>
    /// <param name="row_num">The row number, used only in error messages</param>
    /// <param name="settings">The settings that control null handling and error tolerance</param>
    /// <returns>
    /// A newly constructed object populated from the row, or null when the row is empty and
    /// IgnoreEmptyLineForDeserialization is enabled
    /// </returns>
    /// <exception cref="Exception">
    /// Unless IgnoreHeaderErrors is set: if the row has a different number of columns than the
    /// headers, or a value cannot be converted to its column type
    /// </exception>
    public T Deserialize(string[] line, int row_num, CSVSettings settings)
    {
        // If this line is completely empty, do our settings permit us to ignore the empty line?
        // The parentheses matter: && binds tighter than ||, so without them a zero-length row
        // would be skipped even when the setting is disabled.
        var isEmptyLine = line.Length == 0 || (line.Length == 1 && line[0] == string.Empty);
        if (isEmptyLine && settings.IgnoreEmptyLineForDeserialization)
        {
            return null;
        }

        // Does this line match the length of the first line? Does the caller want us to complain?
        if (line.Length != _numColumns && !settings.IgnoreHeaderErrors)
        {
            throw new Exception($"Line #{row_num} contains {line.Length} columns; expected {_numColumns}");
        }

        // Construct a new object and execute each column on it
        var obj = new T();
        var columnsToRead = Math.Min(line.Length, _numColumns);
        for (var i = 0; i < columnsToRead; i++)
        {
            // Columns without a converter were excluded or unmatched when the helper was built
            if (_converters[i] == null) continue;

            // Attempt to convert this to the specified type
            object value = null;
            if (settings.AllowNull && (line[i] == null || line[i] == settings.NullToken))
            {
                value = null;
            }
            else if (_converters[i].IsValid(line[i]))
            {
                value = _converters[i].ConvertFromString(line[i]);
            }
            else if (!settings.IgnoreHeaderErrors)
            {
                throw new Exception(
                    $"The value '{line[i]}' cannot be converted to the type {_columnTypes[i]}.");
            }

            // Apply the value through whichever member handler was resolved for this column
            if (_properties[i] != null)
            {
                _properties[i].SetValue(obj, value, null);
            }
            else if (_fields[i] != null)
            {
                _fields[i].SetValue(obj, value);
            }
            else if (_methods[i] != null)
            {
                _methods[i].Invoke(obj, new object[] { value });
            }
        }

        return obj;
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// A reader that reads from a stream and emits CSV records
|
||||
/// </summary>
|
||||
#if HAS_ASYNC_IENUM
public class CSVReader : IAsyncEnumerable<string[]>, IEnumerable<string[]>, IDisposable
#else
public class CSVReader : IEnumerable<string[]>, IDisposable
#endif
{
    private readonly CSVSettings _settings;
    private readonly StreamReader _stream;

    /// <summary>
    /// The settings currently in use by this reader. If a "sep=" line was detected while reading
    /// the headers, this is a clone of the supplied settings with the detected delimiter.
    /// </summary>
    public CSVSettings Settings
    {
        get { return _settings; }
    }

    /// <summary>
    /// If the first row in the file is a header row, this will be populated; otherwise it holds
    /// the AssumedHeaders from the settings (which may be null)
    /// </summary>
    public string[] Headers { get; private set; }

    /// <summary>
    /// Convenience function to read from a string
    /// </summary>
    /// <param name="source">The string to read</param>
    /// <param name="settings">The CSV settings to use for this reader (Default: CSV)</param>
    /// <returns>A reader over the bytes of the string, encoded with the settings' encoding</returns>
    public static CSVReader FromString(string source, CSVSettings settings = null)
    {
        if (settings == null)
        {
            settings = CSVSettings.CSV;
        }

        var byteArray = settings.Encoding.GetBytes(source);
        var stream = new MemoryStream(byteArray);
        return new CSVReader(stream, settings);
    }

    /// <summary>
    /// Convenience function to read from a file on disk
    /// </summary>
    /// <param name="filename">The file to read</param>
    /// <param name="settings">The CSV settings to use for this reader (Default: CSV)</param>
    /// <param name="encoding">The string encoding to use for the reader (Default: UTF8)</param>
    /// <returns>A reader that owns the opened file and closes it when disposed</returns>
    public static CSVReader FromFile(string filename, CSVSettings settings = null, Encoding encoding = null)
    {
        if (encoding == null)
        {
            encoding = Encoding.UTF8;
        }

        var sr = new StreamReader(filename, encoding);
        try
        {
            return new CSVReader(sr, settings);
        }
        catch
        {
            // The constructor reads the header row and can fail; don't leave the file handle open
            sr.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Construct a new CSV reader off a streamed source
    /// </summary>
    /// <param name="source">The stream source. Note that when disposed, the CSV Reader will dispose the stream reader.</param>
    /// <param name="settings">The CSV settings to use for this reader (Default: CSV)</param>
    /// <exception cref="ArgumentNullException">If source is null</exception>
    public CSVReader(StreamReader source, CSVSettings settings = null)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        _stream = source;
        _settings = settings ?? CSVSettings.CSV;

        // Do we need to parse headers?
        if (_settings.HeaderRowIncluded)
        {
            var line = source.ReadLine();
            if (_settings.AllowSepLine)
            {
                var newDelimiter = CSV.ParseSepLine(line);
                if (newDelimiter != null)
                {
                    // We don't want to change the original settings, since they may be a singleton
                    _settings = _settings.CloneWithNewDelimiter(newDelimiter.Value);

                    // The "sep=" line is not the header row; the headers are on the following line
                    line = source.ReadLine();
                }
            }

            Headers = CSV.ParseLine(line, _settings);
        }
        else
        {
            Headers = _settings.AssumedHeaders;
        }
    }

    /// <summary>
    /// Construct a new CSV reader off a streamed source
    /// </summary>
    /// <param name="source">The stream source. Note that when disposed, the CSV Reader will dispose the stream reader created over it.</param>
    /// <param name="settings">The CSV settings to use for this reader (Default: CSV)</param>
    public CSVReader(Stream source, CSVSettings settings = null)
        : this(new StreamReader(source, (settings ?? CSVSettings.CSV).Encoding), settings)
    {
        // Header parsing is shared with the StreamReader constructor via constructor chaining
    }

    /// <summary>
    /// Iterate through all lines in this CSV file
    /// </summary>
    /// <returns>An array of all data columns in the line</returns>
    public IEnumerable<string[]> Lines()
    {
        return CSV.ParseStream(_stream, _settings);
    }

    /// <summary>
    /// Iterate through all lines in this CSV file
    /// </summary>
    /// <returns>An enumerator over the rows parsed from the underlying stream</returns>
    public IEnumerator<string[]> GetEnumerator()
    {
        return CSV.ParseStream(_stream, _settings).GetEnumerator();
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

#if HAS_ASYNC_IENUM
    /// <summary>
    /// Iterate through all lines in this CSV file using async
    /// </summary>
    /// <returns>An array of all data columns in the line</returns>
    public IAsyncEnumerable<string[]> LinesAsync()
    {
        return CSV.ParseStreamAsync(_stream, _settings);
    }

    /// <summary>
    /// Iterate through all lines in this CSV file using async
    /// </summary>
    /// <returns>An array of all data columns in the line</returns>
    public IAsyncEnumerator<string[]> GetAsyncEnumerator(CancellationToken cancellationToken = new CancellationToken())
    {
        return CSV.ParseStreamAsync(_stream, _settings).GetAsyncEnumerator(cancellationToken);
    }

    /// <summary>
    /// Deserialize the CSV reader into a generic list
    /// </summary>
    /// <typeparam name="T">The type of data to deserialize</typeparam>
    /// <returns>A streaming collection of records from the CSV source</returns>
    /// <exception cref="Exception">If the CSV source cannot be parsed into the type, throws exceptions</exception>
    public async IAsyncEnumerable<T> DeserializeAsync<T>() where T : class, new()
    {
        var helper = new DeserializationHelper<T>(_settings, Headers);

        // Alright, let's retrieve CSV lines and parse each one!
        var row_num = 0;
        await foreach (var line in this)
        {
            row_num++;
            var obj = helper.Deserialize(line, row_num, _settings);

            // The helper returns null for rows it was told to ignore (empty lines)
            if (obj != null)
            {
                yield return obj;
            }
        }
    }
#endif

    /// <summary>
    /// Read this file into a data table in memory. Every column is created with type string.
    /// </summary>
    /// <returns>A data table with one column per header and one row per CSV line</returns>
    /// <exception cref="Exception">
    /// If a line has a different number of columns than the headers and IgnoreDimensionErrors is false
    /// </exception>
    public DataTable ReadAsDataTable()
    {
        var dt = new DataTable();
        string[] firstLine = null;

        // Headers is null (for example, HeaderRowIncluded is false and no AssumedHeaders were
        // supplied), so read the first data line to learn the column count and synthesize the
        // names Column0, Column1, ...
        if (Headers == null)
        {
            var rawLine = _stream.ReadLine();
            firstLine = CSV.ParseLine(rawLine, _settings);
            var generatedNames = new List<string>();
            for (var i = 0; i < firstLine.Length; i++)
            {
                generatedNames.Add($"Column{i}");
            }

            this.Headers = generatedNames.ToArray();
        }

        // Add headers
        var numColumns = Headers.Length;
        foreach (var t in Headers)
        {
            dt.Columns.Add(new DataColumn(t, typeof(string)));
        }

        // If we had to read the first line to get dimensions, add it
        var row_num = 1;
        if (firstLine != null)
        {
            dt.Rows.Add(firstLine);
            row_num++;
        }

        // Start reading through the file
        foreach (var line in CSV.ParseStream(_stream, _settings))
        {
            // Does this line match the length of the first line?
            if (line.Length == numColumns)
            {
                dt.Rows.Add(line);
            }
            else if (!_settings.IgnoreDimensionErrors)
            {
                throw new Exception($"Line #{row_num} contains {line.Length} columns; expected {numColumns}");
            }
            else
            {
                // Add as best we can - pad short lines with empty strings and truncate long ones
                var fitted = new List<string>();
                fitted.AddRange(line);
                while (fitted.Count < numColumns)
                {
                    fitted.Add("");
                }

                dt.Rows.Add(fitted.GetRange(0, numColumns).ToArray());
            }

            // Keep track of where we are in the file
            row_num++;
        }

        // Here's your data table
        return dt;
    }

    /// <summary>
    /// Deserialize the CSV reader into a generic list
    /// </summary>
    /// <typeparam name="T">The type to deserialize</typeparam>
    /// <returns>A streaming collection of objects as they are read from the source</returns>
    /// <exception cref="Exception">If the CSV formatting does not match the object, throw errors</exception>
    public IEnumerable<T> Deserialize<T>() where T : class, new()
    {
        var helper = new DeserializationHelper<T>(_settings, Headers);

        // Alright, let's retrieve CSV lines and parse each one!
        var row_num = 0;
        foreach (var line in this)
        {
            row_num++;
            var obj = helper.Deserialize(line, row_num, _settings);

            // The helper returns null for rows it was told to ignore (empty lines)
            if (obj != null)
            {
                yield return obj;
            }
        }
    }

    /// <summary>
    /// Close our resources - specifically, the stream reader
    /// </summary>
    public void Dispose()
    {
        _stream.Dispose();
    }

    /// <summary>
    /// Take a CSV file and chop it into multiple chunks of a specified maximum size.
    /// Output files are named after the input file with a 1-based sequence number inserted
    /// before the extension, and must not already exist.
    /// </summary>
    /// <param name="filename">The input filename to chop</param>
    /// <param name="out_folder">The folder where the chopped CSV will be saved</param>
    /// <param name="maxLinesPerFile">The maximum number of lines to put into each file</param>
    /// <param name="settings">The CSV settings to use when chopping this file into chunks (Default: CSV)</param>
    /// <returns>Number of files chopped, i.e. the number of output files actually created</returns>
    public static int ChopFile(string filename, string out_folder, int maxLinesPerFile, CSVSettings settings = null)
    {
        // Default settings
        if (settings == null) settings = CSVSettings.CSV;

        // Let's begin parsing
        var files_written = 0;
        var line_count = 0;
        var file_prefix = Path.GetFileNameWithoutExtension(filename);
        var ext = Path.GetExtension(filename);
        CSVWriter cw = null;

        try
        {
            // Read in lines from the file
            using (var sr = new StreamReader(filename))
            using (var cr = new CSVReader(sr, settings))
            {
                // Okay, let's do the real work
                foreach (var line in cr.Lines())
                {
                    // Do we need to create a file for writing?
                    if (cw == null)
                    {
                        // Count a file only when it is actually opened, so the return value is
                        // exact even when the input ends on a chunk boundary or has no data lines
                        files_written++;
                        var fn = Path.Combine(out_folder, file_prefix + files_written.ToString() + ext);
                        var fs = new FileStream(fn, FileMode.CreateNew);
                        var sw = new StreamWriter(fs, settings.Encoding);
                        cw = new CSVWriter(sw, settings);
                        if (settings.HeaderRowIncluded)
                        {
                            cw.WriteLine(cr.Headers);
                        }
                    }

                    // Write one line
                    cw.WriteLine(line);

                    // Count lines - close the file if done
                    line_count++;
                    if (line_count >= maxLinesPerFile)
                    {
                        cw.Dispose();
                        cw = null;
                        line_count = 0;
                    }
                }
            }
        }
        finally
        {
            // Ensure the final CSVWriter is closed properly, including when an exception interrupts the loop
            if (cw != null)
            {
                cw.Dispose();
            }
        }

        return files_written;
    }
}
|
||||
}
|
||||
262
CSVNET/CSVSettings.cs
Normal file
262
CSVNET/CSVSettings.cs
Normal file
@@ -0,0 +1,262 @@
|
||||
/*
|
||||
* 2006 - 2018 Ted Spence, http://tedspence.com
|
||||
* License: http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Home page: https://github.com/tspence/csharp-csv-reader
|
||||
*/
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace CSVNET
{
    /// <summary>
    /// Defines the behavior of CSV serialization when a nested array is encountered
    /// </summary>
    public enum ArrayOptions
    {
        /// <summary>
        /// Use built-in string conversion, which renders arrays as `MyNamespace.MyObject[]`
        /// </summary>
        ToString,

        /// <summary>
        /// Convert any array columns that are array types into nulls (either blanks or null tokens)
        /// </summary>
        TreatAsNull,

        /// <summary>
        /// Render the number of items in the array
        /// </summary>
        CountItems,

        /// <summary>
        /// Serialize child arrays recursively using the same settings
        /// </summary>
        RecursiveSerialization,
    }

    /// <summary>
    /// Defines the behavior of CSV Serialization when a nested object (class) is encountered
    /// </summary>
    public enum ObjectOptions
    {
        /// <summary>
        /// Use built-in string conversion, which renders as `MyNamespace.MyObject`
        /// </summary>
        ToString,

        /// <summary>
        /// Serialize child objects recursively using the same settings
        /// </summary>
        RecursiveSerialization,
    }

    /// <summary>
    /// Settings to configure how a CSV file is parsed
    /// </summary>
    public class CSVSettings
    {
        /// <summary>
        /// The character used to delimit individual fields in the CSV.
        /// </summary>
        public char FieldDelimiter { get; set; } = ',';

        /// <summary>
        /// The character used to enclose fields that contain the delimiter character.
        /// </summary>
        public char TextQualifier { get; set; } = '"';

        /// <summary>
        /// The separator used to indicate the end of a line in the CSV file.
        /// </summary>
        public string LineSeparator { get; set; } = Environment.NewLine;

        /// <summary>
        /// Set this value to true to enclose all fields in the text qualifier character.
        /// </summary>
        public bool ForceQualifiers { get; set; }

        /// <summary>
        /// Set this value to true to allow nulls to be rendered in CSV files when serializing
        /// and deserializing.
        ///
        /// CSV files by default do not have a mechanism for differentiating between null fields
        /// and empty fields.  If this field is set to false, both `null` and empty string will
        /// render as an empty string.
        ///
        /// If this field is set to true, all non-null fields will be enclosed by the text qualifier,
        /// and fields that are null will be represented as `NullToken`.
        /// </summary>
        public bool AllowNull { get; set; }

        /// <summary>
        /// If AllowNull is set to true, this token will be used to represent NULL values.
        /// </summary>
        public string NullToken { get; set; }

        /// <summary>
        /// The first line of the CSV file will include the names of each field.
        /// </summary>
        public bool HeaderRowIncluded { get; set; } = true;

        /// <summary>
        /// When reading a CSV file, if the first line contains the instruction `sep=`, use this
        /// to determine the separator for the file.
        ///
        /// The "sep" line is a feature exclusive to Microsoft Excel, which permits CSV files to
        /// more easily handle European files where the comma character is often a separator between
        /// numeric values rather than a field delimiter.
        ///
        /// If this flag is set to true, when you parse a CSV, the first line of the parsing can override
        /// the field separator logic for each individual instance of parsing, but it will not change
        /// the `FieldDelimiter` in your settings object.
        ///
        /// More information:
        /// * [Original Issue Report](https://github.com/tspence/csharp-csv-reader/issues/28)
        /// * [Superuser Article](https://superuser.com/questions/773644/what-is-the-sep-metadata-you-can-add-to-csvs)
        /// * [Tjitjing Blog](https://blog.tjitjing.com/index.php/2016/07/set-separator-delimiter-in-csv-file-to-open-correctly-in-excel.html)
        /// </summary>
        public bool AllowSepLine { get; set; } = true;

        /// <summary>
        /// If `HeaderRowIncluded` is false, use these values for the headers
        /// </summary>
        public string[] AssumedHeaders { get; set; }

        /// <summary>
        /// Set this value to true to allow parsing for files where each row has a different number of fields
        /// </summary>
        public bool IgnoreDimensionErrors { get; set; } = true;

        /// <summary>
        /// Set this value to true to ignore header errors when deserializing
        /// </summary>
        public bool IgnoreHeaderErrors { get; set; }

        /// <summary>
        /// Set this flag to true to ignore read-only properties. When deserializing, a column header
        /// that matches a read-only property is skipped only if `IgnoreHeaderErrors` is also true;
        /// otherwise an exception is thrown.
        /// </summary>
        public bool IgnoreReadOnlyProperties { get; set; }

        /// <summary>
        /// Expect headers to be case sensitive during deserialization
        /// </summary>
        public bool HeadersCaseSensitive { get; set; }

        /// <summary>
        /// Exclude these columns during serialization and deserialization
        /// </summary>
        public string[] ExcludedColumns { get; set; }

        /// <summary>
        /// A list of data types that require text qualifiers during serialization.
        /// </summary>
        public Type[] ForceQualifierTypes { get; set; }

        /// <summary>
        /// Some CSV files contain an empty line at the end. If you set this flag to true, deserialization will
        /// not throw an error for empty lines and will instead ignore it.
        /// </summary>
        public bool IgnoreEmptyLineForDeserialization { get; set; }

        /// <summary>
        /// When reading data from a stream, this is the block size to read at once.
        /// </summary>
        public int BufferSize { get; set; } = DEFAULT_BUFFER_SIZE;

        // NOTE: static field initializers run in textual order, so this must stay declared above
        // the static CSV / CSV_PERMIT_NULL / TSV instances, whose construction reads it.
        internal static readonly int DEFAULT_BUFFER_SIZE = 65536;

        /// <summary>
        /// The encoding for converting streams of bytes to strings
        /// </summary>
        public Encoding Encoding { get; set; } = Encoding.UTF8;

        /// <summary>
        /// The format to use for serializing date time objects, by default, ISO 8601
        /// </summary>
        public string DateTimeFormat { get; set; } = "o";

        /// <summary>
        /// The behavior to use when serializing a column that is an array or enumerable type
        /// </summary>
        public ArrayOptions NestedArrayBehavior { get; set; } = ArrayOptions.ToString;

        /// <summary>
        /// The behavior to use when serializing a column that is a class
        /// </summary>
        public ObjectOptions NestedObjectBehavior { get; set; } = ObjectOptions.ToString;

        /// <summary>
        /// Standard comma-separated value (CSV) file settings
        /// </summary>
        public static readonly CSVSettings CSV = new CSVSettings();

        /// <summary>
        /// Standard comma-separated value (CSV) file settings that permit rendering of NULL values
        /// </summary>
        public static readonly CSVSettings CSV_PERMIT_NULL = new CSVSettings()
        {
            AllowNull = true,
            NullToken = "NULL"
        };

        /// <summary>
        /// Standard tab-separated value (TSV) file settings
        /// </summary>
        public static readonly CSVSettings TSV = new CSVSettings()
        {
            FieldDelimiter = '\t'
        };

        /// <summary>
        /// Clone the existing settings, but with a different field delimiter.
        ///
        /// Used for parsing of "sep=" lines so that the original object is not modified.
        /// The clone is shallow: array-valued settings are shared with the original.
        /// </summary>
        /// <param name="newDelimiter">The new delimiter for the cloned settings</param>
        /// <returns>The newly cloned settings with the updated delimiter</returns>
        public CSVSettings CloneWithNewDelimiter(char newDelimiter)
        {
            var newSettings = (CSVSettings)this.MemberwiseClone();
            newSettings.FieldDelimiter = newDelimiter;
            return newSettings;
        }

        /// <summary>
        /// Retrieve the list of risky characters according to this settings definition: the field
        /// delimiter, the text qualifier, every character of the line separator, and CR / LF.
        /// The result may contain the same character more than once.
        /// </summary>
        /// <returns>An array of the risky characters</returns>
        public char[] GetRiskyChars()
        {
            var riskyChars = new List<char>();
            riskyChars.Add(FieldDelimiter);
            riskyChars.Add(TextQualifier);

            // A null line separator contributes no characters instead of failing the enumeration
            if (LineSeparator != null)
            {
                foreach (var c in LineSeparator)
                {
                    riskyChars.Add(c);
                }
            }

            // CRLF is always considered risky
            riskyChars.Add('\n');
            riskyChars.Add('\r');
            return riskyChars.ToArray();
        }

        /// <summary>
        /// Retrieve a lookup containing the list of types that require text qualifiers, or null if this
        /// feature is not used in this settings definition. Each listed type is a key mapped to 1.
        /// </summary>
        /// <returns>The lookup of types, or null when `ForceQualifierTypes` is null</returns>
        public Dictionary<Type, int> GetForceQualifierTypes()
        {
            if (ForceQualifierTypes == null) return null;
            var hashSet = new Dictionary<Type, int>();
            foreach (var type in ForceQualifierTypes)
            {
                // Null entries cannot be dictionary keys; skip them rather than throwing
                if (type == null) continue;

                // Indexer assignment tolerates a type being listed more than once,
                // whereas Add would throw on the duplicate key
                hashSet[type] = 1;
            }

            return hashSet;
        }
    }
}
|
||||
269
CSVNET/CSVStateMachine.cs
Normal file
269
CSVNET/CSVStateMachine.cs
Normal file
@@ -0,0 +1,269 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
// These suggestions from Resharper apply because we don't want it to recommend fixing things needed for Net20:
|
||||
// ReSharper disable LoopCanBeConvertedToQuery
|
||||
// ReSharper disable ConvertIfStatementToNullCoalescingAssignment
|
||||
// ReSharper disable ReplaceSubstringWithRangeIndexer
|
||||
// ReSharper disable InvertIf
|
||||
// ReSharper disable ConvertIfStatementToSwitchExpression
|
||||
// ReSharper disable ConvertIfStatementToSwitchStatement
|
||||
// ReSharper disable ReturnTypeCanBeEnumerable.Global
|
||||
|
||||
namespace CSVNET
|
||||
{
|
||||
/// <summary>
|
||||
/// The current state of CSV processing, given the text that has been seen so far
|
||||
/// </summary>
|
||||
public enum CSVState
{
    /// <summary>
    /// Processing has finished: the end of the CSV text was reached
    /// </summary>
    Done = 0,

    /// <summary>
    /// Processing may continue; no additional text is required right now
    /// </summary>
    CanKeepGoing = 1,

    /// <summary>
    /// The end of the CSV text was reached while a text qualifier was still open (unpaired).
    /// For example:
    /// `1,2,3,"test`
    /// </summary>
    MissingTrailingQualifier = 2
}
|
||||
|
||||
/// <summary>
|
||||
/// This state machine handles all functions of CSV processing except for the I/O, which can come in a variety
|
||||
/// of forms, either from a stream or an in-memory collection.
|
||||
///
|
||||
/// Since some CSV files have a single row of data that comprises multiple lines, this state machine may or may
|
||||
/// not produce one row of data for each chunk of text received.
|
||||
/// </summary>
|
||||
public class CSVStateMachine
|
||||
{
|
||||
private readonly CSVSettings _settings;
|
||||
private string _line;
|
||||
private readonly List<string> _list;
|
||||
private readonly StringBuilder _work;
|
||||
private int _position;
|
||||
private char _delimiter;
|
||||
private bool _allowSepLine;
|
||||
private bool _inTextQualifier;
|
||||
|
||||
/// <summary>
|
||||
/// Whether the state machine has concluded or can continue processing
|
||||
/// </summary>
|
||||
public CSVState State { get; private set; }
|
||||
|
||||
/// <summary>
|
||||
/// Returns true if we need more text
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
public bool NeedsMoreText()
{
    // With nothing buffered, more text is always required.
    if (String.IsNullOrEmpty(_line))
    {
        return true;
    }

    // Otherwise more text is needed once the current position plus the length of the
    // configured line separator reaches or passes the end of the buffered text.
    var separatorLength = _settings.LineSeparator.Length;
    return _position + separatorLength >= _line.Length;
}
|
||||
|
||||
/// <summary>
|
||||
/// Constructs a new state machine to begin processing CSV text
|
||||
/// </summary>
|
||||
public CSVStateMachine(CSVSettings settings)
{
    // Resolve the settings first; the standard CSV settings are used when none are supplied.
    var effectiveSettings = settings ?? CSVSettings.CSV;
    _settings = effectiveSettings;

    // These two are copied into fields because ParseChunk may change them after construction:
    // a "sep=" line overrides the delimiter, and the sep check is performed only once.
    _delimiter = effectiveSettings.FieldDelimiter;
    _allowSepLine = effectiveSettings.AllowSepLine;

    // Start with an empty buffer; the position starts at -1.
    _line = "";
    _position = -1;
    _list = new List<string>();
    _work = new StringBuilder();

    // We are ready for work
    State = CSVState.CanKeepGoing;
}
|
||||
|
||||
/// <summary>
|
||||
/// Parse a new chunk of text retrieved via some other means than a stream.
|
||||
///
|
||||
/// Call this function when you are retrieving your own text and when each chunk may or may not
|
||||
/// include line separators, and your stream does not consume line separators on its own.
|
||||
/// </summary>
|
||||
/// <param name="chunk">The new data to process</param>
|
||||
/// <param name="reachedEnd">Set this value to true when no further text will follow this chunk (the end of the input has been reached)</param>
|
||||
/// <returns>If this parsing operation produces a valid row, this will be non-null</returns>
|
||||
public string[] ParseChunk(string chunk, bool reachedEnd)
|
||||
{
|
||||
// Detect end of stream
|
||||
if (reachedEnd && string.IsNullOrEmpty(chunk) && _position == -1 && string.IsNullOrEmpty(_line))
|
||||
{
|
||||
State = CSVState.Done;
|
||||
return null;
|
||||
}
|
||||
|
||||
// If we're at the end of the line, remember to backtrack one because we increment immediately
|
||||
if (_position == _line.Length)
|
||||
{
|
||||
_position -= 1;
|
||||
}
|
||||
|
||||
// Add this chunk to the current processing logic
|
||||
_line += chunk;
|
||||
|
||||
// Check for the presence of a "sep=" line once at the beginning of a stream
|
||||
if (_allowSepLine)
|
||||
{
|
||||
var newDelimiter = CSV.ParseSepLine(_line);
|
||||
_allowSepLine = false;
|
||||
if (newDelimiter != null)
|
||||
{
|
||||
_delimiter = newDelimiter.Value;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Process one character at a time from the current line
|
||||
while (_position < _line.Length || !reachedEnd)
|
||||
{
|
||||
_position++;
|
||||
|
||||
// Have we reached the end of the stream?
|
||||
if (_position >= _line.Length)
|
||||
{
|
||||
if (reachedEnd)
|
||||
{
|
||||
// If we reached the end while still in a text qualifier, the CSV is broken
|
||||
if (_inTextQualifier)
|
||||
{
|
||||
State = CSVState.MissingTrailingQualifier;
|
||||
return null;
|
||||
}
|
||||
|
||||
// We always add the final work item here because trailing empty strings are valid
|
||||
State = CSVState.Done;
|
||||
_list.Add(_work.ToString());
|
||||
_line = string.Empty;
|
||||
_position = -1;
|
||||
return _list.ToArray();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
var c = _line[_position];
|
||||
|
||||
// If we are resuming after starting a text qualifier, can we find the end?
|
||||
if (_inTextQualifier)
|
||||
{
|
||||
var p2 = -1;
|
||||
while (p2 < 0)
|
||||
{
|
||||
p2 = _line.IndexOf(_settings.TextQualifier, _position + 1);
|
||||
if (p2 < 0)
|
||||
{
|
||||
if (reachedEnd)
|
||||
{
|
||||
State = CSVState.MissingTrailingQualifier;
|
||||
}
|
||||
|
||||
// Backtrack one character so we can move forward when the next chunk loads
|
||||
_position--;
|
||||
return null;
|
||||
}
|
||||
|
||||
// Append the text between the qualifiers
|
||||
_work.Append(_line.Substring(_position + 1, p2 - _position - 1));
|
||||
_position = p2;
|
||||
|
||||
// If the user put in a doubled-up qualifier, e.g. `""`, insert a single one and continue
|
||||
if (p2 + 1 < _line.Length && _line[p2 + 1] == _settings.TextQualifier)
|
||||
{
|
||||
_work.Append(_settings.TextQualifier);
|
||||
_position++;
|
||||
p2 = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// We're done parsing this text qualifier
|
||||
_inTextQualifier = false;
|
||||
}
|
||||
// Is this the start of a text qualified field?
|
||||
else if (c == _settings.TextQualifier && _work.Length == 0)
|
||||
{
|
||||
_inTextQualifier = true;
|
||||
_position--;
|
||||
}
|
||||
// Are we at a line separator? Let's do a quick test first
|
||||
else if (c == _settings.LineSeparator[0])
|
||||
{
|
||||
// If we don't have enough characters left to test the line separator properly, ask for more
|
||||
var notEnoughChars = _position + _settings.LineSeparator.Length > _line.Length;
|
||||
if (notEnoughChars && !reachedEnd)
|
||||
{
|
||||
// Backtrack one character so we can pick up the line separator completely next time
|
||||
_position--;
|
||||
return null;
|
||||
}
|
||||
|
||||
// If we have reached the end, but this isn't a complete line separator, it's just text
|
||||
if (notEnoughChars)
|
||||
{
|
||||
_work.Append(c);
|
||||
}
|
||||
// OK, we have enough characters, see if this is a line separator
|
||||
else if (string.Equals(_line.Substring(_position, _settings.LineSeparator.Length), _settings.LineSeparator))
|
||||
{
|
||||
_line = _line.Substring(_position + _settings.LineSeparator.Length);
|
||||
_position = -1;
|
||||
_list.Add(_work.ToString());
|
||||
var row = _list.ToArray();
|
||||
_list.Clear();
|
||||
_work.Length = 0;
|
||||
return row;
|
||||
}
|
||||
// It's not a line separator, it's just a normal character
|
||||
else
|
||||
{
|
||||
_work.Append(c);
|
||||
}
|
||||
}
|
||||
// Does this start a new field?
|
||||
else if (c == _delimiter)
|
||||
{
|
||||
// Is this a null token, and do we permit null tokens?
|
||||
var s = _work.ToString();
|
||||
if (_settings.AllowNull && string.Equals(s, _settings.NullToken, StringComparison.Ordinal))
|
||||
{
|
||||
_list.Add(null);
|
||||
}
|
||||
else
|
||||
{
|
||||
_list.Add(s);
|
||||
}
|
||||
_work.Length = 0;
|
||||
|
||||
// Test for special case: when the user has written a casual comma, space, and text qualifier, skip the space
|
||||
// Checks if the second parameter of the if statement will pass through successfully
|
||||
// e.g. `"bob", "mary", "bill"`
|
||||
if (_position + 2 <= _line.Length - 1)
|
||||
{
|
||||
if (_line[_position + 1].Equals(' ') && _line[_position + 2].Equals(_settings.TextQualifier))
|
||||
{
|
||||
_position++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Regular character
|
||||
else
|
||||
{
|
||||
_work.Append(c);
|
||||
}
|
||||
}
|
||||
|
||||
State = CSVState.Done;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
298
CSVNET/CSVWriter.cs
Normal file
298
CSVNET/CSVWriter.cs
Normal file
@@ -0,0 +1,298 @@
|
||||
/*
|
||||
* 2006 - 2018 Ted Spence, http://tedspence.com
|
||||
* License: http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Home page: https://github.com/tspence/csharp-csv-reader
|
||||
*/
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Data;
|
||||
using System.Reflection;
|
||||
#if HAS_ASYNC
|
||||
using System.Threading.Tasks;
|
||||
#endif
|
||||
|
||||
// ReSharper disable LoopCanBeConvertedToQuery
|
||||
// ReSharper disable ConvertIfStatementToNullCoalescingExpression
|
||||
|
||||
namespace CSVNET
|
||||
{
|
||||
/// <summary>
/// Converts objects of type <typeparamref name="T"/> into CSV text without performing any string or stream I/O
/// </summary>
/// <typeparam name="T">The type whose public fields and properties become CSV columns</typeparam>
public class SerializationHelper<T> where T : class, new()
{
    private readonly CSVSettings _settings;
    private readonly PropertyInfo[] _properties;
    private readonly FieldInfo[] _fields;
    private readonly char[] _riskyChars;
    private readonly Dictionary<Type, int> _forceQualifierTypes;

    /// <summary>
    /// Builds a helper for <typeparamref name="T"/>, capturing every public property and field
    /// that the settings do not exclude
    /// </summary>
    /// <param name="settings">The CSV settings to use; when null, <c>CSVSettings.CSV</c> is used</param>
    /// <param name="riskyChars">Passed through unchanged to <c>CSV.ItemsToCsv</c></param>
    /// <param name="forceQualifierTypes">Passed through unchanged to <c>CSV.ItemsToCsv</c></param>
    public SerializationHelper(CSVSettings settings, char[] riskyChars, Dictionary<Type, int> forceQualifierTypes)
    {
        _settings = settings ?? CSVSettings.CSV;

        // Keep only the members whose names pass the exclusion filter; properties are tested before fields
        var filter = new ExcludedColumnHelper(_settings);

        var candidateProperties = typeof(T).GetProperties();
        var keptProperties = new List<PropertyInfo>();
        for (var i = 0; i < candidateProperties.Length; i++)
        {
            var candidate = candidateProperties[i];
            if (filter.IsExcluded(candidate.Name))
            {
                continue;
            }

            keptProperties.Add(candidate);
        }

        var candidateFields = typeof(T).GetFields();
        var keptFields = new List<FieldInfo>();
        for (var i = 0; i < candidateFields.Length; i++)
        {
            var candidate = candidateFields[i];
            if (filter.IsExcluded(candidate.Name))
            {
                continue;
            }

            keptFields.Add(candidate);
        }

        _properties = keptProperties.ToArray();
        _fields = keptFields.ToArray();
        _riskyChars = riskyChars;
        _forceQualifierTypes = forceQualifierTypes;
    }

    /// <summary>
    /// Produce the header line: field names first, then property names
    /// </summary>
    /// <returns>The text produced by <c>CSV.ItemsToCsv</c> for the column names</returns>
    public string SerializeHeader()
    {
        var names = new List<object>(_fields.Length + _properties.Length);
        for (var i = 0; i < _fields.Length; i++)
        {
            names.Add(_fields[i].Name);
        }

        for (var i = 0; i < _properties.Length; i++)
        {
            names.Add(_properties[i].Name);
        }

        return CSV.ItemsToCsv(names, _settings, _riskyChars, _forceQualifierTypes);
    }

    /// <summary>
    /// Produce one data line for an object: field values first, then property values,
    /// in the same column order as <see cref="SerializeHeader"/>
    /// </summary>
    /// <param name="obj">The object whose field and property values are read</param>
    /// <returns>The text produced by <c>CSV.ItemsToCsv</c> for the values</returns>
    public string Serialize(T obj)
    {
        var values = new List<object>(_fields.Length + _properties.Length);
        for (var i = 0; i < _fields.Length; i++)
        {
            values.Add(_fields[i].GetValue(obj));
        }

        for (var i = 0; i < _properties.Length; i++)
        {
            values.Add(_properties[i].GetValue(obj, null));
        }

        return CSV.ItemsToCsv(values, _settings, _riskyChars, _forceQualifierTypes);
    }
}
|
||||
|
||||
/// <summary>
/// Writes CSV objects to a stream
/// </summary>
public class CSVWriter : IDisposable
{
    private readonly CSVSettings _settings;
    private readonly StreamWriter _writer;
    private readonly char[] _riskyChars;
    private readonly Dictionary<Type, int> _forceQualifierTypes;

    /// <summary>
    /// Construct a new CSV writer to produce output on the enclosed StreamWriter
    /// </summary>
    /// <param name="dest">The stream where this CSV will be outputted</param>
    /// <param name="settings">The CSV settings to use when writing to the stream (Default: CSV)</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="dest"/> is null</exception>
    public CSVWriter(StreamWriter dest, CSVSettings settings = null)
    {
        // Fail at construction instead of with a NullReferenceException on the first write
        if (dest == null)
        {
            throw new ArgumentNullException(nameof(dest));
        }

        _writer = dest;
        _settings = settings;
        if (_settings == null)
        {
            _settings = CSVSettings.CSV;
        }
        _riskyChars = _settings.GetRiskyChars();
        _forceQualifierTypes = _settings.GetForceQualifierTypes();
    }

    /// <summary>
    /// Construct a new CSV writer to produce output on the enclosed stream
    /// </summary>
    /// <param name="dest">The stream where this CSV will be outputted</param>
    /// <param name="settings">The CSV settings to use when writing to the stream (Default: CSV)</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="dest"/> is null</exception>
    public CSVWriter(Stream dest, CSVSettings settings = null)
    {
        if (dest == null)
        {
            throw new ArgumentNullException(nameof(dest));
        }

        _settings = settings;
        if (_settings == null)
        {
            _settings = CSVSettings.CSV;
        }
        _writer = new StreamWriter(dest, _settings.Encoding);
        _riskyChars = _settings.GetRiskyChars();
        _forceQualifierTypes = _settings.GetForceQualifierTypes();
    }

    /// <summary>
    /// Collect the column names of a data table in column order, for use as a header row
    /// </summary>
    private static List<object> GetColumnNames(DataTable dt)
    {
        var headers = new List<object>(dt.Columns.Count);
        foreach (DataColumn col in dt.Columns)
        {
            headers.Add(col.ColumnName);
        }

        return headers;
    }

    /// <summary>
    /// Write the data table to a stream in CSV format
    /// </summary>
    /// <param name="dt">The data table to write</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="dt"/> is null</exception>
    public void Write(DataTable dt)
    {
        if (dt == null)
        {
            throw new ArgumentNullException(nameof(dt));
        }

        if (_settings.HeaderRowIncluded)
        {
            WriteLine(GetColumnNames(dt));
        }

        foreach (DataRow dr in dt.Rows)
        {
            WriteLine(dr.ItemArray);
        }
    }

    /// <summary>
    /// Write a single line to this CSV
    /// </summary>
    /// <param name="items">The values that make up the line, in column order</param>
    public void WriteLine(IEnumerable<object> items)
    {
        _writer.Write(CSV.ItemsToCsv(items, _settings, _riskyChars, _forceQualifierTypes));
        _writer.Write(_settings.LineSeparator);
    }

#if HAS_ASYNC
    /// <summary>
    /// Write a single line to this CSV
    /// </summary>
    /// <param name="items">The values that make up the line, in column order</param>
    public async Task WriteLineAsync(IEnumerable<object> items)
    {
        // Library code does not need the caller's synchronization context, so do not capture it
        await _writer.WriteAsync(CSV.ItemsToCsv(items, _settings, _riskyChars, _forceQualifierTypes)).ConfigureAwait(false);
        await _writer.WriteAsync(_settings.LineSeparator).ConfigureAwait(false);
    }

    /// <summary>
    /// Write the data table to a stream in CSV format
    /// </summary>
    /// <param name="dt">The data table to write</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="dt"/> is null</exception>
    public async Task WriteAsync(DataTable dt)
    {
        if (dt == null)
        {
            throw new ArgumentNullException(nameof(dt));
        }

        if (_settings.HeaderRowIncluded)
        {
            await WriteLineAsync(GetColumnNames(dt)).ConfigureAwait(false);
        }

        foreach (DataRow dr in dt.Rows)
        {
            await WriteLineAsync(dr.ItemArray).ConfigureAwait(false);
        }
    }
#endif

    /// <summary>
    /// Serialize a list of objects to CSV using this writer
    /// </summary>
    /// <typeparam name="T">The type of object to serialize</typeparam>
    /// <param name="list">An IEnumerable that produces the list of objects to serialize.</param>
    public void Serialize<T>(IEnumerable<T> list) where T : class, new()
    {
        var serializer = new SerializationHelper<T>(_settings, _riskyChars, _forceQualifierTypes);
        if (_settings.HeaderRowIncluded)
        {
            _writer.Write(serializer.SerializeHeader());
            _writer.Write(_settings.LineSeparator);
        }

        foreach (var row in list)
        {
            _writer.Write(serializer.Serialize(row));
            _writer.Write(_settings.LineSeparator);
        }
    }

#if HAS_ASYNC
    /// <summary>
    /// Serialize a list of objects to CSV using this writer
    /// </summary>
    /// <typeparam name="T">The type of object to serialize</typeparam>
    /// <param name="list">An IEnumerable that produces the list of objects to serialize.</param>
    public async Task SerializeAsync<T>(IEnumerable<T> list) where T : class, new()
    {
        var serializer = new SerializationHelper<T>(_settings, _riskyChars, _forceQualifierTypes);
        if (_settings.HeaderRowIncluded)
        {
            await _writer.WriteAsync(serializer.SerializeHeader()).ConfigureAwait(false);
            await _writer.WriteAsync(_settings.LineSeparator).ConfigureAwait(false);
        }

        foreach (var row in list)
        {
            await _writer.WriteAsync(serializer.Serialize(row)).ConfigureAwait(false);
            await _writer.WriteAsync(_settings.LineSeparator).ConfigureAwait(false);
        }
    }
#endif

#if HAS_ASYNC_IENUM
    /// <summary>
    /// Serialize a list of objects to CSV using this writer
    /// </summary>
    /// <typeparam name="T">The type of object to serialize</typeparam>
    /// <param name="list">An IAsyncEnumerable that produces the list of objects to serialize.</param>
    public async Task SerializeAsync<T>(IAsyncEnumerable<T> list) where T : class, new()
    {
        var serializer = new SerializationHelper<T>(_settings, _riskyChars, _forceQualifierTypes);
        if (_settings.HeaderRowIncluded)
        {
            await _writer.WriteAsync(serializer.SerializeHeader()).ConfigureAwait(false);
            await _writer.WriteAsync(_settings.LineSeparator).ConfigureAwait(false);
        }

        await foreach (var row in list.ConfigureAwait(false))
        {
            await _writer.WriteAsync(serializer.Serialize(row)).ConfigureAwait(false);
            await _writer.WriteAsync(_settings.LineSeparator).ConfigureAwait(false);
        }
    }
#endif

    /// <summary>
    /// Close our resources - specifically, the stream writer
    /// </summary>
    public void Dispose()
    {
        // Dispose flushes and closes the writer; a separate Close() call beforehand is redundant
        _writer.Dispose();
    }
}
|
||||
}
|
||||
36
CSVNET/Properties/AssemblyInfo.cs
Normal file
36
CSVNET/Properties/AssemblyInfo.cs
Normal file
@@ -0,0 +1,36 @@
|
||||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
// General information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("CSVNET")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("HP Inc.")]
|
||||
[assembly: AssemblyProduct("CSVNET")]
|
||||
[assembly: AssemblyCopyright("Copyright © HP Inc. 2024")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
||||
// Setting ComVisible to false makes the types in this assembly not visible
|
||||
// to COM components. If you need to access a type in this assembly from
|
||||
// COM, set the ComVisible attribute to true on that type.
|
||||
[assembly: ComVisible(false)]
|
||||
|
||||
// The following GUID is the ID of the type library if this project is exposed to COM
|
||||
[assembly: Guid("417a61bc-206d-4895-baae-e6155afbdf2a")]
|
||||
|
||||
// Version information for an assembly consists of the following four values:
|
||||
//
|
||||
// Major Version
|
||||
// Minor Version
|
||||
// Build Number
|
||||
// Revision
|
||||
//
|
||||
// You can specify all the values or use the defaults for the Build and Revision numbers
|
||||
// by entering "*" as shown below:
|
||||
// [assembly: AssemblyVersion("1.0.*")]
|
||||
[assembly: AssemblyVersion("1.0.0.0")]
|
||||
[assembly: AssemblyFileVersion("1.0.0.0")]
|
||||
BIN
CSVNET/bin/Debug/CSVNET.dll
Normal file
BIN
CSVNET/bin/Debug/CSVNET.dll
Normal file
Binary file not shown.
BIN
CSVNET/bin/Debug/CSVNET.pdb
Normal file
BIN
CSVNET/bin/Debug/CSVNET.pdb
Normal file
Binary file not shown.
@@ -0,0 +1,4 @@
|
||||
// <autogenerated />
|
||||
using System;
|
||||
using System.Reflection;
|
||||
[assembly: global::System.Runtime.Versioning.TargetFrameworkAttribute(".NETFramework,Version=v4.8", FrameworkDisplayName = ".NET Framework 4.8")]
|
||||
BIN
CSVNET/obj/Debug/CSVNET.csproj.AssemblyReference.cache
Normal file
BIN
CSVNET/obj/Debug/CSVNET.csproj.AssemblyReference.cache
Normal file
Binary file not shown.
1
CSVNET/obj/Debug/CSVNET.csproj.CoreCompileInputs.cache
Normal file
1
CSVNET/obj/Debug/CSVNET.csproj.CoreCompileInputs.cache
Normal file
@@ -0,0 +1 @@
|
||||
7618766358e265572e5abcaa4cafd36b0a57b01f36171f2711451a6eafc40e83
|
||||
6
CSVNET/obj/Debug/CSVNET.csproj.FileListAbsolute.txt
Normal file
6
CSVNET/obj/Debug/CSVNET.csproj.FileListAbsolute.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
E:\Software-Projekte\OnDoc\OnDoc\CSVNET\bin\Debug\CSVNET.dll
|
||||
E:\Software-Projekte\OnDoc\OnDoc\CSVNET\bin\Debug\CSVNET.pdb
|
||||
E:\Software-Projekte\OnDoc\OnDoc\CSVNET\obj\Debug\CSVNET.csproj.AssemblyReference.cache
|
||||
E:\Software-Projekte\OnDoc\OnDoc\CSVNET\obj\Debug\CSVNET.csproj.CoreCompileInputs.cache
|
||||
E:\Software-Projekte\OnDoc\OnDoc\CSVNET\obj\Debug\CSVNET.dll
|
||||
E:\Software-Projekte\OnDoc\OnDoc\CSVNET\obj\Debug\CSVNET.pdb
|
||||
BIN
CSVNET/obj/Debug/CSVNET.dll
Normal file
BIN
CSVNET/obj/Debug/CSVNET.dll
Normal file
Binary file not shown.
BIN
CSVNET/obj/Debug/CSVNET.pdb
Normal file
BIN
CSVNET/obj/Debug/CSVNET.pdb
Normal file
Binary file not shown.
BIN
CSVNET/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache
Normal file
BIN
CSVNET/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache
Normal file
Binary file not shown.
@@ -0,0 +1,4 @@
|
||||
// <autogenerated />
|
||||
using System;
|
||||
using System.Reflection;
|
||||
[assembly: global::System.Runtime.Versioning.TargetFrameworkAttribute(".NETFramework,Version=v4.8", FrameworkDisplayName = ".NET Framework 4.8")]
|
||||
BIN
CSVNET/obj/Release/CSVNET.csproj.AssemblyReference.cache
Normal file
BIN
CSVNET/obj/Release/CSVNET.csproj.AssemblyReference.cache
Normal file
Binary file not shown.
Reference in New Issue
Block a user