/*
 * 2006 - 2018 Ted Spence, http://tedspence.com
 * License: http://www.apache.org/licenses/LICENSE-2.0
 * Home page: https://github.com/tspence/csharp-csv-reader
 */
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
#if HAS_ASYNC
using System.Threading.Tasks;
#endif

// These suggestions from Resharper apply because we don't want it to recommend fixing things needed for Net20:
// ReSharper disable LoopCanBeConvertedToQuery
// ReSharper disable ConvertIfStatementToNullCoalescingAssignment
// ReSharper disable ReplaceSubstringWithRangeIndexer
// ReSharper disable InvertIf
// ReSharper disable ConvertIfStatementToSwitchExpression
// ReSharper disable ConvertIfStatementToSwitchStatement

namespace CSVNET
{
    /// <summary>
    /// Root class that contains static functions for straightforward CSV parsing
    /// </summary>
    public static class CSV
    {
        /// <summary>
        /// Use this to determine what version of DotNet was used to build this library
        /// </summary>
#if NET2_0
        public const string VERSION = "NET20";
#elif NET4_0
        public const string VERSION = "NET40";
#elif NET4_5
        public const string VERSION = "NET45";
#elif NET5_0
        public const string VERSION = "NET50";
#elif NET6_0
        public const string VERSION = "NET60";
#elif NETSTANDARD1_0
        public const string VERSION = "NETSTANDARD10";
#elif NETSTANDARD2_0
        public const string VERSION = "NETSTANDARD20";
#else
        public const string VERSION = "UNKNOWN";
#endif

        /// <summary>
        /// Parse a CSV stream into <![CDATA[IEnumerable<string[]>]]>, while permitting embedded newlines
        /// </summary>
        /// <param name="inStream">The stream to read</param>
        /// <param name="settings">The CSV settings to use for this parsing operation (Default: CSV)</param>
        /// <returns>An enumerable object that can be examined to retrieve rows from the stream.</returns>
        public static IEnumerable<string[]> ParseStream(StreamReader inStream, CSVSettings settings = null)
        {
            int bufferSize = settings?.BufferSize ?? CSVSettings.DEFAULT_BUFFER_SIZE;
            var buffer = new char[bufferSize];
            var machine = new CSVStateMachine(settings);
            while (machine.State == CSVState.CanKeepGoing)
            {
                // Only pull another block from the stream when the state machine asks for more text
                var line = string.Empty;
                if (machine.NeedsMoreText() && !inStream.EndOfStream)
                {
                    var readChars = inStream.ReadBlock(buffer, 0, bufferSize);
                    line = new string(buffer, 0, readChars);
                }

                var row = machine.ParseChunk(line, inStream.EndOfStream);
                if (row != null)
                {
                    yield return row;
                }
                else if (inStream.EndOfStream)
                {
                    // No row was produced and there is nothing left to read
                    break;
                }
            }
        }

#if HAS_ASYNC_IENUM
        /// <summary>
        /// Parse a CSV stream into <![CDATA[IAsyncEnumerable<string[]>]]> asynchronously, while permitting embedded newlines
        /// </summary>
        /// <param name="inStream">The stream to read</param>
        /// <param name="settings">The CSV settings to use for this parsing operation (Default: CSV)</param>
        /// <returns>An enumerable object that can be examined to retrieve rows from the stream.</returns>
        public static async IAsyncEnumerable<string[]> ParseStreamAsync(StreamReader inStream, CSVSettings settings = null)
        {
            int bufferSize = settings?.BufferSize ?? CSVSettings.DEFAULT_BUFFER_SIZE;
            var buffer = new char[bufferSize];
            var machine = new CSVStateMachine(settings);
            while (machine.State == CSVState.CanKeepGoing)
            {
                // Only pull another block from the stream when the state machine asks for more text
                var line = string.Empty;
                if (machine.NeedsMoreText() && !inStream.EndOfStream)
                {
                    var readChars = await inStream.ReadBlockAsync(buffer, 0, bufferSize);
                    line = new string(buffer, 0, readChars);
                }

                var row = machine.ParseChunk(line, inStream.EndOfStream);
                if (row != null)
                {
                    yield return row;
                }
                else if (inStream.EndOfStream)
                {
                    // No row was produced and there is nothing left to read
                    break;
                }
            }
        }
#endif

        /// <summary>
        /// Feeds one complete line of text through a fresh state machine until it leaves the
        /// <c>CanKeepGoing</c> state.
        /// </summary>
        /// <param name="line">The line of text to parse</param>
        /// <param name="settings">The CSV settings to use for this parsing operation</param>
        /// <param name="row">The result of the last chunk parsed, or null if no chunk was parsed</param>
        /// <returns>The state the machine finished in</returns>
        private static CSVState ParseCompleteLine(string line, CSVSettings settings, out string[] row)
        {
            row = null;
            var machine = new CSVStateMachine(settings);
            while (machine.State == CSVState.CanKeepGoing)
            {
                row = machine.ParseChunk(line, true);

                // The whole line was supplied on the first pass; later passes add no new text
                line = string.Empty;
            }

            return machine.State;
        }

        /// <summary>
        /// Parse a line from a CSV file and return an array of fields, or null if it fails
        /// </summary>
        /// <param name="line">One line of text from a CSV file</param>
        /// <param name="settings">The CSV settings to use for this parsing operation (Default: CSV)</param>
        /// <param name="throwOnFailure">
        /// If true or unspecified, throws an exception if parsing fails. If explicitly false,
        /// returns null instead of throwing.
        /// </param>
        /// <returns>An array containing all fields in the next row of data, or null if it could not be parsed.</returns>
        /// <exception cref="Exception">Parsing failed and <paramref name="throwOnFailure"/> was not false</exception>
        public static string[] ParseLine(string line, CSVSettings settings = null, bool? throwOnFailure = null)
        {
            string[] row;
            var finalState = ParseCompleteLine(line, settings, out row);
            if (finalState == CSVState.Done)
            {
                return row;
            }

            // Only an explicit 'false' suppresses the exception; leaving the parameter
            // unspecified keeps the historical behavior of throwing.
            if (throwOnFailure == false)
            {
                return null;
            }

            throw new Exception($"Malformed CSV structure: {finalState}");
        }

        /// <summary>
        /// Try to parse a line of CSV data.  Can only return false if an unterminated text qualifier is encountered.
        ///
        /// This function cannot recognize 'sep=' lines because it does not know whether it is parsing the first line
        /// in the overall CSV stream.
        /// </summary>
        /// <returns>False if there was an unterminated text qualifier in the <paramref name="line"/></returns>
        /// <param name="line">The line of text to parse</param>
        /// <param name="row">The array of fields found in the line</param>
        /// <param name="settings">The CSV settings to use for this parsing operation (Default: CSV)</param>
        public static bool TryParseLine(string line, out string[] row, CSVSettings settings = null)
        {
            return ParseCompleteLine(line, settings, out row) == CSVState.Done;
        }

        /// <summary>
        /// Deserialize a CSV string into a list of typed objects
        /// </summary>
        /// <typeparam name="T">The type of objects to deserialize</typeparam>
        /// <param name="source">The source CSV to deserialize</param>
        /// <param name="settings">The CSV settings to use when parsing the source (Default: CSV)</param>
        /// <returns>The objects read from the source CSV</returns>
        public static IEnumerable<T> Deserialize<T>(string source, CSVSettings settings = null) where T : class, new()
        {
            return CSVReader.FromString(source, settings).Deserialize<T>();
        }

#if HAS_ASYNC_IENUM
        /// <summary>
        /// Deserialize a CSV string into a list of typed objects
        /// </summary>
        /// <typeparam name="T">The type of objects to deserialize</typeparam>
        /// <param name="source">The source CSV to deserialize</param>
        /// <param name="settings">The CSV settings to use when parsing the source (Default: CSV)</param>
        /// <returns>The objects read from the source CSV</returns>
        public static IAsyncEnumerable<T> DeserializeAsync<T>(string source, CSVSettings settings = null) where T : class, new()
        {
            return CSVReader.FromString(source, settings).DeserializeAsync<T>();
        }
#endif

        /// <summary>
        /// Serialize a sequence of objects into a CSV string
        /// </summary>
        /// <remarks>
        /// The NET2_0 build exposes this as a plain static method rather than an extension method.
        /// </remarks>
        /// <returns>A single line of CSV encoded data containing these values</returns>
        /// <param name="row">A list or array of objects to serialize</param>
        /// <param name="settings">The CSV settings to use when serializing these values (Default: CSV)</param>
#if NET2_0
        public static string ToCSVString(IEnumerable row, CSVSettings settings = null)
#else
        public static string ToCSVString(this IEnumerable row, CSVSettings settings = null)
#endif
        {
            if (settings == null)
            {
                settings = CSVSettings.CSV;
            }

            var riskyChars = settings.GetRiskyChars();
            var forceQualifierTypes = settings.GetForceQualifierTypes();
            return ItemsToCsv(row, settings, riskyChars, forceQualifierTypes);
        }

        /// <summary>
        /// Serialize an array of objects to CSV format
        /// </summary>
        /// <typeparam name="T">The type of objects to serialize from this CSV</typeparam>
        /// <param name="list">The array of objects to serialize</param>
        /// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
        /// <returns>The completed CSV string representing one line per element in list</returns>
        public static string Serialize<T>(IEnumerable<T> list, CSVSettings settings = null) where T : class, new()
        {
            if (settings == null)
            {
                settings = CSVSettings.CSV;
            }

            using (var ms = new MemoryStream())
            {
                // Dispose the writer before reading the buffer back out of the memory stream
                using (var cw = new CSVWriter(ms, settings))
                {
                    cw.Serialize(list);
                }

                var rawString = settings.Encoding.GetString(ms.ToArray());
                return RemoveByteOrderMarker(rawString);
            }
        }

        /// <summary>
        /// The UTF-8 preamble decoded as a string, used to detect a leading byte order mark
        /// </summary>
        private static readonly string _byteOrderMarkUtf8 = Encoding.UTF8.GetString(Encoding.UTF8.GetPreamble());

        /// <summary>
        /// Strips a leading UTF-8 byte order mark from a decoded string, if one is present
        /// </summary>
        /// <param name="rawString">The decoded text to examine</param>
        /// <returns>The text without a leading byte order mark; null or empty input is returned as-is</returns>
        internal static string RemoveByteOrderMarker(string rawString)
        {
            if (string.IsNullOrEmpty(rawString))
            {
                return rawString;
            }

            if (rawString.StartsWith(_byteOrderMarkUtf8, StringComparison.Ordinal))
            {
                return rawString.Substring(_byteOrderMarkUtf8.Length);
            }

            return rawString;
        }

        /// <summary>
        /// Serialize an array of objects to CSV format and write it to a stream
        /// </summary>
        /// <typeparam name="T">The type of objects to serialize from this CSV</typeparam>
        /// <param name="list">The array of objects to serialize</param>
        /// <param name="stream">The stream to which we will send this CSV text</param>
        /// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
        public static void Serialize<T>(IEnumerable<T> list, Stream stream, CSVSettings settings = null) where T : class, new()
        {
            using (var cw = new CSVWriter(stream, settings))
            {
                cw.Serialize(list);
            }
        }

#if HAS_ASYNC
        /// <summary>
        /// Serialize an array of objects to CSV format and write it to a stream asynchronously
        /// </summary>
        /// <typeparam name="T">The type of objects to serialize from this CSV</typeparam>
        /// <param name="list">The array of objects to serialize</param>
        /// <param name="stream">The stream to which we will send this CSV text</param>
        /// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
        /// <returns>A task that completes once the list has been written</returns>
        public static async Task SerializeAsync<T>(IEnumerable<T> list, Stream stream, CSVSettings settings = null) where T : class, new()
        {
            using (var cw = new CSVWriter(stream, settings))
            {
                // Await inside the using block so the writer is not disposed while the write is still running
                await cw.SerializeAsync(list).ConfigureAwait(false);
            }
        }
#endif

#if HAS_ASYNC_IENUM
        /// <summary>
        /// Serialize an asynchronous sequence of objects to CSV format and write it to a stream
        /// </summary>
        /// <typeparam name="T">The type of objects to serialize from this CSV</typeparam>
        /// <param name="list">The sequence of objects to serialize</param>
        /// <param name="stream">The stream to which we will send this CSV text</param>
        /// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
        /// <returns>A task that completes once the list has been written</returns>
        public static async Task SerializeAsync<T>(IAsyncEnumerable<T> list, Stream stream, CSVSettings settings = null) where T : class, new()
        {
            using (var cw = new CSVWriter(stream, settings))
            {
                // Await inside the using block so the writer is not disposed while the write is still running
                await cw.SerializeAsync(list).ConfigureAwait(false);
            }
        }
#endif

        /// <summary>
        /// Add a CSV Header line to a StringBuilder for a specific type
        /// </summary>
        /// <typeparam name="T">The type whose header line is appended</typeparam>
        /// <param name="sb">The StringBuilder to append data</param>
        /// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
#if NET2_0
        public static void AppendCSVHeader<T>(StringBuilder sb, CSVSettings settings = null) where T : class, new()
#else
        public static void AppendCSVHeader<T>(this StringBuilder sb, CSVSettings settings = null) where T : class, new()
#endif
        {
            // Serializing an empty array yields whatever the writer emits before the first item
            var header = Serialize(new T[] { }, settings);
            sb.Append(header);
        }

        /// <summary>
        /// Appends a single object to a StringBuilder in CSV format as a single line
        /// </summary>
        /// <typeparam name="T">The type of the object to append</typeparam>
        /// <param name="sb">The StringBuilder to append data</param>
        /// <param name="obj">The single object to append in CSV-line format</param>
        /// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
#if NET2_0
        public static void AppendCSVLine<T>(StringBuilder sb, T obj, CSVSettings settings = null) where T : class, new()
#else
        public static void AppendCSVLine<T>(this StringBuilder sb, T obj, CSVSettings settings = null) where T : class, new()
#endif
        {
            if (settings == null)
            {
                settings = CSVSettings.CSV;
            }

            // Duplicate settings, but flag ourselves to ignore the header
            settings = settings.CloneWithNewDelimiter(settings.FieldDelimiter);
            settings.HeaderRowIncluded = false;
            var line = Serialize(new T[] { obj }, settings);
            sb.Append(line);
        }

        /// <summary>
        /// Internal method to convert a list of things into a CSV line using the specified settings object
        ///
        /// This function assumes:
        ///  * That the list of items is not null, but it may contain nulls
        ///  * That settings is not null
        ///  * That RiskyChars and ForceQualifierTypes have been set up correctly to match the CSV settings
        /// </summary>
        /// <remarks>
        /// NOTE(review): the value type of <paramref name="forceQualifierTypes"/> is assumed to be int;
        /// only its keys are consulted here. Confirm against CSVSettings.GetForceQualifierTypes.
        /// </remarks>
        /// <param name="items">The values to write; null entries are handled according to settings</param>
        /// <param name="settings">The CSV settings that control delimiters, qualifiers and null handling</param>
        /// <param name="riskyChars">Characters whose presence in a value forces it to be qualified</param>
        /// <param name="forceQualifierTypes">Types whose values are always qualified; may be null</param>
        /// <returns>The items joined by the field delimiter, without a trailing delimiter</returns>
        internal static string ItemsToCsv(IEnumerable items, CSVSettings settings, char[] riskyChars, Dictionary<Type, int> forceQualifierTypes)
        {
            var sb = new StringBuilder();
            foreach (var item in items)
            {
                // If this is null, check our settings for what they want us to do
                if (item == null)
                {
                    if (settings.AllowNull)
                    {
                        sb.Append(settings.NullToken);
                    }

                    sb.Append(settings.FieldDelimiter);
                    continue;
                }

                // A null token or a ToString() override may hand back null; treat that as an empty cell
                // rather than failing on the risky-character scan below
                var s = ItemToText(item, settings, riskyChars, forceQualifierTypes) ?? string.Empty;

                // Check if this item requires qualifiers
                var requiresQualifiers = settings.ForceQualifiers
                                         || s.IndexOfAny(riskyChars) >= 0
                                         || (forceQualifierTypes != null && forceQualifierTypes.ContainsKey(item.GetType()));

                // Okay, let's handle this value normally
                if (requiresQualifiers)
                {
                    sb.Append(settings.TextQualifier);
                }

                if (s.Length > 0)
                {
                    // Only go character-by-character if necessary
                    if (s.IndexOf(settings.TextQualifier) >= 0)
                    {
                        foreach (var c in s)
                        {
                            // Double up text qualifiers
                            if (c == settings.TextQualifier)
                            {
                                sb.Append(c);
                            }

                            sb.Append(c);
                        }
                    }
                    else
                    {
                        sb.Append(s);
                    }
                }

                // Move to the next cell
                if (requiresQualifiers)
                {
                    sb.Append(settings.TextQualifier);
                }

                sb.Append(settings.FieldDelimiter);
            }

            // Subtract the trailing delimiter so we don't inadvertently add an empty column at the end
            if (sb.Length > 0)
            {
                sb.Length -= 1;
            }

            return sb.ToString();
        }

        /// <summary>
        /// Converts one non-null item into its unqualified text form according to the settings
        /// </summary>
        /// <param name="item">The value to convert; must not be null</param>
        /// <param name="settings">The CSV settings that control date, array and nested object handling</param>
        /// <param name="riskyChars">Passed through to nested serialization</param>
        /// <param name="forceQualifierTypes">Passed through to nested serialization</param>
        /// <returns>The text for this item, which may be null</returns>
        private static string ItemToText(object item, CSVSettings settings, char[] riskyChars, Dictionary<Type, int> forceQualifierTypes)
        {
            // Strings are checked first so they are never unrolled as a sequence of characters
            var text = item as string;
            if (text != null)
            {
                return text;
            }

            if (item is DateTime)
            {
                return ((DateTime)item).ToString(settings.DateTimeFormat);
            }

            // Treat enumerables as a simple class of objects that can be unrolled
            var enumerable = item as IEnumerable;
            if (enumerable != null)
            {
                switch (settings.NestedArrayBehavior)
                {
                    case ArrayOptions.ToString:
                        return item.ToString();

                    case ArrayOptions.CountItems:
                        int enumerableCount = 0;
                        foreach (var unused in enumerable)
                        {
                            enumerableCount++;
                        }

                        return enumerableCount.ToString();

                    case ArrayOptions.TreatAsNull:
                        return settings.AllowNull ? settings.NullToken : string.Empty;

                    case ArrayOptions.RecursiveSerialization:
                        return ItemsToCsv(enumerable, settings, riskyChars, forceQualifierTypes);

                    default:
                        return string.Empty;
                }
            }

            var itemType = item.GetType();
            if (itemType.IsClass && settings.NestedObjectBehavior == ObjectOptions.RecursiveSerialization)
            {
                // Flatten the object: field values first, then property values
                var nestedItems = new List<object>();
                foreach (var field in itemType.GetFields())
                {
                    nestedItems.Add(field.GetValue(item));
                }

                foreach (var prop in itemType.GetProperties())
                {
                    nestedItems.Add(prop.GetValue(item, null));
                }

                return ItemsToCsv(nestedItems, settings, riskyChars, forceQualifierTypes);
            }

            return item.ToString();
        }

        /// <summary>
        /// Parse a separator line and determine the separator character it declares
        /// </summary>
        /// <param name="line">The line of text to examine, e.g. <c>sep=;</c></param>
        /// <returns>The separator, or null if this was not a 'sep=' line or it named no character</returns>
        /// <exception cref="Exception">The 'sep=' line names more than one character</exception>
        public static char? ParseSepLine(string line)
        {
            // A missing line cannot be a separator line
            if (line == null)
            {
                return null;
            }

            if (line.StartsWith("sep", StringComparison.OrdinalIgnoreCase))
            {
                var equals = line.Substring(3).Trim();
                if (equals.StartsWith("=", StringComparison.Ordinal))
                {
                    var separator = equals.Substring(1).Trim();
                    if (separator.Length > 1)
                    {
                        throw new Exception("Separator in 'sep=' line must be a single character");
                    }

                    if (separator.Length == 1)
                    {
                        return separator[0];
                    }
                }
            }

            // This wasn't a sep line
            return null;
        }
    }
}