Recently I need to parse CSV (comma separated values) text data into datatable. Standard format where first line is column names,
values are separated by commas and possible quoted.
I’ve used a powerful regex in .net to parse that data:
“(\”([^\"]*|\”{2})*\”(,|$))|\”[^\"]*\”(,|$)|[^,]+(,|$)|(,)” As usually it’s really unreadable.
And as result I’ve a class to parse CSV to datatable. Not much comments, just want to post the class, because was really sad after
search in Google… there are no ready solution for this common problem (if you know some – let me know in comments).
Here is the code:
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Data;
using System.Text.RegularExpressions;
namespace WebCrawler
{
public class CSVParser
{
Stream _stream;
/// <summary>
/// Creates Parser from Stream
/// </summary>
/// <param name="aStream"></param>
public CSVParser(Stream aStream)
{
_stream = aStream;
CSVregEx = new System.Text.RegularExpressions.Regex("(\"([^\"]*|\"{2})*\"(,|$))|\"[^\"]*\"(,|$)|[^,]+(,|$)|(,)");
}
string delimiter = ",";
string quotes = "\"";
System.Text.RegularExpressions.Regex CSVregEx;
protected string[] BreakCSV(string source)
{
MatchCollection matches = CSVregEx.Matches(source);
string[] res = new string[matches.Count];
int i = 0;
foreach (Match m in matches)
{
res[i] = m.Groups[0].Value.TrimEnd(delimiter[0]).Trim(quotes[0]);
i++;
}
return res;
}
private string _TableName = "CSV";
public string TableName
{
get { return _TableName; }
set { _TableName = value; }
}
public DataTable ParseToDataTable()
{
StreamReader reader = new StreamReader(_stream);
string firstLine = reader.ReadLine();
string[] columns = BreakCSV(firstLine);
DataTable result = new DataTable();
result.TableName = TableName;
foreach (string s in columns)
{
DataColumn cm = new DataColumn(s, typeof(String));
cm.ColumnMapping = MappingType.Attribute;
result.Columns.Add(cm);
}
string line ="";
while (!reader.EndOfStream)
{
line = reader.ReadLine();
string[] data = BreakCSV(line);
int i = 0;
DataRow dr = result.NewRow();
foreach (string d in data)
{
dr[i++] = d;
}
result.Rows.Add(dr);
}
reader.Close();
_stream.Close();
return result;
}
}
}
values are separated by commas and possible quoted.
I’ve used a powerful regex in .net to parse that data:
“(\”([^\"]*|\”{2})*\”(,|$))|\”[^\"]*\”(,|$)|[^,]+(,|$)|(,)” As usually it’s really unreadable.
And as result I’ve a class to parse CSV to datatable. Not much comments, just want to post the class, because was really sad after
search in Google… there are no ready solution for this common problem (if you know some – let me know in comments).
Here is the code:
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Data;
using System.Text.RegularExpressions;
namespace WebCrawler
{
public class CSVParser
{
Stream _stream;
/// <summary>
/// Creates Parser from Stream
/// </summary>
/// <param name="aStream"></param>
public CSVParser(Stream aStream)
{
_stream = aStream;
CSVregEx = new System.Text.RegularExpressions.Regex("(\"([^\"]*|\"{2})*\"(,|$))|\"[^\"]*\"(,|$)|[^,]+(,|$)|(,)");
}
string delimiter = ",";
string quotes = "\"";
System.Text.RegularExpressions.Regex CSVregEx;
protected string[] BreakCSV(string source)
{
MatchCollection matches = CSVregEx.Matches(source);
string[] res = new string[matches.Count];
int i = 0;
foreach (Match m in matches)
{
res[i] = m.Groups[0].Value.TrimEnd(delimiter[0]).Trim(quotes[0]);
i++;
}
return res;
}
private string _TableName = "CSV";
public string TableName
{
get { return _TableName; }
set { _TableName = value; }
}
public DataTable ParseToDataTable()
{
StreamReader reader = new StreamReader(_stream);
string firstLine = reader.ReadLine();
string[] columns = BreakCSV(firstLine);
DataTable result = new DataTable();
result.TableName = TableName;
foreach (string s in columns)
{
DataColumn cm = new DataColumn(s, typeof(String));
cm.ColumnMapping = MappingType.Attribute;
result.Columns.Add(cm);
}
string line ="";
while (!reader.EndOfStream)
{
line = reader.ReadLine();
string[] data = BreakCSV(line);
int i = 0;
DataRow dr = result.NewRow();
foreach (string d in data)
{
dr[i++] = d;
}
result.Rows.Add(dr);
}
reader.Close();
_stream.Close();
return result;
}
}
}
ليست هناك تعليقات:
إرسال تعليق