对 CSV 原始字符串数据进行排序的有效方法
Efficient way to Sort CSV raw string data
我有如下所述的原始 csv 数据
James,Mary,Patricia,Anthony,Donald\n
145,10,100,39,101\n
21,212,313,28,1
在上面提到的字符串中,列以逗号 ,
分隔,第一行是 column
并且在每个 \n
之后是一个新行,其中数据是针对每个人的。我在这里想要实现的是它应该按如下所述进行排序。
Anthony,Donald,James,Mary,Patricia\n
39,101,145,10,100\n
28,1,21,212,313
到目前为止我尝试过的是,基于\n
拆分,对每个值基于逗号,
进一步拆分,但在这种情况下将没有对排序值的正确引用。
我在哪个部分挣扎
string data = "James,Mary,Patricia,Anthony,Donald\n145,10,100,39,101\n21,212,313,28,1";
// Break the raw text into its lines: the header first, then the value rows.
string[] rows = data.Split('\n');
// Header cells, still in their original (unsorted) column order.
string[] unorderedNames = rows[0].Split(',');
基于\n拆分数组
根据 ,
逗号拆分名称 -
现在,如果我实施排序,我相信我会丢失所有引用,因为名称将被排序,但下面第 2 行和第 3 行中提到的付款不会。
在我上面提到的代码中,第一行根据\n将数组分成三部分。然后当我对第一行进行排序时,我相信我就失去了对同一数组中其他值的引用。
如果你能帮助我找到一些有效的方法,以有效的方式将这些原始数据转换成按字母顺序排序的值,我将不胜感激。
我认为问题在于您想要将 CSV 的 headers 排序为某种“任意”顺序,并让数据“随之而来”
想出一些方法将您的数据表示为二维数组:
string[] lines = File.ReadAllLines("path");

// Value rows: every line after the header, split naively on commas.
// (A crude way of parsing CSV, but parsing is incidental to this discussion.)
var data = lines
    .Skip(1)
    .Select(line => line.Split(','))
    .ToArray();

// Header cells, each paired with the column it originally occupied, then ordered by name.
var head = lines[0]
    .Split(',')
    .Select((name, column) => new { Name = name, Index = column })
    .OrderBy(entry => entry.Name)
    .ToArray();
head
现在是排序后的 headers,但它有一个额外的 属性,它告诉您 data
中的哪一列包含该人的数据。 Anthony 在 head
中排名第一,但他们的 Index 是 3 所以我们应该从 data[3]
中获取 Anthony 的数据
// Walk the people in sorted-header order. Each entry's Index is the column the
// person occupied before sorting, so it still locates their values in every row.
foreach (var person in head)
{
    Console.WriteLine($"Now printing {person.Name} data from column {person.Index}");
    foreach (var line in data)
    {
        // Each element of data is one split CSV row; pick this person's cell from it.
        Console.WriteLine(line[person.Index]);
    }
}
我们没有对数据进行排序(不排序效率更高),我们只是将它所在的列存储为确实排序的 object 的一部分,然后不管人员排序顺序如何,我们通过该列访问数据。排序 head
非常快,因为它只是几个名字。它始终保持其“数据在哪里”的地图,因为 Index
不会改变,无论 head
的排序顺序如何
/// <summary>
/// Demonstrates re-ordering the columns of a small three-line CSV string so that the
/// header names are alphabetical and each person's values move together with their name.
/// </summary>
public class StackDemo
{
    private string source = "James,Mary,Patricia,Anthony,Donald\n145,10,100,39,101\n21,212,313,28,1";

    /// <summary>
    /// Splits <c>source</c> into a header row and two value rows, groups every column into a
    /// <see cref="Person"/>, sorts the people by name and writes them back in the same layout.
    /// </summary>
    /// <returns>Three comma-separated lines joined by '\n', with no trailing newline.</returns>
    public string ProcessString()
    {
        var rows = source.Split('\n');
        var row1Values = rows[0].Split(',');
        var row2Values = rows[1].Split(',');
        var row3Values = rows[2].Split(',');

        // The header row decides how many columns there are; nothing is hard-coded to 5.
        List<Person> people = new List<Person>();
        for (int index = 0; index < row1Values.Length; index++)
        {
            people.Add(new Person()
            {
                Name = row1Values[index],
                SomeValue = row2Values[index],
                OtherValue = row3Values[index]
            });
        }

        // Same culture-sensitive ordering as string.CompareTo.
        people.Sort((x, y) => string.Compare(x.Name, y.Name));

        // Project each property back into its own line; string.Join places the separators.
        string nameLine = string.Join(",", people.ConvertAll(p => p.Name));
        string someValueLine = string.Join(",", people.ConvertAll(p => p.SomeValue));
        string otherValueLine = string.Join(",", people.ConvertAll(p => p.OtherValue));

        return nameLine + "\n" + someValueLine + "\n" + otherValueLine;
    }
}

/// <summary>
/// One CSV column: the header name plus the values found below it in the two data rows.
/// </summary>
public class Person
{
    /// <summary>Header cell (row 1) of this column.</summary>
    public string Name { get; set; }

    /// <summary>Cell from the first data row (row 2) of this column.</summary>
    public string SomeValue { get; set; }

    /// <summary>Cell from the second data row (row 3) of this column.</summary>
    public string OtherValue { get; set; }
}
此代码非常基础(比较粗糙),但它应该能满足您的需求。
它返回与接收到的格式相同的字符串。
编辑:对评论问题进行了扩展!
添加了一些单元测试以帮助验证我如何理解您的问题:
/// <summary>
/// Checks StackDemo3.ProcessString against inputs with five down to zero
/// fully populated columns.
/// </summary>
public class UnitTest1
{
    [Fact]
    public void TestWith5()
    {
        AssertProcessed(
            "James,Mary,Patricia,Anthony,Donald\n145,10,100,39,101\n21,212,313,28,1",
            "Anthony,Donald,James,Mary,Patricia\n39,101,145,10,100\n28,1,21,212,313");
    }

    [Fact]
    public void TestWith4()
    {
        AssertProcessed(
            "James,Mary,Patricia,Anthony,\n145,10,100,39,\n21,212,313,28,",
            ",Anthony,James,Mary,Patricia\n,39,145,10,100\n,28,21,212,313");
    }

    [Fact]
    public void TestWith3()
    {
        AssertProcessed(
            "James,Mary,Patricia,,\n145,10,100,,\n21,212,313,,",
            ",,James,Mary,Patricia\n,,145,10,100\n,,21,212,313");
    }

    [Fact]
    public void TestWith2()
    {
        AssertProcessed(
            ",,James,Mary,\n,,145,10,\n,,21,212,",
            ",,,James,Mary\n,,,145,10\n,,,21,212");
    }

    [Fact]
    public void TestWith1()
    {
        AssertProcessed(
            "James,,,,\n145,,,,\n21,,,,",
            "James,,,,\n145,,,,\n21,,,,");
    }

    [Fact]
    public void TestWith0()
    {
        AssertProcessed(
            ",,,,\n,,,,\n,,,,",
            ",,,,\n,,,,\n,,,,");
    }

    // Shared arrange/act/assert: run a fresh StackDemo3 over the input and compare the text.
    private static void AssertProcessed(string input, string expected)
    {
        StackDemo3 subject = new StackDemo3();

        string actualResult = subject.ProcessString(input);

        Assert.Equal(expected, actualResult);
    }
}
这里是实际的实现:
/// <summary>
/// Contract for turning a raw text block into a list of <see cref="Person"/> objects.
/// </summary>
public interface IStringPeopleParser
{
/// <summary>Builds the list of people described by <paramref name="input"/>.</summary>
/// <param name="input">Raw text to parse; the expected layout is up to the implementation.</param>
/// <returns>The people produced from the text.</returns>
List<Person> ConvertToPeople(string input);
}
/// <summary>
/// Contract for turning a list of <see cref="Person"/> objects back into a single string.
/// </summary>
public interface IPeopleStringParser
{
/// <summary>Produces the text form of <paramref name="people"/>.</summary>
/// <param name="people">The people to write out, in the order they should appear.</param>
/// <returns>The text produced for the list; the layout is up to the implementation.</returns>
string ConvertPeopleToString(List<Person> people);
}
/// <summary>
/// Writes a list of people as three comma-separated lines: names, then SomeValue
/// entries, then OtherValue entries, keeping the order of the list.
/// </summary>
public class PeopleStringParser : IPeopleStringParser
{
    /// <summary>Serialises <paramref name="people"/> column by column.</summary>
    /// <param name="people">People in the order their columns should appear.</param>
    /// <returns>Three lines separated by '\n', with no trailing newline.</returns>
    public string ConvertPeopleToString(List<Person> people)
    {
        // One projection per output line; string.Join puts commas between the cells.
        string nameLine = string.Join(",", people.ConvertAll(person => person.Name));
        string someValueLine = string.Join(",", people.ConvertAll(person => person.SomeValue));
        string otherValueLine = string.Join(",", people.ConvertAll(person => person.OtherValue));

        return nameLine + "\n" + someValueLine + "\n" + otherValueLine;
    }
}
/// <summary>
/// Reads three '\n'-separated, comma-delimited lines and turns every column into a Person.
/// </summary>
public class StringPeopleParser : IStringPeopleParser
{
    /// <summary>Builds one Person per cell of the first line.</summary>
    /// <param name="source">Text whose first three lines hold names, SomeValue cells and OtherValue cells.</param>
    /// <returns>People in the same order as the columns of the first line.</returns>
    public List<Person> ConvertToPeople(string source)
    {
        string[] lines = source.Split('\n');
        string[] names = lines[0].Split(',');
        string[] someValues = lines[1].Split(',');
        string[] otherValues = lines[2].Split(',');

        var result = new List<Person>();
        int column = 0;
        foreach (string name in names)
        {
            // The cells at the same position in all three lines belong to one person.
            var person = new Person();
            person.Name = name;
            person.SomeValue = someValues[column];
            person.OtherValue = otherValues[column];
            result.Add(person);
            column++;
        }

        return result;
    }
}
/// <summary>
/// Parses the CSV-like text into people, sorts them by name when there is something
/// to sort, and writes them back in the original three-line layout.
/// </summary>
public class StackDemo3
{
    private readonly IStringPeopleParser stringPeopleParser = new StringPeopleParser();
    private readonly IPeopleStringParser peopleStringParser = new PeopleStringParser();

    /// <summary>Re-orders the columns of <paramref name="s"/> by header name.</summary>
    /// <param name="s">Text in the layout understood by the string-to-people parser.</param>
    /// <returns>
    /// The columns ordered by name when at least two people are valid; otherwise the
    /// people are written back in their original order.
    /// </returns>
    public string ProcessString(string s)
    {
        List<Person> people = stringPeopleParser.ConvertToPeople(s);
        int validCount = people.Count(x => x.IsValid());

        // With zero or one valid person the input order is kept. Any larger count is
        // sorted, with no upper limit, so inputs wider than five columns are no longer
        // turned into an empty string.
        if (validCount > 1)
        {
            people = people.OrderBy(x => x.Name).ToList();
        }

        return peopleStringParser.ConvertPeopleToString(people);
    }
}
/// <summary>
/// One CSV column: a name plus the two values that belong to it.
/// </summary>
public class Person
{
    public string Name { get; set; }
    public string SomeValue { get; set; }
    public string OtherValue { get; set; }

    /// <summary>
    /// Reports whether all three properties hold something other than null, empty or whitespace.
    /// </summary>
    /// <returns>true when Name, SomeValue and OtherValue are all non-blank; otherwise false.</returns>
    public bool IsValid()
    {
        return !string.IsNullOrWhiteSpace(Name)
            && !string.IsNullOrWhiteSpace(SomeValue)
            && !string.IsNullOrWhiteSpace(OtherValue);
    }
}
我也不明白你为什么不想要这个 Person class?
您需要通过创建 Person class 在每行中的 3 个可能值之间进行引用(索引值是键),class 实例成为所述引用。
我有如下所述的原始 csv 数据
James,Mary,Patricia,Anthony,Donald\n
145,10,100,39,101\n
21,212,313,28,1
在上面提到的字符串中,列以逗号 ,
分隔,第一行是 column
并且在每个 \n
之后是一个新行,其中数据是针对每个人的。我在这里想要实现的是它应该按如下所述进行排序。
Anthony,Donald,James,Mary,Patricia\n
39,101,145,10,100\n
28,1,21,212,313
到目前为止我尝试过的是,基于\n
拆分,对每个值基于逗号,
进一步拆分,但在这种情况下将没有对排序值的正确引用。
我在哪个部分挣扎
string data = "James,Mary,Patricia,Anthony,Donald\n145,10,100,39,101\n21,212,313,28,1";
// Break the raw text into its lines: the header first, then the value rows.
string[] rows = data.Split('\n');
// Header cells, still in their original (unsorted) column order.
string[] unorderedNames = rows[0].Split(',');
基于\n拆分数组
根据 ,
逗号拆分名称 -
现在,如果我实施排序,我相信我会丢失所有引用,因为名称将被排序,但下面第 2 行和第 3 行中提到的付款不会。
在我上面提到的代码中,第一行根据\n将数组分成三部分。然后当我对第一行进行排序时,我相信我就失去了对同一数组中其他值的引用。
如果你能帮助我找到一些有效的方法,以有效的方式将这些原始数据转换成按字母顺序排序的值,我将不胜感激。
我认为问题在于您想要将 CSV 的 headers 排序为某种“任意”顺序,并让数据“随之而来”
想出一些方法将您的数据表示为二维数组:
string[] lines = File.ReadAllLines("path");

// Value rows: every line after the header, split naively on commas.
// (A crude way of parsing CSV, but parsing is incidental to this discussion.)
var data = lines
    .Skip(1)
    .Select(line => line.Split(','))
    .ToArray();

// Header cells, each paired with the column it originally occupied, then ordered by name.
var head = lines[0]
    .Split(',')
    .Select((name, column) => new { Name = name, Index = column })
    .OrderBy(entry => entry.Name)
    .ToArray();
head
现在是排序后的 headers,但它有一个额外的 属性,它告诉您 data
中的哪一列包含该人的数据。 Anthony 在 head
中排名第一,但他们的 Index 是 3 所以我们应该从 data[3]
// Walk the people in sorted-header order. Each entry's Index is the column the
// person occupied before sorting, so it still locates their values in every row.
foreach (var person in head)
{
    Console.WriteLine($"Now printing {person.Name} data from column {person.Index}");
    foreach (var line in data)
    {
        // Each element of data is one split CSV row; pick this person's cell from it.
        Console.WriteLine(line[person.Index]);
    }
}
我们没有对数据进行排序(不排序效率更高),我们只是将它所在的列存储为确实排序的 object 的一部分,然后不管人员排序顺序如何,我们通过该列访问数据。排序 head
非常快,因为它只是几个名字。它始终保持其“数据在哪里”的地图,因为 Index
不会改变,无论 head
/// <summary>
/// Demonstrates re-ordering the columns of a small three-line CSV string so that the
/// header names are alphabetical and each person's values move together with their name.
/// </summary>
public class StackDemo
{
    private string source = "James,Mary,Patricia,Anthony,Donald\n145,10,100,39,101\n21,212,313,28,1";

    /// <summary>
    /// Splits <c>source</c> into a header row and two value rows, groups every column into a
    /// <see cref="Person"/>, sorts the people by name and writes them back in the same layout.
    /// </summary>
    /// <returns>Three comma-separated lines joined by '\n', with no trailing newline.</returns>
    public string ProcessString()
    {
        var rows = source.Split('\n');
        var row1Values = rows[0].Split(',');
        var row2Values = rows[1].Split(',');
        var row3Values = rows[2].Split(',');

        // The header row decides how many columns there are; nothing is hard-coded to 5.
        List<Person> people = new List<Person>();
        for (int index = 0; index < row1Values.Length; index++)
        {
            people.Add(new Person()
            {
                Name = row1Values[index],
                SomeValue = row2Values[index],
                OtherValue = row3Values[index]
            });
        }

        // Same culture-sensitive ordering as string.CompareTo.
        people.Sort((x, y) => string.Compare(x.Name, y.Name));

        // Project each property back into its own line; string.Join places the separators.
        string nameLine = string.Join(",", people.ConvertAll(p => p.Name));
        string someValueLine = string.Join(",", people.ConvertAll(p => p.SomeValue));
        string otherValueLine = string.Join(",", people.ConvertAll(p => p.OtherValue));

        return nameLine + "\n" + someValueLine + "\n" + otherValueLine;
    }
}

/// <summary>
/// One CSV column: the header name plus the values found below it in the two data rows.
/// </summary>
public class Person
{
    /// <summary>Header cell (row 1) of this column.</summary>
    public string Name { get; set; }

    /// <summary>Cell from the first data row (row 2) of this column.</summary>
    public string SomeValue { get; set; }

    /// <summary>Cell from the second data row (row 3) of this column.</summary>
    public string OtherValue { get; set; }
}
此代码非常基础(比较粗糙),但它应该能满足您的需求。
它返回与接收到的格式相同的字符串。
编辑:对评论问题进行了扩展!
添加了一些单元测试以帮助验证我如何理解您的问题:
/// <summary>
/// Checks StackDemo3.ProcessString against inputs with five down to zero
/// fully populated columns.
/// </summary>
public class UnitTest1
{
    [Fact]
    public void TestWith5()
    {
        AssertProcessed(
            "James,Mary,Patricia,Anthony,Donald\n145,10,100,39,101\n21,212,313,28,1",
            "Anthony,Donald,James,Mary,Patricia\n39,101,145,10,100\n28,1,21,212,313");
    }

    [Fact]
    public void TestWith4()
    {
        AssertProcessed(
            "James,Mary,Patricia,Anthony,\n145,10,100,39,\n21,212,313,28,",
            ",Anthony,James,Mary,Patricia\n,39,145,10,100\n,28,21,212,313");
    }

    [Fact]
    public void TestWith3()
    {
        AssertProcessed(
            "James,Mary,Patricia,,\n145,10,100,,\n21,212,313,,",
            ",,James,Mary,Patricia\n,,145,10,100\n,,21,212,313");
    }

    [Fact]
    public void TestWith2()
    {
        AssertProcessed(
            ",,James,Mary,\n,,145,10,\n,,21,212,",
            ",,,James,Mary\n,,,145,10\n,,,21,212");
    }

    [Fact]
    public void TestWith1()
    {
        AssertProcessed(
            "James,,,,\n145,,,,\n21,,,,",
            "James,,,,\n145,,,,\n21,,,,");
    }

    [Fact]
    public void TestWith0()
    {
        AssertProcessed(
            ",,,,\n,,,,\n,,,,",
            ",,,,\n,,,,\n,,,,");
    }

    // Shared arrange/act/assert: run a fresh StackDemo3 over the input and compare the text.
    private static void AssertProcessed(string input, string expected)
    {
        StackDemo3 subject = new StackDemo3();

        string actualResult = subject.ProcessString(input);

        Assert.Equal(expected, actualResult);
    }
}
这里是实际的实现:
/// <summary>
/// Contract for turning a raw text block into a list of <see cref="Person"/> objects.
/// </summary>
public interface IStringPeopleParser
{
/// <summary>Builds the list of people described by <paramref name="input"/>.</summary>
/// <param name="input">Raw text to parse; the expected layout is up to the implementation.</param>
/// <returns>The people produced from the text.</returns>
List<Person> ConvertToPeople(string input);
}
/// <summary>
/// Contract for turning a list of <see cref="Person"/> objects back into a single string.
/// </summary>
public interface IPeopleStringParser
{
/// <summary>Produces the text form of <paramref name="people"/>.</summary>
/// <param name="people">The people to write out, in the order they should appear.</param>
/// <returns>The text produced for the list; the layout is up to the implementation.</returns>
string ConvertPeopleToString(List<Person> people);
}
/// <summary>
/// Writes a list of people as three comma-separated lines: names, then SomeValue
/// entries, then OtherValue entries, keeping the order of the list.
/// </summary>
public class PeopleStringParser : IPeopleStringParser
{
    /// <summary>Serialises <paramref name="people"/> column by column.</summary>
    /// <param name="people">People in the order their columns should appear.</param>
    /// <returns>Three lines separated by '\n', with no trailing newline.</returns>
    public string ConvertPeopleToString(List<Person> people)
    {
        // One projection per output line; string.Join puts commas between the cells.
        string nameLine = string.Join(",", people.ConvertAll(person => person.Name));
        string someValueLine = string.Join(",", people.ConvertAll(person => person.SomeValue));
        string otherValueLine = string.Join(",", people.ConvertAll(person => person.OtherValue));

        return nameLine + "\n" + someValueLine + "\n" + otherValueLine;
    }
}
/// <summary>
/// Reads three '\n'-separated, comma-delimited lines and turns every column into a Person.
/// </summary>
public class StringPeopleParser : IStringPeopleParser
{
    /// <summary>Builds one Person per cell of the first line.</summary>
    /// <param name="source">Text whose first three lines hold names, SomeValue cells and OtherValue cells.</param>
    /// <returns>People in the same order as the columns of the first line.</returns>
    public List<Person> ConvertToPeople(string source)
    {
        string[] lines = source.Split('\n');
        string[] names = lines[0].Split(',');
        string[] someValues = lines[1].Split(',');
        string[] otherValues = lines[2].Split(',');

        var result = new List<Person>();
        int column = 0;
        foreach (string name in names)
        {
            // The cells at the same position in all three lines belong to one person.
            var person = new Person();
            person.Name = name;
            person.SomeValue = someValues[column];
            person.OtherValue = otherValues[column];
            result.Add(person);
            column++;
        }

        return result;
    }
}
/// <summary>
/// Parses the CSV-like text into people, sorts them by name when there is something
/// to sort, and writes them back in the original three-line layout.
/// </summary>
public class StackDemo3
{
    private readonly IStringPeopleParser stringPeopleParser = new StringPeopleParser();
    private readonly IPeopleStringParser peopleStringParser = new PeopleStringParser();

    /// <summary>Re-orders the columns of <paramref name="s"/> by header name.</summary>
    /// <param name="s">Text in the layout understood by the string-to-people parser.</param>
    /// <returns>
    /// The columns ordered by name when at least two people are valid; otherwise the
    /// people are written back in their original order.
    /// </returns>
    public string ProcessString(string s)
    {
        List<Person> people = stringPeopleParser.ConvertToPeople(s);
        int validCount = people.Count(x => x.IsValid());

        // With zero or one valid person the input order is kept. Any larger count is
        // sorted, with no upper limit, so inputs wider than five columns are no longer
        // turned into an empty string.
        if (validCount > 1)
        {
            people = people.OrderBy(x => x.Name).ToList();
        }

        return peopleStringParser.ConvertPeopleToString(people);
    }
}
/// <summary>
/// One CSV column: a name plus the two values that belong to it.
/// </summary>
public class Person
{
    public string Name { get; set; }
    public string SomeValue { get; set; }
    public string OtherValue { get; set; }

    /// <summary>
    /// Reports whether all three properties hold something other than null, empty or whitespace.
    /// </summary>
    /// <returns>true when Name, SomeValue and OtherValue are all non-blank; otherwise false.</returns>
    public bool IsValid()
    {
        return !string.IsNullOrWhiteSpace(Name)
            && !string.IsNullOrWhiteSpace(SomeValue)
            && !string.IsNullOrWhiteSpace(OtherValue);
    }
}
我也不明白你为什么不想要这个 Person class? 您需要通过创建 Person class 在每行中的 3 个可能值之间进行引用(索引值是键),class 实例成为所述引用。