如何让我的 StreamWriter 更快?
How to make my StreamWriter faster?
我有这样的 C# 代码:
// Code as posted in the question. The only change is that each verbatim string
// literal is written as a single token (@"..."): a line break between '@' and
// the opening quote does not compile. The slow read pattern is kept on purpose
// because it is the subject of the question.
string strFilePath = @"E:\input.txt";
string strFilePath2 = @"E:\output.txt";
const int BufferSize = 65536; // 64 Kilobytes (declared, but never passed to any stream below)
FileStream fs = new FileStream(strFilePath2, FileMode.OpenOrCreate);
using(StreamWriter sw = new StreamWriter(fs)) {
    // File.ReadLines(strFilePath).Count() is part of the loop condition, so it is
    // evaluated before every iteration and enumerates the whole input file each time.
    for (int ww = 0; ww < File.ReadLines(strFilePath).Count(); ww++) {
        // GetLine (posted separately with this question) opens the file again and
        // reads from the first line on every call.
        string tx2 = GetLine(strFilePath, ww).Replace("||", "| |");
        // Leading run of digits, '|' and '-' characters.
        var first = String.Join("", tx2.TakeWhile(c => Char.IsDigit(c) || c == '|' || c == '-'));
        // Trailing run of digits, '|' and '-' characters.
        var third = String.Join("", tx2.Reverse().TakeWhile(c => Char.IsDigit(c) || c == '|' || c == '-').Reverse());
        // What remains between the two runs; never used afterwards.
        var second = tx2.Replace(first, "").Replace(third, "");
        string awal = first.ToString();
        string dua = third.ToString();
        // Output line: leading run + trailing run, with the doubled separator collapsed.
        string gabung = (awal + dua).Replace("||", "|") + Environment.NewLine;
        string[] pdua = dua.Split('|');
        // Sum of elements 1..19 of the split trailing run.
        int totalkanan = int.Parse(pdua[1]) + int.Parse(pdua[2]) + int.Parse(pdua[3]) + int.Parse(pdua[4]) + int.Parse(pdua[5]) + int.Parse(pdua[6]) + int.Parse(pdua[7]) + int.Parse(pdua[8]) + int.Parse(pdua[9]) + int.Parse(pdua[10]) + int.Parse(pdua[11]) + int.Parse(pdua[12]) + int.Parse(pdua[13]) + int.Parse(pdua[14]) + int.Parse(pdua[15]) + int.Parse(pdua[16]) + int.Parse(pdua[17]) + int.Parse(pdua[18]) + int.Parse(pdua[19]);
        // Only lines whose sum is positive are written.
        if (totalkanan > 0) {
            sw.Write(gabung);
        }
    }
}
GetLine 方法:
/// <summary>
/// Returns the line at zero-based index <paramref name="line"/> of a text file.
/// </summary>
/// <param name="fileName">Path of the file to read.</param>
/// <param name="line">Zero-based index of the line to return.</param>
/// <returns>
/// The requested line, or <c>null</c> when the file has fewer than
/// <paramref name="line"/> + 1 lines.
/// </returns>
/// <remarks>
/// Every call opens the file and reads it from the first line, so the cost of a
/// call grows with <paramref name="line"/>.
/// </remarks>
string GetLine(string fileName, int line)
{
    using (var sr = new StreamReader(fileName))
    {
        // Skip exactly the lines that precede the requested one. The earlier
        // version always discarded one line before its loop, so index 0 and
        // index 1 both returned the second line and the first was unreachable.
        for (int i = 0; i < line; i++)
        {
            sr.ReadLine();
        }

        return sr.ReadLine();
    }
}
输入示例:
937|41|0|0|0|0|484|0|0|0|0|0|0|0|20||1|First lesson is when you'll be tested on your knowledge of the Elements of Darkness. Your task is to go get 20 Darkness elements from the Bale professors at the Zone of Darkness Elements.|2582|4342|1|0|0|0|0|470|0|0|0|0|0|0|0|0|0|0|0|
937|42|4335|1|0|0|470|0|0|0|0|0|0|0|2|Pass the test.|1||0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|
937|43|0|0|0|0|483|0|0|0|0|0|0|0|42||1|There are no more tests to take.|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|
937|44|7928|20|4348|1|481|0|0|0|0|0|0|0|42||1|Congratulations on your completion of all the lessons! Come back in a little while to receive your diploma.|2583|7928|-20|4348|-1|0|0|483|0|0|0|0|0|0|0|0|0|0|0|
937|45|7927|20|4347|1|479|0|0|0|0|0|0|0|42||1|Very nice~! Next, you'll be tested on the Elements of light.|2584|7927|-20|4347|-1|0|0|481|0|0|0|0|0|0|0|0|0|0|0|
937|46|7926|20|4346|1|477|0|0|0|0|0|0|0|42||1|Very nice~! Next, you'll be tested on the Elements of light.|2585|7926|-20|4346|-1|0|0|479|0|0|0|0|0|0|0|0|0|0|0|
输出:
937|41|0|0|0|0|484|0|0|0|0|0|0|0|20|2582|4342|1|0|0|0|0|470|0|0|0|0|0|0|0|0|0|0|0|
937|42|4335|1|0|0|470|0|0|0|0|0|0|0|2|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|
937|43|0|0|0|0|483|0|0|0|0|0|0|0|42|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|
937|44|7928|20|4348|1|481|0|0|0|0|0|0|0|42|2583|7928|-20|4348|-1|0|0|483|0|0|0|0|0|0|0|0|0|0|0|
937|45|7927|20|4347|1|479|0|0|0|0|0|0|0|42|2584|7927|-20|4347|-1|0|0|481|0|0|0|0|0|0|0|0|0|0|0|
937|46|7926|20|4346|1|477|0|0|0|0|0|0|0|42|2585|7926|-20|4346|-1|0|0|479|0|0|0|0|0|0|0|0|0|0|0|
这个程序可以运行,但问题是:当输入文件有 17k 多行时,写入过程非常慢;而当输入文件只有 4k 多行时,只需要大约 10 秒。按理说,为什么 4k 行只需 10 秒,而 17k 多行却要 5 分钟?
有什么解决这个问题的建议吗?提前致谢!
您的 GetLine() 方法每取一行,都必须从头读取文件(一直读到所请求的那一行)。
因此,随着行数增加,总耗时大致按行数的平方增长(而不是线性增长),性能会急剧下降。
为什么不在顺序读取文件的同时逐行处理呢?
// Single pass over the input: each line is read once, reduced to its leading
// and trailing numeric runs, and written out when the trailing fields sum to
// more than zero.
string strFilePath = @"E:\input.txt";
string strFilePath2 = @"E:\output.txt";
const int BufferSize = 65536; // 64 Kilobytes

// A character belongs to a numeric run if it is a digit, a '|' separator or a minus sign.
Func<char, bool> isFieldChar = c => Char.IsDigit(c) || c == '|' || c == '-';

// The stream is declared here (the snippet previously used an undeclared `fs`).
// FileMode.Create truncates an existing output file, so no stale bytes from a
// longer previous run are left behind; BufferSize is used as the stream buffer.
using (FileStream fs = new FileStream(strFilePath2, FileMode.Create, FileAccess.Write, FileShare.None, BufferSize))
using (StreamWriter sw = new StreamWriter(fs))
{
    foreach (var line in File.ReadLines(strFilePath))
    {
        // Put a space into empty fields so the text part is not swallowed by a numeric run.
        string tx2 = line.Replace("||", "| |");

        // Length of the leading numeric run.
        int headLength = 0;
        while (headLength < tx2.Length && isFieldChar(tx2[headLength]))
        {
            headLength++;
        }

        // Start index of the trailing numeric run.
        int tailStart = tx2.Length;
        while (tailStart > 0 && isFieldChar(tx2[tailStart - 1]))
        {
            tailStart--;
        }

        string awal = tx2.Substring(0, headLength);
        string dua = tx2.Substring(tailStart);

        // For the sample rows `dua` begins with '|', so Split yields an empty
        // element at index 0 and the 19 numeric fields at indices 1..19.
        string[] pdua = dua.Split('|');
        int totalkanan = 0;
        for (int i = 1; i <= 19; i++)
        {
            totalkanan += int.Parse(pdua[i]);
        }

        if (totalkanan > 0)
        {
            // Joining the two runs produces "||" at the seam; collapse it to one separator.
            sw.WriteLine((awal + dua).Replace("||", "|"));
        }
    }
}
您正在以低效的方式进行很多字符串操作,这会导致大量内存分配,进而引发大量垃圾回收,这两者都会拖慢程序的速度。
看起来您正在读取文件一次以获取行数,然后每行每次都重新读取该文件以从文件中获取下一行!
File 类还提供了其他更高效的方法,请改为这样做:
// Skeleton for a single pass over a file, one line per iteration.
// NOTE(review): `path` is not declared in this snippet — presumably the input file path.
// Counts the lines seen so far; it is incremented before the line is processed,
// so it is 1 while the first line is being handled.
int lineNumber = 0;
foreach (var line in File.ReadLines(path))
{
    lineNumber++;
    // process line
}
另外,您知道数组的索引是从 0 开始的吗?pdua[1] 是数组中的第二项……
我有这样的 C# 代码:
// Code as posted in the question. The only change is that each verbatim string
// literal is written as a single token (@"..."): a line break between '@' and
// the opening quote does not compile. The slow read pattern is kept on purpose
// because it is the subject of the question.
string strFilePath = @"E:\input.txt";
string strFilePath2 = @"E:\output.txt";
const int BufferSize = 65536; // 64 Kilobytes (declared, but never passed to any stream below)
FileStream fs = new FileStream(strFilePath2, FileMode.OpenOrCreate);
using(StreamWriter sw = new StreamWriter(fs)) {
    // File.ReadLines(strFilePath).Count() is part of the loop condition, so it is
    // evaluated before every iteration and enumerates the whole input file each time.
    for (int ww = 0; ww < File.ReadLines(strFilePath).Count(); ww++) {
        // GetLine (posted separately with this question) opens the file again and
        // reads from the first line on every call.
        string tx2 = GetLine(strFilePath, ww).Replace("||", "| |");
        // Leading run of digits, '|' and '-' characters.
        var first = String.Join("", tx2.TakeWhile(c => Char.IsDigit(c) || c == '|' || c == '-'));
        // Trailing run of digits, '|' and '-' characters.
        var third = String.Join("", tx2.Reverse().TakeWhile(c => Char.IsDigit(c) || c == '|' || c == '-').Reverse());
        // What remains between the two runs; never used afterwards.
        var second = tx2.Replace(first, "").Replace(third, "");
        string awal = first.ToString();
        string dua = third.ToString();
        // Output line: leading run + trailing run, with the doubled separator collapsed.
        string gabung = (awal + dua).Replace("||", "|") + Environment.NewLine;
        string[] pdua = dua.Split('|');
        // Sum of elements 1..19 of the split trailing run.
        int totalkanan = int.Parse(pdua[1]) + int.Parse(pdua[2]) + int.Parse(pdua[3]) + int.Parse(pdua[4]) + int.Parse(pdua[5]) + int.Parse(pdua[6]) + int.Parse(pdua[7]) + int.Parse(pdua[8]) + int.Parse(pdua[9]) + int.Parse(pdua[10]) + int.Parse(pdua[11]) + int.Parse(pdua[12]) + int.Parse(pdua[13]) + int.Parse(pdua[14]) + int.Parse(pdua[15]) + int.Parse(pdua[16]) + int.Parse(pdua[17]) + int.Parse(pdua[18]) + int.Parse(pdua[19]);
        // Only lines whose sum is positive are written.
        if (totalkanan > 0) {
            sw.Write(gabung);
        }
    }
}
GetLine 方法:
/// <summary>
/// Returns the line at zero-based index <paramref name="line"/> of a text file.
/// </summary>
/// <param name="fileName">Path of the file to read.</param>
/// <param name="line">Zero-based index of the line to return.</param>
/// <returns>
/// The requested line, or <c>null</c> when the file has fewer than
/// <paramref name="line"/> + 1 lines.
/// </returns>
/// <remarks>
/// Every call opens the file and reads it from the first line, so the cost of a
/// call grows with <paramref name="line"/>.
/// </remarks>
string GetLine(string fileName, int line)
{
    using (var sr = new StreamReader(fileName))
    {
        // Skip exactly the lines that precede the requested one. The earlier
        // version always discarded one line before its loop, so index 0 and
        // index 1 both returned the second line and the first was unreachable.
        for (int i = 0; i < line; i++)
        {
            sr.ReadLine();
        }

        return sr.ReadLine();
    }
}
输入示例:
937|41|0|0|0|0|484|0|0|0|0|0|0|0|20||1|First lesson is when you'll be tested on your knowledge of the Elements of Darkness. Your task is to go get 20 Darkness elements from the Bale professors at the Zone of Darkness Elements.|2582|4342|1|0|0|0|0|470|0|0|0|0|0|0|0|0|0|0|0|
937|42|4335|1|0|0|470|0|0|0|0|0|0|0|2|Pass the test.|1||0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|
937|43|0|0|0|0|483|0|0|0|0|0|0|0|42||1|There are no more tests to take.|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|
937|44|7928|20|4348|1|481|0|0|0|0|0|0|0|42||1|Congratulations on your completion of all the lessons! Come back in a little while to receive your diploma.|2583|7928|-20|4348|-1|0|0|483|0|0|0|0|0|0|0|0|0|0|0|
937|45|7927|20|4347|1|479|0|0|0|0|0|0|0|42||1|Very nice~! Next, you'll be tested on the Elements of light.|2584|7927|-20|4347|-1|0|0|481|0|0|0|0|0|0|0|0|0|0|0|
937|46|7926|20|4346|1|477|0|0|0|0|0|0|0|42||1|Very nice~! Next, you'll be tested on the Elements of light.|2585|7926|-20|4346|-1|0|0|479|0|0|0|0|0|0|0|0|0|0|0|
输出:
937|41|0|0|0|0|484|0|0|0|0|0|0|0|20|2582|4342|1|0|0|0|0|470|0|0|0|0|0|0|0|0|0|0|0|
937|42|4335|1|0|0|470|0|0|0|0|0|0|0|2|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|
937|43|0|0|0|0|483|0|0|0|0|0|0|0|42|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|
937|44|7928|20|4348|1|481|0|0|0|0|0|0|0|42|2583|7928|-20|4348|-1|0|0|483|0|0|0|0|0|0|0|0|0|0|0|
937|45|7927|20|4347|1|479|0|0|0|0|0|0|0|42|2584|7927|-20|4347|-1|0|0|481|0|0|0|0|0|0|0|0|0|0|0|
937|46|7926|20|4346|1|477|0|0|0|0|0|0|0|42|2585|7926|-20|4346|-1|0|0|479|0|0|0|0|0|0|0|0|0|0|0|
这个程序可以运行,但问题是:当输入文件有 17k 多行时,写入过程非常慢;而当输入文件只有 4k 多行时,只需要大约 10 秒。按理说,为什么 4k 行只需 10 秒,而 17k 多行却要 5 分钟?有什么解决这个问题的建议吗?提前致谢!
您的 GetLine() 方法每取一行,都必须从头读取文件(一直读到所请求的那一行)。
因此,随着行数增加,总耗时大致按行数的平方增长(而不是线性增长),性能会急剧下降。
为什么不在顺序读取文件的同时逐行处理呢?
// Single pass over the input: each line is read once, reduced to its leading
// and trailing numeric runs, and written out when the trailing fields sum to
// more than zero.
string strFilePath = @"E:\input.txt";
string strFilePath2 = @"E:\output.txt";
const int BufferSize = 65536; // 64 Kilobytes

// A character belongs to a numeric run if it is a digit, a '|' separator or a minus sign.
Func<char, bool> isFieldChar = c => Char.IsDigit(c) || c == '|' || c == '-';

// The stream is declared here (the snippet previously used an undeclared `fs`).
// FileMode.Create truncates an existing output file, so no stale bytes from a
// longer previous run are left behind; BufferSize is used as the stream buffer.
using (FileStream fs = new FileStream(strFilePath2, FileMode.Create, FileAccess.Write, FileShare.None, BufferSize))
using (StreamWriter sw = new StreamWriter(fs))
{
    foreach (var line in File.ReadLines(strFilePath))
    {
        // Put a space into empty fields so the text part is not swallowed by a numeric run.
        string tx2 = line.Replace("||", "| |");

        // Length of the leading numeric run.
        int headLength = 0;
        while (headLength < tx2.Length && isFieldChar(tx2[headLength]))
        {
            headLength++;
        }

        // Start index of the trailing numeric run.
        int tailStart = tx2.Length;
        while (tailStart > 0 && isFieldChar(tx2[tailStart - 1]))
        {
            tailStart--;
        }

        string awal = tx2.Substring(0, headLength);
        string dua = tx2.Substring(tailStart);

        // For the sample rows `dua` begins with '|', so Split yields an empty
        // element at index 0 and the 19 numeric fields at indices 1..19.
        string[] pdua = dua.Split('|');
        int totalkanan = 0;
        for (int i = 1; i <= 19; i++)
        {
            totalkanan += int.Parse(pdua[i]);
        }

        if (totalkanan > 0)
        {
            // Joining the two runs produces "||" at the seam; collapse it to one separator.
            sw.WriteLine((awal + dua).Replace("||", "|"));
        }
    }
}
您正在以低效的方式进行很多字符串操作,这会导致大量内存分配,进而引发大量垃圾回收,这两者都会拖慢程序的速度。
看起来您正在读取文件一次以获取行数,然后每行每次都重新读取该文件以从文件中获取下一行!
File 类还提供了其他更高效的方法,请改为这样做:
// Skeleton for a single pass over a file, one line per iteration.
// NOTE(review): `path` is not declared in this snippet — presumably the input file path.
// Counts the lines seen so far; it is incremented before the line is processed,
// so it is 1 while the first line is being handled.
int lineNumber = 0;
foreach (var line in File.ReadLines(path))
{
    lineNumber++;
    // process line
}
另外,您知道数组的索引是从 0 开始的吗?pdua[1] 是数组中的第二项……