通过访问本地数据而不是 C# 中的远程数据库来优化我的程序
Optimizing my program by accessing data locally instead of a remote database in C#
我有一个数据库,其中包含 4 个表,这些表共有数百万行。我的程序在几台计算机上运行,对数据进行计算,然后将结果写回数据库。我的程序设计的一个巨大瓶颈是:每次计算都要先下载数据,再对其进行计算,然后将结果保存到数据库中。当数据位于本地网络上时,程序执行得非常快,所以我意识到,从远程服务器下载数据所消耗的资源才是问题所在。
我可以通过哪些方法,在我的代码运行之前或之后把远程数据库中的数据保存下来,从而使我的程序更有效率?这些计算只需完成一次,之后不再需要;我有 24 台计算机运行同一个程序。
/// <summary>
/// Entry point. Loads the S&amp;P 500 series, then walks every symbol listed in the
/// stock-ratings table. For each symbol that is not yet complete and not claimed by
/// another machine, it records a claim in the current-symbols table and calls
/// <c>sendMessage</c> once for every date that is not already present in the
/// historical ratings for that symbol.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // The last TailRowsSkipped rows of each symbol's price list are never processed, and the
    // same offset decides whether a symbol counts as complete.
    // NOTE(review): the reason for the value 30 is not visible in this method - confirm.
    const int TailRowsSkipped = 30;

    // Each date is evaluated against the window [date - LookbackDays, date].
    const int LookbackDays = 60;

    try
    {
        // NOTE(review): symbolAdapter, symbolTable and historicalRatingsTable are created but
        // never read in this method, and historicalRatingsAdapter only has ClearBeforeFill set.
        // They are kept because their constructors are not visible here - confirm and remove.
        using (StockRatingsTableAdapter stockRatingsAdapter = new StockRatingsTableAdapter())
        using (OoplesDataSet.StockRatingsDataTable stockRatingsTable = new OoplesDataSet.StockRatingsDataTable())
        using (SymbolsTableAdapter symbolAdapter = new SymbolsTableAdapter())
        using (OoplesDataSet.SymbolsDataTable symbolTable = new OoplesDataSet.SymbolsDataTable())
        using (DailyAmexDataTableAdapter dailyAmexAdapter = new DailyAmexDataTableAdapter())
        using (OoplesDataSet.DailyAmexDataDataTable dailyAmexTable = new OoplesDataSet.DailyAmexDataDataTable())
        using (OoplesDataSet.OldStockRatingsDataTable historicalRatingsTable = new OoplesDataSet.OldStockRatingsDataTable())
        using (OldStockRatingsTableAdapter historicalRatingsAdapter = new OldStockRatingsTableAdapter())
        using (OoplesDataSet.OldStockRatingsDataTable historicalRatingSymbolTable = new OoplesDataSet.OldStockRatingsDataTable())
        using (OldStockRatingsTableAdapter historicalRatingSymbolAdapter = new OldStockRatingsTableAdapter())
        using (OoplesDataSet.DailyGlobalDataDataTable sandp500Table = new OoplesDataSet.DailyGlobalDataDataTable())
        using (OoplesDataSet.CurrentSymbolsDataTable currentSymbolTable = new OoplesDataSet.CurrentSymbolsDataTable())
        using (CurrentSymbolsTableAdapter currentSymbolAdapter = new CurrentSymbolsTableAdapter())
        {
            // Fill the S&P 500 info first.
            // NOTE(review): dailyGlobalAdapter is not declared anywhere in this method;
            // presumably it is a static member declared elsewhere - confirm.
            dailyGlobalAdapter.ClearBeforeFill = true;
            dailyGlobalAdapter.FillBySymbol(sandp500Table, Calculations.sp500);
            List<StockData> sandpInfo =
                (from c in sandp500Table
                 select new StockData { Close = c.Close, Date = c.Date, High = c.High, Low = c.Low, Volume = c.Volume })
                .ToList();

            // Settings for the historical ratings adapter.
            historicalRatingsAdapter.ClearBeforeFill = true;

            // Every (symbol, market) pair in the stock ratings table, in table order.
            stockRatingsAdapter.Fill(stockRatingsTable);
            List<SymbolMarket> ratingSymbols =
                (from c in stockRatingsTable
                 select new SymbolMarket { Symbol = c.Symbol, Market = c.Market })
                .ToList();

            // Plain enumeration: the symbol and its market always come from the same element,
            // which an unordered PLINQ ElementAtOrDefault(i) does not guarantee.
            foreach (SymbolMarket entry in ratingSymbols)
            {
                string symbol = entry.Symbol;
                string market = entry.Market;

                dailyAmexAdapter.FillBySymbol(dailyAmexTable, symbol);
                historicalRatingSymbolAdapter.FillBySymbolMarket(historicalRatingSymbolTable, market, symbol);

                List<StockData> amexList =
                    (from c in dailyAmexTable
                     select new StockData { Close = c.Close, Date = c.Date, High = c.High, Low = c.Low, Volume = c.Volume })
                    .ToList();
                int rowCount = amexList.Count;

                int completedRowCount = historicalRatingSymbolTable.Rows.Count;
                // A set makes the per-date membership test in the parallel loop O(1);
                // it is only read there, never written.
                HashSet<DateTime> completedDates =
                    new HashSet<DateTime>(from r in historicalRatingSymbolTable select r.Date);

                // Re-read the claims on every symbol so claims made by other machines are seen.
                // NOTE(review): this check-then-claim sequence is not atomic across machines,
                // so two machines can still pick the same symbol - confirm whether that matters.
                currentSymbolAdapter.Fill(currentSymbolTable);
                bool claimedAlready =
                    (from c in currentSymbolTable
                     where c.Symbol == symbol && c.Market == market
                     select c)
                    .Any();

                // Skip symbols whose rows already match up or that some computer is working on.
                if (rowCount - TailRowsSkipped == completedRowCount || claimedAlready)
                {
                    continue;
                }

                // Let the other computers know that this machine is working on the symbol.
                string claimMessage = Environment.MachineName + " has claimed dominion over " + symbol + " in the " + market + " market!";
                OoplesDataSet.CurrentSymbolsRow ownRow =
                    (from c in currentSymbolTable
                     where c.ComputerName == Environment.MachineName
                     select c)
                    .FirstOrDefault();

                if (ownRow != null)
                {
                    // This machine already has a row: repoint it at the new symbol.
                    ownRow.Symbol = symbol;
                    ownRow.Market = market;

                    // GetChanges returns null when nothing changed; using tolerates a null resource.
                    using (OoplesDataSet.CurrentSymbolsDataTable pendingChanges =
                        (OoplesDataSet.CurrentSymbolsDataTable)currentSymbolTable.GetChanges())
                    {
                        if (pendingChanges != null)
                        {
                            currentSymbolAdapter.Adapter.UpdateCommand.UpdatedRowSource = System.Data.UpdateRowSource.None;
                            currentSymbolAdapter.Update(pendingChanges);
                            pendingChanges.AcceptChanges();
                            Console.WriteLine(claimMessage);
                        }
                    }
                }
                else
                {
                    // First claim from this machine: insert a new row.
                    currentSymbolAdapter.Insert(symbol, market, Environment.MachineName);
                    Console.WriteLine(claimMessage);
                }

                ParallelOptions options = new ParallelOptions
                {
                    MaxDegreeOfParallelism = Environment.ProcessorCount
                };

                // Parallel.For performs no iterations when the upper bound is <= 0, so symbols
                // with TailRowsSkipped rows or fewer are skipped without any extra check.
                Parallel.For(0, rowCount - TailRowsSkipped, options, j =>
                {
                    // All working state is local to the iteration. The original shared these
                    // variables across iterations, so concurrent iterations could overwrite each
                    // other's date and lists before sendMessage read them.
                    DateTime date = amexList[j].Date;
                    if (completedDates.Contains(date))
                    {
                        return;
                    }

                    DateTime searchDate = date.Subtract(TimeSpan.FromDays(LookbackDays));

                    // Pair stock and market closes for every date in the window present in both lists.
                    List<StockMarketCompare> stockCompareData =
                        (from c in sandpInfo
                         where c.Date >= searchDate && c.Date <= date
                         join d in amexList on c.Date equals d.Date
                         select new StockMarketCompare { stockClose = d.Close, marketClose = c.Close })
                        .ToList();

                    // Copies of the symbol's own rows inside the window.
                    List<StockData> stockData =
                        (from c in amexList
                         where c.Date >= searchDate && c.Date <= date
                         select new StockData { Close = c.Close, High = c.High, Low = c.Low, Volume = c.Volume, Date = c.Date })
                        .ToList();

                    SymbolInfo symbolClass = new SymbolInfo(symbol, market);

                    // NOTE(review): the original also built a window-filtered S&P list here but
                    // never used it; sendMessage has always received the full sandpInfo list -
                    // confirm which one was intended.
                    // sendMessage is invoked from several worker threads at once, as before;
                    // assumes it is thread-safe - TODO confirm.
                    sendMessage(sandpInfo, date, symbolClass, stockData, stockCompareData);
                });
            }
        }
    }
    catch (AggregateException ex)
    {
        // Parallel.For wraps worker failures; report each underlying error rather than
        // only the generic aggregate message.
        foreach (Exception inner in ex.Flatten().InnerExceptions)
        {
            Console.WriteLine(inner.Message);
            Console.WriteLine(inner.StackTrace);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}
你所做的似乎有点矫枉过正,不会让你走那么远。
乍一看,我发现了几行我怀疑是 N+1 综合症。
大量使用 AsParallel 来创建列表甚至计数也不会带来任何好处。
然而,最让我担心的是:您说只有 4 个表,但我数出了 13 个适配器?
很明显你在做客户端的所有工作。
不要盲目地用整张表的内容填充数据集,然后再过滤结果:应当设计一个只返回所需数据的查询,并且不要省略 WHERE 子句。现在,您所有的 [24 台计算机] 都在重复处理同样的问题。
正如上面评论中提到的,您会惊讶于使用 T-SQL 可以做多少事情。
在服务器端预处理这些数据:合并、联接(JOIN)和过滤,并可以把结果汇总到第 5 个(临时)表中。
让您的其他计算机在考虑分区的情况下查询这些结果,以便它们都承担大约 1/24 的部分工作。
然后使用 Parallel,而不是 AsParallel:效率更高,麻烦更少,也更清晰。
底线:重新设计:-)
我有一个数据库,其中包含 4 个表,这些表共有数百万行。我的程序在几台计算机上运行,对数据进行计算,然后将结果写回数据库。我的程序设计的一个巨大瓶颈是:每次计算都要先下载数据,再对其进行计算,然后将结果保存到数据库中。当数据位于本地网络上时,程序执行得非常快,所以我意识到,从远程服务器下载数据所消耗的资源才是问题所在。
我可以通过哪些方法,在我的代码运行之前或之后把远程数据库中的数据保存下来,从而使我的程序更有效率?这些计算只需完成一次,之后不再需要;我有 24 台计算机运行同一个程序。
/// <summary>
/// Entry point. Loads the S&amp;P 500 series, then walks every symbol listed in the
/// stock-ratings table. For each symbol that is not yet complete and not claimed by
/// another machine, it records a claim in the current-symbols table and calls
/// <c>sendMessage</c> once for every date that is not already present in the
/// historical ratings for that symbol.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // The last TailRowsSkipped rows of each symbol's price list are never processed, and the
    // same offset decides whether a symbol counts as complete.
    // NOTE(review): the reason for the value 30 is not visible in this method - confirm.
    const int TailRowsSkipped = 30;

    // Each date is evaluated against the window [date - LookbackDays, date].
    const int LookbackDays = 60;

    try
    {
        // NOTE(review): symbolAdapter, symbolTable and historicalRatingsTable are created but
        // never read in this method, and historicalRatingsAdapter only has ClearBeforeFill set.
        // They are kept because their constructors are not visible here - confirm and remove.
        using (StockRatingsTableAdapter stockRatingsAdapter = new StockRatingsTableAdapter())
        using (OoplesDataSet.StockRatingsDataTable stockRatingsTable = new OoplesDataSet.StockRatingsDataTable())
        using (SymbolsTableAdapter symbolAdapter = new SymbolsTableAdapter())
        using (OoplesDataSet.SymbolsDataTable symbolTable = new OoplesDataSet.SymbolsDataTable())
        using (DailyAmexDataTableAdapter dailyAmexAdapter = new DailyAmexDataTableAdapter())
        using (OoplesDataSet.DailyAmexDataDataTable dailyAmexTable = new OoplesDataSet.DailyAmexDataDataTable())
        using (OoplesDataSet.OldStockRatingsDataTable historicalRatingsTable = new OoplesDataSet.OldStockRatingsDataTable())
        using (OldStockRatingsTableAdapter historicalRatingsAdapter = new OldStockRatingsTableAdapter())
        using (OoplesDataSet.OldStockRatingsDataTable historicalRatingSymbolTable = new OoplesDataSet.OldStockRatingsDataTable())
        using (OldStockRatingsTableAdapter historicalRatingSymbolAdapter = new OldStockRatingsTableAdapter())
        using (OoplesDataSet.DailyGlobalDataDataTable sandp500Table = new OoplesDataSet.DailyGlobalDataDataTable())
        using (OoplesDataSet.CurrentSymbolsDataTable currentSymbolTable = new OoplesDataSet.CurrentSymbolsDataTable())
        using (CurrentSymbolsTableAdapter currentSymbolAdapter = new CurrentSymbolsTableAdapter())
        {
            // Fill the S&P 500 info first.
            // NOTE(review): dailyGlobalAdapter is not declared anywhere in this method;
            // presumably it is a static member declared elsewhere - confirm.
            dailyGlobalAdapter.ClearBeforeFill = true;
            dailyGlobalAdapter.FillBySymbol(sandp500Table, Calculations.sp500);
            List<StockData> sandpInfo =
                (from c in sandp500Table
                 select new StockData { Close = c.Close, Date = c.Date, High = c.High, Low = c.Low, Volume = c.Volume })
                .ToList();

            // Settings for the historical ratings adapter.
            historicalRatingsAdapter.ClearBeforeFill = true;

            // Every (symbol, market) pair in the stock ratings table, in table order.
            stockRatingsAdapter.Fill(stockRatingsTable);
            List<SymbolMarket> ratingSymbols =
                (from c in stockRatingsTable
                 select new SymbolMarket { Symbol = c.Symbol, Market = c.Market })
                .ToList();

            // Plain enumeration: the symbol and its market always come from the same element,
            // which an unordered PLINQ ElementAtOrDefault(i) does not guarantee.
            foreach (SymbolMarket entry in ratingSymbols)
            {
                string symbol = entry.Symbol;
                string market = entry.Market;

                dailyAmexAdapter.FillBySymbol(dailyAmexTable, symbol);
                historicalRatingSymbolAdapter.FillBySymbolMarket(historicalRatingSymbolTable, market, symbol);

                List<StockData> amexList =
                    (from c in dailyAmexTable
                     select new StockData { Close = c.Close, Date = c.Date, High = c.High, Low = c.Low, Volume = c.Volume })
                    .ToList();
                int rowCount = amexList.Count;

                int completedRowCount = historicalRatingSymbolTable.Rows.Count;
                // A set makes the per-date membership test in the parallel loop O(1);
                // it is only read there, never written.
                HashSet<DateTime> completedDates =
                    new HashSet<DateTime>(from r in historicalRatingSymbolTable select r.Date);

                // Re-read the claims on every symbol so claims made by other machines are seen.
                // NOTE(review): this check-then-claim sequence is not atomic across machines,
                // so two machines can still pick the same symbol - confirm whether that matters.
                currentSymbolAdapter.Fill(currentSymbolTable);
                bool claimedAlready =
                    (from c in currentSymbolTable
                     where c.Symbol == symbol && c.Market == market
                     select c)
                    .Any();

                // Skip symbols whose rows already match up or that some computer is working on.
                if (rowCount - TailRowsSkipped == completedRowCount || claimedAlready)
                {
                    continue;
                }

                // Let the other computers know that this machine is working on the symbol.
                string claimMessage = Environment.MachineName + " has claimed dominion over " + symbol + " in the " + market + " market!";
                OoplesDataSet.CurrentSymbolsRow ownRow =
                    (from c in currentSymbolTable
                     where c.ComputerName == Environment.MachineName
                     select c)
                    .FirstOrDefault();

                if (ownRow != null)
                {
                    // This machine already has a row: repoint it at the new symbol.
                    ownRow.Symbol = symbol;
                    ownRow.Market = market;

                    // GetChanges returns null when nothing changed; using tolerates a null resource.
                    using (OoplesDataSet.CurrentSymbolsDataTable pendingChanges =
                        (OoplesDataSet.CurrentSymbolsDataTable)currentSymbolTable.GetChanges())
                    {
                        if (pendingChanges != null)
                        {
                            currentSymbolAdapter.Adapter.UpdateCommand.UpdatedRowSource = System.Data.UpdateRowSource.None;
                            currentSymbolAdapter.Update(pendingChanges);
                            pendingChanges.AcceptChanges();
                            Console.WriteLine(claimMessage);
                        }
                    }
                }
                else
                {
                    // First claim from this machine: insert a new row.
                    currentSymbolAdapter.Insert(symbol, market, Environment.MachineName);
                    Console.WriteLine(claimMessage);
                }

                ParallelOptions options = new ParallelOptions
                {
                    MaxDegreeOfParallelism = Environment.ProcessorCount
                };

                // Parallel.For performs no iterations when the upper bound is <= 0, so symbols
                // with TailRowsSkipped rows or fewer are skipped without any extra check.
                Parallel.For(0, rowCount - TailRowsSkipped, options, j =>
                {
                    // All working state is local to the iteration. The original shared these
                    // variables across iterations, so concurrent iterations could overwrite each
                    // other's date and lists before sendMessage read them.
                    DateTime date = amexList[j].Date;
                    if (completedDates.Contains(date))
                    {
                        return;
                    }

                    DateTime searchDate = date.Subtract(TimeSpan.FromDays(LookbackDays));

                    // Pair stock and market closes for every date in the window present in both lists.
                    List<StockMarketCompare> stockCompareData =
                        (from c in sandpInfo
                         where c.Date >= searchDate && c.Date <= date
                         join d in amexList on c.Date equals d.Date
                         select new StockMarketCompare { stockClose = d.Close, marketClose = c.Close })
                        .ToList();

                    // Copies of the symbol's own rows inside the window.
                    List<StockData> stockData =
                        (from c in amexList
                         where c.Date >= searchDate && c.Date <= date
                         select new StockData { Close = c.Close, High = c.High, Low = c.Low, Volume = c.Volume, Date = c.Date })
                        .ToList();

                    SymbolInfo symbolClass = new SymbolInfo(symbol, market);

                    // NOTE(review): the original also built a window-filtered S&P list here but
                    // never used it; sendMessage has always received the full sandpInfo list -
                    // confirm which one was intended.
                    // sendMessage is invoked from several worker threads at once, as before;
                    // assumes it is thread-safe - TODO confirm.
                    sendMessage(sandpInfo, date, symbolClass, stockData, stockCompareData);
                });
            }
        }
    }
    catch (AggregateException ex)
    {
        // Parallel.For wraps worker failures; report each underlying error rather than
        // only the generic aggregate message.
        foreach (Exception inner in ex.Flatten().InnerExceptions)
        {
            Console.WriteLine(inner.Message);
            Console.WriteLine(inner.StackTrace);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
    }
}
你所做的似乎有点矫枉过正,不会让你走那么远。
乍一看,我发现了几行我怀疑是 N+1 综合症。
大量使用 AsParallel 来创建列表甚至计数也不会带来任何好处。
然而,最让我担心的是:您说只有 4 个表,但我数出了 13 个适配器?
很明显你在做客户端的所有工作。
不要盲目地用整张表的内容填充数据集,然后再过滤结果:应当设计一个只返回所需数据的查询,并且不要省略 WHERE 子句。现在,您所有的 [24 台计算机] 都在重复处理同样的问题。
正如上面评论中提到的,您会惊讶于使用 T-SQL 可以做多少事情。
在服务器端预处理这些数据:合并、联接(JOIN)和过滤,并可以把结果汇总到第 5 个(临时)表中。
让您的其他计算机在考虑分区的情况下查询这些结果,以便它们都承担大约 1/24 的部分工作。
然后使用 Parallel,而不是 AsParallel:效率更高,麻烦更少,也更清晰。
底线:重新设计:-)