C#提高稀疏矩阵行列合计计算的并行性能
C# Improve parallel performance of Sparse Matrix Row and Column Total Calculations
我有一个包含大约 1 亿个非零元素的稀疏矩阵:
// [Row][Column][Element]
public IDictionary<int, IDictionary<int, decimal>> MyMatrix { get; private set; }
获取每一行的总和非常快:
private void RowSum()
{
var rowTotals = new ConcurrentDictionary<int, decimal>();
Parallel.ForEach(MyMatrix, (row) =>
{
rowTotals.TryAdd(row.Key, row.Value.Sum(x => x.Value));
});
}
获取每列的总和要慢得多:
private void ColumnSum()
{
var columnTotals = new ConcurrentDictionary<int, decimal>();
Parallel.ForEach(MyMatrix, (row) =>
{
foreach (var column in row.Value)
{
columnTotals.AddOrUpdate(column.Key, column.Value,
(key, old) => old + column.Value);
}
});
}
为了加快列计算速度,我可以创建一个 [Column][Row][Element] 矩阵,但这会使 RAM 要求加倍。是否有任何方法或数据结构可以让列计算与行计算一样快,而无需加倍内存?
可能发生的情况是存在对中心化 ConcurrentDictionary
的争夺。如果是这种情况,您可以尝试 Parallel.ForEach
的 localInit
重载,为每个任务批处理提供自己的本地(和无竞争)Dictionary
,然后将其聚合到中央字典中结束:
var columnTotals = new ConcurrentDictionary<int, decimal>();
Parallel.ForEach(MyMatrix,
// Each Task gets own dictionary
() => new Dictionary<int, decimal>(),
(row, state, colTots) =>
{
foreach (var column in row.Value)
{
if (!colTots.ContainsKey(column.Key))
{
colTots[column.Key] = column.Value;
}
else
{
colTots[column.Key] += column.Value;
}
}
return colTots;
},
colTots =>
{
// Aggregate the dictionaries
foreach (var column in colTots)
{
columnTotals.AddOrUpdate(column.Key, column.Value,
(key, old) => old + column.Value);
}
});
编辑
一些计时(100000 x 100000 space 中填充了 1000 万个元素)
- 你的 RowSum 425ms
- 你的 ColumnSum 7774ms
- localInit ColumnSum 3324ms
所以仍然比行总和慢一个数量级,但看起来是一个合理的改进。
(在我的词典使用中也是错误)
我认为
Parallel.ForEach(MyMatrix, (row) =>
{
foreach (var column in row.Value)
{
columnTotals.AddOrUpdate(column.Key, 0, (key, old) => old + column.Value);
}
});
应该是
Parallel.ForEach(MyMatrix, (row) =>
{
foreach (var column in row.Value)
{
columnTotals.AddOrUpdate(column.Key, column.value, (key, old) => old + column.Value);
}
});
我认为您可以通过从 public IDictionary<Tuple<int, int>, decimal> MyMatrix { get; private set; }
开始使性能更加对称(但不会更快)
如果最高列值和最低列值之间的差异小到足以创建一个简单的 int 数组,您可以按以下步骤进行:
确定最高值和最低值以进一步创建一个对应数组,将每个列值关联到用于对值求和的 2 个数组中的索引,即存储列值的数组,并并行存储列总计。
您的代码将如下所示:
private void ColumnSum()
{
int highestKeyValue=int.MinValue;
int lowestKeyValue =int.MaxValue;
Parallel.ForEach(MyMatrix, (row) =>
{ // identify highest and lowest column value
foreach (var column in row.Value)
{
lowest =Math.Min(lowestKeyValue ,column.Key) ;
highest=Math.Max(highestKeyValue,column.Key) ;
}
// Create correspondence array
int[] corrrespondence=new int[highestKeyValue-lowestKeyValue];
for (int i=0;i<highest-lowest;i++) corrrespondence[i]=-1 ;
Parallel.ForEach(MyMatrix, (row) =>
{ // tag the key values found in matrix
foreach (var column in row.Value) corrrespondence[column.Key-lowest]=0 ;
}
int columnsCount=0 ;
// compute the indexes to result arrays
for (int i=0;i<highest-lowest;i++)
if (corrrespondence[i]>=0) corrrespondence[i]=columnsCount++ ;
// allocate and initialize results array
int[] columnValues=new int[columnsCount]() ;
int[] columnTotals=new int[columnsCount]() ;
for (int i=0;i<columnsCount;i++) columnTotals[i]=0 ;
Parallel.ForEach(MyMatrix, (row) =>
{
foreach (var column in row.Value)
{
int j=correspondence[column.Key-lowest] ;
// a lock on results[j] is required there to avoid concurrent update of results[j]
columnValues[j]=column.Key ;
columnTotals[j]+=column.Value ;
}
}
}
我有一个包含大约 1 亿个非零元素的稀疏矩阵:
// [Row][Column][Element]
public IDictionary<int, IDictionary<int, decimal>> MyMatrix { get; private set; }
获取每一行的总和非常快:
private void RowSum()
{
var rowTotals = new ConcurrentDictionary<int, decimal>();
Parallel.ForEach(MyMatrix, (row) =>
{
rowTotals.TryAdd(row.Key, row.Value.Sum(x => x.Value));
});
}
获取每列的总和要慢得多:
private void ColumnSum()
{
var columnTotals = new ConcurrentDictionary<int, decimal>();
Parallel.ForEach(MyMatrix, (row) =>
{
foreach (var column in row.Value)
{
columnTotals.AddOrUpdate(column.Key, column.Value,
(key, old) => old + column.Value);
}
});
}
为了加快列计算速度,我可以创建一个 [Column][Row][Element] 矩阵,但这会使 RAM 要求加倍。是否有任何方法或数据结构可以让列计算与行计算一样快,而无需加倍内存?
可能发生的情况是存在对中心化 ConcurrentDictionary
的争夺。如果是这种情况,您可以尝试 Parallel.ForEach
的 localInit
重载,为每个任务批处理提供自己的本地(和无竞争)Dictionary
,然后将其聚合到中央字典中结束:
var columnTotals = new ConcurrentDictionary<int, decimal>();
Parallel.ForEach(MyMatrix,
// Each Task gets own dictionary
() => new Dictionary<int, decimal>(),
(row, state, colTots) =>
{
foreach (var column in row.Value)
{
if (!colTots.ContainsKey(column.Key))
{
colTots[column.Key] = column.Value;
}
else
{
colTots[column.Key] += column.Value;
}
}
return colTots;
},
colTots =>
{
// Aggregate the dictionaries
foreach (var column in colTots)
{
columnTotals.AddOrUpdate(column.Key, column.Value,
(key, old) => old + column.Value);
}
});
编辑
一些计时(100000 x 100000 space 中填充了 1000 万个元素)
- 你的 RowSum 425ms
- 你的 ColumnSum 7774ms
- localInit ColumnSum 3324ms
所以仍然比行总和慢一个数量级,但看起来是一个合理的改进。
(在我的词典使用中也是错误)
我认为
Parallel.ForEach(MyMatrix, (row) =>
{
foreach (var column in row.Value)
{
columnTotals.AddOrUpdate(column.Key, 0, (key, old) => old + column.Value);
}
});
应该是
Parallel.ForEach(MyMatrix, (row) =>
{
foreach (var column in row.Value)
{
columnTotals.AddOrUpdate(column.Key, column.value, (key, old) => old + column.Value);
}
});
我认为您可以通过从 public IDictionary<Tuple<int, int>, decimal> MyMatrix { get; private set; }
如果最高列值和最低列值之间的差异小到足以创建一个简单的 int 数组,您可以按以下步骤进行:
确定最高值和最低值以进一步创建一个对应数组,将每个列值关联到用于对值求和的 2 个数组中的索引,即存储列值的数组,并并行存储列总计。
您的代码将如下所示:
private void ColumnSum()
{
int highestKeyValue=int.MinValue;
int lowestKeyValue =int.MaxValue;
Parallel.ForEach(MyMatrix, (row) =>
{ // identify highest and lowest column value
foreach (var column in row.Value)
{
lowest =Math.Min(lowestKeyValue ,column.Key) ;
highest=Math.Max(highestKeyValue,column.Key) ;
}
// Create correspondence array
int[] corrrespondence=new int[highestKeyValue-lowestKeyValue];
for (int i=0;i<highest-lowest;i++) corrrespondence[i]=-1 ;
Parallel.ForEach(MyMatrix, (row) =>
{ // tag the key values found in matrix
foreach (var column in row.Value) corrrespondence[column.Key-lowest]=0 ;
}
int columnsCount=0 ;
// compute the indexes to result arrays
for (int i=0;i<highest-lowest;i++)
if (corrrespondence[i]>=0) corrrespondence[i]=columnsCount++ ;
// allocate and initialize results array
int[] columnValues=new int[columnsCount]() ;
int[] columnTotals=new int[columnsCount]() ;
for (int i=0;i<columnsCount;i++) columnTotals[i]=0 ;
Parallel.ForEach(MyMatrix, (row) =>
{
foreach (var column in row.Value)
{
int j=correspondence[column.Key-lowest] ;
// a lock on results[j] is required there to avoid concurrent update of results[j]
columnValues[j]=column.Key ;
columnTotals[j]+=column.Value ;
}
}
}