如何使用 NPOI 导出右侧不参差不齐(每行列数一致)的 CSV

How to get non ragged right column with NPOI

我有一个 Excel (.xls) 文件需要转换为 .csv 文件。我使用的是 C# 版 NPOI 库的最新稳定版本。问题是生成的 CSV 各行长度参差不齐(ragged),而不是每一行都具有相同的列数。

What the data in excel file looks like

输出csv文件是:

"FirstName","MiddleName","LastName","PhNum"

"John","L","Doe","555-555-5555"

"Little","Ding","Bat"

"Roger","D","Rabbit","123-456-7890"

我希望的结果是:在第二个数据行的末尾(即 "Bat" 之后)多输出一个分隔符,如下所示:

"FirstName","MiddleName","LastName","PhNum"

"John","L","Doe","555-555-5555"

"Little","Ding","Bat",

"Roger","D","Rabbit","123-456-7890"

这是我的代码:

    /// <summary>
    /// Opens the workbook at <c>InputFileName</c> and converts either every worksheet
    /// (when <c>AllWorksheets</c> is set) or only the first one via <c>ParseWorksheet</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when every worksheet that was processed reported success; otherwise <c>false</c>.
    /// </returns>
    public override bool ParseFile()
    {
        // The input is only read, so request read access explicitly (this also lets read-only
        // files be opened) and dispose the stream deterministically, even if parsing throws.
        using (FileStream iFile = new FileStream(InputFileName, FileMode.Open, FileAccess.Read))
        {
            HSSFWorkbook wb = new HSSFWorkbook(iFile);

            if (!AllWorksheets)
            {
                return ParseWorksheet(wb, 0);
            }

            bool result = true;
            for (int i = 0; i < wb.NumberOfSheets; i++)
            {
                // Short-circuit kept from the original: once a sheet has failed,
                // the remaining sheets are not parsed.
                result = result && ParseWorksheet(wb, i);
            }
            return result;
        }
    }
    // Quote character handed to FormatValue when output fields are built.
    protected char c = '"';
    /// <summary>
    /// Returns <paramref name="s"/> ready to be written as one delimited field.
    /// </summary>
    /// <param name="s">The raw field text; <c>null</c> is treated as empty when quoting.</param>
    /// <param name="AddQuotes">When <c>false</c>, <paramref name="s"/> is returned unchanged.</param>
    /// <param name="quoteChar">The character used to wrap the value.</param>
    /// <returns>
    /// The value wrapped in <paramref name="quoteChar"/>, with every embedded occurrence of
    /// <paramref name="quoteChar"/> doubled so the field can be read back unambiguously.
    /// </returns>
    public static string FormatValue(string s, bool AddQuotes, char quoteChar)
    {
        if (!AddQuotes)
        {
            return s;
        }

        string quote = quoteChar.ToString();
        // Without doubling, a value such as  He said "hi"  would terminate the field early.
        string escaped = (s ?? String.Empty).Replace(quote, quote + quote);
        return quote + escaped + quote;
    }
    /// <summary>
    /// Appends the rows of worksheet <paramref name="SheetIndex"/> of <paramref name="wb"/> to
    /// <c>OutputFileName</c>, one delimited line per row.
    /// </summary>
    /// <returns>
    /// <c>false</c> if any cell had a type not handled by the switch below; otherwise <c>true</c>.
    /// </returns>
    private bool ParseWorksheet(HSSFWorkbook wb, int SheetIndex)
    {

        bool result = true;
        HSSFSheet sheet = (HSSFSheet)wb.GetSheetAt(SheetIndex);


        // Nothing is written when both row bounds are 0.
        // NOTE(review): this is also true for a sheet whose only row is row 0 - confirm that is intended.
        if (sheet.FirstRowNum == sheet.LastRowNum && sheet.LastRowNum == 0) return result;

        // Second argument 'true' opens the output file in append mode.
        // NOTE(review): the writer is only closed on the normal path at the end of the method.
        System.IO.StreamWriter sw = new StreamWriter(OutputFileName, true);

        for (int i = sheet.FirstRowNum; i <= sheet.LastRowNum; i++)
        {
            string OutputRow = String.Empty;
            // NOTE(review): row is used below without a null check - verify GetRow cannot return null here.
            HSSFRow row = (HSSFRow)sheet.GetRow(i);
            // Index into row.Cells; advanced only when a cell is consumed.
            int Column = 0;
            // Reset for every row, so this is the width of the current row only, not of the sheet.
            int MaxCol = 0;
            int temp = 0;

            // Every iteration reads the same row.LastCellNum, so the loop just leaves
            // MaxCol equal to this row's LastCellNum.
            for (int j = 0; j < row.LastCellNum; j++)
            {
                temp = row.LastCellNum;
                if (temp > MaxCol)
                {
                    MaxCol = temp;
                }
            }
            for (int j = 0; j < MaxCol; j++)
            {
                // row.Cells is indexed by position (Column), not by column number, so the
                // cell's ColumnIndex is compared with j to detect gaps in the row.
                if (j == row.Cells[Column].ColumnIndex)
                {
                    switch (row.Cells[Column].CellType)
                    {
                        case NPOI.SS.UserModel.CellType.Boolean:
                            OutputRow += FormatValue(row.Cells[Column].BooleanCellValue.ToString(), AddQuotes, c) + Delimiter.ToString();
                            break;
                        case NPOI.SS.UserModel.CellType.Formula:
                            // NOTE(review): this writes CachedFormulaResultType.ToString(), presumably the
                            // result type's name rather than the computed value - confirm.
                            OutputRow += FormatValue(row.Cells[Column].CachedFormulaResultType.ToString(), AddQuotes, c) + Delimiter.ToString();
                            break;
                        case NPOI.SS.UserModel.CellType.Numeric:
                            // Date-formatted numeric cells are written as a short date, others as the number.
                            OutputRow += FormatValue((NPOI.SS.UserModel.DateUtil.IsCellDateFormatted(row.Cells[Column]) ? row.Cells[Column].DateCellValue.ToShortDateString() : row.Cells[Column].NumericCellValue.ToString()), AddQuotes, c) + Delimiter.ToString();
                            break;
                        case NPOI.SS.UserModel.CellType.Blank:
                            // Blank cells contribute an empty, unquoted field.
                            OutputRow += Delimiter.ToString();
                            break;
                        case NPOI.SS.UserModel.CellType.String:
                            OutputRow += FormatValue(row.Cells[Column].StringCellValue.ToString().Replace('\n', ' ').TrimEnd(), AddQuotes, c) + Delimiter.ToString();//replace the new line character to space due to formatting issue.
                            break;
                        default:
                            // Unhandled cell type: flag failure. No field or delimiter is appended,
                            // so later fields on this line move one position to the left.
                            result = false;
                            break;
                    }
                    Column++;
                }
                else
                {
                    // No cell stored at column j: emit an empty field.
                    OutputRow += Delimiter.ToString();
                }

            }

            // Drop the trailing delimiter (one character).
            // NOTE(review): if nothing was appended, Length - 1 is -1 here.
            OutputRow = OutputRow.Remove(OutputRow.Length - 1);
            sw.WriteLine(OutputRow);
        }


        sw.Flush();
        sw.Close();

        return result;
    }`

如有任何建议,我们将不胜感激。

您的代码中有几处问题共同导致了这种现象。

首先,您在为每一行重新计算 MaxCol。如果想让右边缘不参差不齐,就需要先遍历所有行求出全表的 MaxCol,然后再生成输出。

其次,您正在使用 row.Cells[] 来获取该行的特定单元格。Cells[] 会跳过空单元格。所以如果行中某处恰好有一个空白单元格,其后的所有值都会向左移动,并且数组的长度将小于 MaxCol。对于至少包含一个空白单元格的行,访问 row.Cells[MaxCol - 1] 将引发异常。
解决方法是改用 row.GetCell(index) 方法。此方法返回位于列 index(从 0 开始)的单元格;如果该单元格为空,则返回 null。它用起来更直接,并且可以去掉代码中的特殊逻辑,即比较当前单元格的 ColumnIndex 与循环索引 j、以确认取到的单元格确实位于预期列中的那部分逻辑。

作为附加建议,我建议只在内部循环中检索一次当前单元格并将其分配给一个变量,而不是多次重新检索它。这将使您的代码更高效、更易于阅读。

这里是 ParseWorksheet 方法的修订代码,包含上述所有更改:

/// <summary>
/// Appends worksheet <paramref name="SheetIndex"/> of <paramref name="wb"/> to
/// <c>OutputFileName</c> as delimited text. Every line gets the same number of fields:
/// the width of the widest row in the sheet.
/// </summary>
/// <param name="wb">The workbook to read from.</param>
/// <param name="SheetIndex">Zero-based index of the worksheet to export.</param>
/// <returns>
/// <c>false</c> if at least one cell had a type that could not be converted (its field is left
/// empty); otherwise <c>true</c>. An empty sheet writes nothing and returns <c>true</c>.
/// </returns>
private bool ParseWorksheet(HSSFWorkbook wb, int SheetIndex)
{
    HSSFSheet sheet = (HSSFSheet)wb.GetSheetAt(SheetIndex);

    // "FirstRowNum == LastRowNum == 0" is also true for a sheet whose only row is row 0,
    // so decide emptiness from the number of rows that actually exist.
    if (sheet.PhysicalNumberOfRows == 0) return true;

    // Pass 1: find the widest row so every output line can be padded to the same width.
    int maxCol = 0;
    for (int i = sheet.FirstRowNum; i <= sheet.LastRowNum; i++)
    {
        HSSFRow row = (HSSFRow)sheet.GetRow(i);
        if (row != null) // rows that were never created come back as null
        {
            maxCol = Math.Max(maxCol, row.LastCellNum);
        }
    }

    bool result = true;
    string separator = Delimiter.ToString();

    // Pass 2: emit the rows. 'using' closes the file even if a cell read throws.
    using (StreamWriter sw = new StreamWriter(OutputFileName, true))
    {
        for (int i = sheet.FirstRowNum; i <= sheet.LastRowNum; i++)
        {
            HSSFRow row = (HSSFRow)sheet.GetRow(i);
            string[] fields = new string[maxCol];

            for (int j = 0; j < maxCol; j++)
            {
                // A missing row or missing cell becomes an empty, unquoted field.
                HSSFCell cell = row == null ? null : (HSSFCell)row.GetCell(j);
                string text = String.Empty;
                if (cell != null && !TryFormatCell(cell, out text))
                {
                    result = false;
                }
                fields[j] = text;
            }

            // Joining avoids the trailing delimiter instead of trimming it afterwards,
            // which failed when a row produced no text at all.
            sw.WriteLine(String.Join(separator, fields));
        }
    }

    return result;
}

/// <summary>
/// Converts one cell to its output text. Formula cells are rendered from their cached result.
/// </summary>
/// <returns><c>false</c> (with an empty <paramref name="text"/>) for unsupported cell types.</returns>
private bool TryFormatCell(HSSFCell cell, out string text)
{
    NPOI.SS.UserModel.CellType type = cell.CellType;
    if (type == NPOI.SS.UserModel.CellType.Formula)
    {
        // Use the cached value of the formula, not the name of its result type.
        type = cell.CachedFormulaResultType;
    }

    switch (type)
    {
        case NPOI.SS.UserModel.CellType.Boolean:
            text = FormatValue(cell.BooleanCellValue.ToString(), AddQuotes, c);
            return true;
        case NPOI.SS.UserModel.CellType.Numeric:
            // Date-formatted numbers are written as a short date, everything else as the number.
            text = FormatValue((NPOI.SS.UserModel.DateUtil.IsCellDateFormatted(cell) ? cell.DateCellValue.ToShortDateString() : cell.NumericCellValue.ToString()), AddQuotes, c);
            return true;
        case NPOI.SS.UserModel.CellType.Blank:
            text = String.Empty;
            return true;
        case NPOI.SS.UserModel.CellType.String:
            // Line breaks inside a cell would split the output record, so flatten them to spaces.
            text = FormatValue(cell.StringCellValue.Replace('\n', ' ').TrimEnd(), AddQuotes, c);
            return true;
        default:
            // Unsupported type: keep the column position with an empty field and report failure.
            text = String.Empty;
            return false;
    }
}