JSON.net 直接从 oledbconnection 序列化

JSON.net serialize directly from oledbconnection

我目前有一个处理程序,它获取 excel 文件的文件路径和选项卡名称,将文件处理为数据 table,然后将 table 序列化为 json 字符串到 return。 这一直有效,直到我尝试处理大文件,然后出现内存不足异常。

我在想,如果我不先将所有内容加载到数据table 中,而是直接加载到json 字符串中,这样会减少内存使用量。但是,我一直无法找到有关如何执行此操作的任何示例。

我可以直接从 OleDbConnection 序列化为字符串吗?怎么样?

    public void ProcessRequest(HttpContext context)
    {
        string path = context.Request["path"];
        string tableNames = context.Request["tableNames"];

        string connectionString = string.Empty;
        if (path.EndsWith(".xls"))
        {
            connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;
                Data Source={0};
                Extended Properties=""Excel 8.0;HDR=YES;IMEX=1""", path);
        }
        else if (path.EndsWith(".xlsx"))
        {
            connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;
                Data Source={0};
                Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1""", path);
        }
        DbProviderFactory factory = DbProviderFactories.GetFactory("System.Data.OleDb");

        DbDataAdapter adapter = factory.CreateDataAdapter();
        OleDbConnection conn = new OleDbConnection(connectionString);
        conn.Open();

        DataTable tmp = new DataTable();

        DbCommand selectCommand = factory.CreateCommand();

        selectCommand.CommandText = String.Format("SELECT * FROM [{0}]", tableNames);
        selectCommand.Connection = conn;
        adapter.SelectCommand = selectCommand;


        adapter.Fill(tmp);
        string tabdata = JsonConvert.SerializeObject(tmp);
        context.Response.Write(tabdata);
    }

首先,您应该停止序列化为中间 string,而是使用以下简单方法直接序列化为 HttpResponse.OutputStream

public static class JsonExtensions
{
    public static void SerializeToStream(object value, System.Web.HttpResponse response, JsonSerializerSettings settings = null)
    {
        if (response == null)
            throw new ArgumentNullException("response");
        SerializeToStream(value, response.OutputStream, settings);
    }

    public static void SerializeToStream(object value, TextWriter writer, JsonSerializerSettings settings = null)
    {
        if (writer == null)
            throw new ArgumentNullException("writer");
        var serializer = JsonSerializer.CreateDefault(settings);
        serializer.Serialize(writer, value);
    }

    public static void SerializeToStream(object value, Stream stream, JsonSerializerSettings settings = null)
    {
        if (stream == null)
            throw new ArgumentNullException("stream");
        using (var writer = new StreamWriter(stream))
        {
            SerializeToStream(value, writer, settings);
        }
    }
}

由于一个大字符串需要一个大的连续内存块用于底层char数组,这就是你要运行内存不足的地方第一的。另见 Json.NET 的 Performance Tips

To minimize memory usage and the number of objects allocated, Json.NET supports serializing and deserializing directly to a stream. Reading or writing JSON a piece at a time, instead of having the entire JSON string loaded into memory, is especially important when working with JSON documents greater than 85kb in size to avoid the JSON string ending up in the large object heap.

接下来,请务必将所有一次性物品包装在 using 语句中,如下所示。

这可能会解决您的问题,但如果不能,您可以序列化一个 IDataReader to JSON using the following JsonConverter:

public class DataReaderConverter : JsonConverter
{
    public override bool CanConvert(Type objectType)
    {
        return typeof(IDataReader).IsAssignableFrom(objectType);
    }

    public override bool CanRead { get { return false; } }

    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        throw new NotImplementedException();
    }

    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        var reader = (IDataReader)value;
        writer.WriteStartArray();
        while (reader.Read())
        {
            writer.WriteStartObject();
            for (int i = 0; i < reader.FieldCount; i++)
            {
                writer.WritePropertyName(reader.GetName(i));
                if (reader.IsDBNull(i))
                    writer.WriteNull();
                else
                    serializer.Serialize(writer, reader[i]);
            }
            writer.WriteEndObject();
        }
        writer.WriteEndArray();
    }
}

然后序列化为stream如下:

public static class ExcelExtensions
{
    private static string GetExcelConnectionString(string path)
    {
        string connectionString = string.Empty;
        if (path.EndsWith(".xls"))
        {
            connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;
            Data Source={0};
            Extended Properties=""Excel 8.0;HDR=YES;IMEX=1""", path);
        }
        else if (path.EndsWith(".xlsx"))
        {
            connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;
            Data Source={0};
            Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1""", path);
        }
        return connectionString;
    }

    public static string SerializeJsonToString(string path, string workSheetName, JsonSerializerSettings settings = null)
    {
        using (var writer = new StringWriter())
        {
            SerializeJsonToStream(path, workSheetName, writer, settings);
            return writer.ToString();
        }
    }

    public static void SerializeJsonToStream(string path, string workSheetName, Stream stream, JsonSerializerSettings settings = null)
    {
        using (var writer = new StreamWriter(stream))
            SerializeJsonToStream(path, workSheetName, writer, settings);
    }

    public static void SerializeJsonToStream(string path, string workSheetName, TextWriter writer, JsonSerializerSettings settings = null)
    {
        settings = settings ?? new JsonSerializerSettings();
        var converter = new DataReaderConverter();
        settings.Converters.Add(converter);
        try
        {
            string connectionString = GetExcelConnectionString(path);
            DbProviderFactory factory = DbProviderFactories.GetFactory("System.Data.OleDb");

            using (OleDbConnection conn = new OleDbConnection(connectionString))
            {
                conn.Open();
                using (DbCommand selectCommand = factory.CreateCommand())
                {
                    selectCommand.CommandText = String.Format("SELECT * FROM [{0}]", workSheetName);
                    selectCommand.Connection = conn;

                    using (var reader = selectCommand.ExecuteReader())
                    {
                        JsonExtensions.SerializeToStream(reader, writer, settings);
                    }
                }
            }
        }
        finally
        {
            settings.Converters.Remove(converter);
        }
    }
}

注意 - 轻微测试。在将其投入生产之前,请务必针对您现有的方法对其进行单元测试!对于转换器代码,我使用 JSON Serialization of a DataReader 作为灵感。

更新

我的转换器发出 JSON 的结构与 Json.NET 的 DataTableConverter 相同。因此,您将能够使用 Json.NET 自动反序列化为 DataTable。如果您喜欢更紧凑的格式,您可以定义自己的格式,例如:

{
  "columns": [
    "Name 1",
    "Name 2"
  ],
  "rows": [
    [
      "value 11",
      "value 12"
    ],
    [
      "value 21",
      "value 22"
    ]
  ]
}

然后他们创建了以下转换器:

public class DataReaderArrayConverter : JsonConverter
{
    public override bool CanConvert(Type objectType)
    {
        return typeof(IDataReader).IsAssignableFrom(objectType);
    }

    public override bool CanRead { get { return false; } }

    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        throw new NotImplementedException();
    }

    static string[] GetFieldNames(IDataReader reader)
    {
        var fieldNames = new string[reader.FieldCount];
        for (int i = 0; i < reader.FieldCount; i++)
            fieldNames[i] = reader.GetName(i);
        return fieldNames;
    }

    static void ValidateFieldNames(IDataReader reader, string[] fieldNames)
    {
        if (reader.FieldCount != fieldNames.Length)
            throw new InvalidOperationException("Unequal record lengths");
        for (int i = 0; i < reader.FieldCount; i++)
            if (fieldNames[i] != reader.GetName(i))
                throw new InvalidOperationException(string.Format("Field names at index {0} differ: \"{1}\" vs \"{2}\"", i, fieldNames[i], reader.GetName(i)));
    }

    const string columnsName = "columns";
    const string rowsName = "rows";

    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        var reader = (IDataReader)value;
        writer.WriteStartObject();
        string[] fieldNames = null;
        while (reader.Read())
        {
            if (fieldNames == null)
            {
                writer.WritePropertyName(columnsName);
                fieldNames = GetFieldNames(reader);
                serializer.Serialize(writer, fieldNames);
                writer.WritePropertyName(rowsName);
                writer.WriteStartArray();
            }
            else
            {
                ValidateFieldNames(reader, fieldNames);
            }

            writer.WriteStartArray();
            for (int i = 0; i < reader.FieldCount; i++)
            {
                if (reader.IsDBNull(i))
                    writer.WriteNull();
                else
                    serializer.Serialize(writer, reader[i]);
            }
            writer.WriteEndArray();
        }
        if (fieldNames != null)
        {
            writer.WriteEndArray();
        }
        writer.WriteEndObject();
    }
}

当然,您需要在客户端创建自己的反序列化转换器。

或者,您可以考虑压缩您的回复。我从未尝试过,但看到 HttpWebRequest and GZip Http Responses and ASP.NET GZip Encoding Caveats.