在 C# 中读取 mzXML(质谱)文件
Reading mzXML (mass spectrometry) files in C#
我正在尝试创建一个小型应用程序,从 mzXML 文件中读取质谱数据,并为可能发表的论文绘制一张美观的图,但是我不知道如何从 XML 文件中读取实际的数据。
据我所知,我需要使用 Base64 对其进行解码,使用 bzip 进行解压缩,将网络字节顺序转换为客户端字节顺序,最后生成某种双精度数组。我只是对编程的这方面了解不够,无法将其他在线代码片段翻译成 C#。
这是我在网上找到的一些代码,但我无法翻译它。
#!/usr/bin/perl
use strict;
use warnings;
use Compress::Zlib qw(uncompress);
use MIME::Base64;

# Decodes a peak list that is stored as Base64 text wrapping a zlib-compressed
# stream of 64-bit floats in network (big-endian) order, laid out as
# m/z, intensity, m/z, intensity, ...  Prints one "m/z intensity" pair per line.

# Data is Base64 encoded and compressed with zlib
my $data = 'ENCODED DATA STRING';#keep encoded data here or read from file

# Decode and uncompress the data. uncompress() returns undef when the input is
# not a valid zlib stream, so stop with a clear message instead of silently
# printing nothing.
my $inflated = uncompress( decode_base64($data) );
defined $inflated
    or die "Failed to zlib-uncompress the Base64-decoded data\n";

# "d>*" reads every big-endian 64-bit float directly, which avoids the detour
# through a 64-bit integer and back.
my @values = unpack( "d>*", $inflated );

# flag zero for m/z and 1 for intensity
my $flag = 0;
foreach my $val (@values) {
    if ( $flag == 0 ) {
        # m/z value: printed first, followed by a space.
        my $text = sprintf( "%.12g", $val );
        print "$text ";
        $flag = 1;
    }
    else {
        # Intensity value: an exact zero is printed as "0.0" rather than "0".
        my $text = ( $val == 0 ) ? "0.0" : sprintf( "%.12g", $val );
        print "$text\n";
        $flag = 0;
    }
}
Example of the data which I can't read:
https://pastebin.com/1k51rNZc
我尝试使用我认为等效的 C# 代码来复制代码,但我不得不承认我不知道我在这方面做了什么。
谢谢。
解决方案如下(不漂亮)
/// <summary>
/// Decodes the Base64 text in <c>DataString</c> into two lists of 32-bit floats and
/// stores them in <c>X</c> (even-indexed values, m/z) and <c>Y</c> (odd-indexed values, intensity).
/// </summary>
/// <remarks>
/// Each 4-byte group is interpreted as a big-endian (network order) single-precision float.
/// An empty payload yields two empty lists.
/// NOTE(review): despite the method name, no zlib inflation happens here; if the peak data
/// is compressed it presumably has to be inflated before this step - confirm against the caller.
/// </remarks>
/// <exception cref="System.ArgumentException">
/// The decoded payload length is not a multiple of four bytes.
/// </exception>
void Decompress()
{
    const int FloatSize = sizeof(float);

    var bytes = System.Convert.FromBase64String(DataString);
    if (bytes.Length % FloatSize != 0)
    {
        throw new System.ArgumentException(
            "The decoded peak data length (" + bytes.Length + " bytes) is not a multiple of " + FloatSize + ".");
    }

    int valueCount = bytes.Length / FloatSize;
    List<float> mz_list = new List<float>((valueCount + 1) / 2);
    List<float> intensity_list = new List<float>(valueCount / 2);

    // Scratch buffer reused for every value so the source array is never mutated.
    byte[] word = new byte[FloatSize];
    for (int idx = 0; idx < valueCount; idx++)
    {
        System.Array.Copy(bytes, idx * FloatSize, word, 0, FloatSize);

        // The payload is big-endian; BitConverter reads in host order, so swap on little-endian hosts.
        if (System.BitConverter.IsLittleEndian)
        {
            System.Array.Reverse(word);
        }

        float value = System.BitConverter.ToSingle(word, 0);
        if (idx % 2 == 0)
        {
            mz_list.Add(value);
        }
        else
        {
            intensity_list.Add(value);
        }
    }

    X = mz_list;
    Y = intensity_list;
}
pack/unpack 的 C# 移植版(原作者不是我,这是我东拼西凑改出来的"科学怪人"版本)
// This is a crude implementation of a format string based struct converter for C#.
// This is probably not the best implementation, the fastest implementation, the most bug-proof implementation, or even the most functional implementation.
// It's provided as-is for free. Enjoy.
/// <summary>
/// A small format-string driven converter between byte arrays and boxed primitive values,
/// modelled on Python's <c>struct.pack</c> / <c>struct.unpack</c>.
/// </summary>
/// <remarks>
/// Supported format characters: <c>q</c>/<c>Q</c> (64-bit signed/unsigned integer),
/// <c>i</c> and <c>l</c> (32-bit signed), <c>I</c> and <c>L</c> (32-bit unsigned),
/// <c>h</c>/<c>H</c> (16-bit signed/unsigned), <c>b</c>/<c>B</c> (signed/unsigned byte),
/// <c>f</c> (32-bit float) and <c>x</c> (skip one byte). A leading <c>&lt;</c> or <c>&gt;</c>
/// marks the data as little- or big-endian; without a marker the host byte order is used.
/// </remarks>
public class StructConverter
{
    static bool debug = false;

    // Returns the number of bytes one format character consumes; throws for unknown characters.
    private static int SizeOfFormatChar(char c)
    {
        switch (c)
        {
            case 'q':
            case 'Q':
                return 8;
            case 'i':
            case 'I':
            case 'l':
            case 'L':
            case 'f':
                return 4;
            case 'h':
            case 'H':
                return 2;
            case 'b':
            case 'B':
            case 'x':
                return 1;
            default:
                throw new ArgumentException("Invalid character found in format string.");
        }
    }

    // Type-agnostic front end for BitConverter.GetBytes; always returns a fresh array in host byte order.
    private static byte[] TypeAgnosticGetBytes(object o)
    {
        if (o is int) return BitConverter.GetBytes((int)o);
        if (o is uint) return BitConverter.GetBytes((uint)o);
        if (o is long) return BitConverter.GetBytes((long)o);
        if (o is ulong) return BitConverter.GetBytes((ulong)o);
        if (o is short) return BitConverter.GetBytes((short)o);
        if (o is ushort) return BitConverter.GetBytes((ushort)o);
        if (o is float) return BitConverter.GetBytes((float)o);
        if (o is byte) return new byte[] { (byte)o };
        // A boxed sbyte can only be unboxed as sbyte; reinterpret its bits as a byte afterwards.
        if (o is sbyte) return new byte[] { unchecked((byte)(sbyte)o) };
        throw new ArgumentException("Unsupported object type found");
    }

    // Maps a boxed value to the format character that Unpack needs to read it back.
    private static string GetFormatSpecifierFor(object o)
    {
        if (o is int) return "i";
        if (o is uint) return "I";
        if (o is long) return "q";
        if (o is ulong) return "Q";
        if (o is short) return "h";
        if (o is ushort) return "H";
        if (o is float) return "f";
        if (o is byte) return "B";
        if (o is sbyte) return "b";
        throw new ArgumentException("Unsupported object type found");
    }

    /// <summary>
    /// Convert a byte array into an array of objects based on Python's "struct.unpack" protocol.
    /// </summary>
    /// <param name="fmt">A "struct.pack"-compatible format string, optionally prefixed with
    /// <c>&lt;</c> (little endian) or <c>&gt;</c> (big endian).</param>
    /// <param name="bytes">An array of bytes to convert to objects; its length must equal the
    /// total size described by <paramref name="fmt"/>.</param>
    /// <returns>Array of boxed values, one per format character other than <c>x</c>.</returns>
    /// <remarks>You are responsible for casting the objects in the array back to their proper types.</remarks>
    /// <exception cref="ArgumentException">The format string is empty or contains an unknown
    /// character, or the byte count does not match the format.</exception>
    public static object[] Unpack(string fmt, byte[] bytes)
    {
        if (fmt == null) throw new ArgumentNullException("fmt");
        if (bytes == null) throw new ArgumentNullException("bytes");

        if (debug) Debug.WriteLine("Format string is length {0}, {1} bytes provided.", fmt.Length, bytes.Length);

        if (fmt.Length < 1) throw new ArgumentException("Format string cannot be empty.");

        // An optional first character declares the byte order of the data; a swap is needed
        // only when that order differs from the host's.
        bool endianFlip = false;
        if (fmt[0] == '<')
        {
            endianFlip = !BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }
        else if (fmt[0] == '>')
        {
            endianFlip = BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }

        // First pass: validate every format character and work out the required byte count.
        int totalByteLength = 0;
        foreach (char c in fmt)
        {
            totalByteLength += SizeOfFormatChar(c);
        }

        if (debug) Debug.WriteLine("Endianness will {0}be flipped.", (object)(endianFlip ? "" : "NOT "));
        if (debug) Debug.WriteLine("The byte array is expected to be {0} bytes long.", (object)totalByteLength);

        if (bytes.Length != totalByteLength) throw new ArgumentException("The number of bytes provided does not match the total length of the format string.");

        // Second pass: decode each field from its own copy so the caller's array is never modified.
        List<object> outputList = new List<object>(fmt.Length);
        int position = 0;
        foreach (char c in fmt)
        {
            int size = SizeOfFormatChar(c);
            if (c != 'x')
            {
                byte[] field = new byte[size];
                Array.Copy(bytes, position, field, 0, size);
                if (endianFlip) Array.Reverse(field);

                switch (c)
                {
                    case 'q':
                        outputList.Add((object)BitConverter.ToInt64(field, 0));
                        break;
                    case 'Q':
                        outputList.Add((object)BitConverter.ToUInt64(field, 0));
                        break;
                    case 'i':
                    case 'l':
                        outputList.Add((object)BitConverter.ToInt32(field, 0));
                        break;
                    case 'I':
                    case 'L':
                        outputList.Add((object)BitConverter.ToUInt32(field, 0));
                        break;
                    case 'h':
                        outputList.Add((object)BitConverter.ToInt16(field, 0));
                        break;
                    case 'H':
                        outputList.Add((object)BitConverter.ToUInt16(field, 0));
                        break;
                    case 'b':
                        outputList.Add((object)unchecked((sbyte)field[0]));
                        break;
                    case 'B':
                        outputList.Add((object)field[0]);
                        break;
                    case 'f':
                        outputList.Add((object)BitConverter.ToSingle(field, 0));
                        break;
                    default:
                        throw new ArgumentException("You should not be here.");
                }
            }

            // Every field, including a skipped 'x' byte, advances by exactly its own size.
            position += size;
        }

        return outputList.ToArray();
    }

    /// <summary>
    /// Unpacks <paramref name="len"/> unsigned 32-bit integers followed by whatever
    /// <paramref name="fmt"/> describes.
    /// </summary>
    /// <param name="len">Number of leading <c>L</c> (unsigned 32-bit) fields.</param>
    /// <param name="fmt">Additional format characters appended after the <c>L</c> run; may be empty.</param>
    /// <param name="bytes">The bytes to decode.</param>
    /// <returns>Array of boxed values.</returns>
    public static object[] Unpack(int len, string fmt, byte[] bytes)
    {
        string _fmt = new string('L', len);
        _fmt += fmt;
        return Unpack(_fmt, bytes);
    }

    /// <summary>
    /// Convert an array of objects to a byte array, along with a string that can be used with Unpack.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <param name="LittleEndian">Set to False if you want to use big endian output.</param>
    /// <param name="NeededFormatStringToRecover">Variable to place an 'Unpack'-compatible format string into.</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    /// <exception cref="ArgumentException">An item has a type that cannot be packed.</exception>
    public static byte[] Pack(object[] items, bool LittleEndian, out string NeededFormatStringToRecover)
    {
        if (items == null) throw new ArgumentNullException("items");

        List<byte> outputBytes = new List<byte>();

        // BitConverter emits host order; reverse each value when the requested order differs.
        bool endianFlip = (LittleEndian != BitConverter.IsLittleEndian);

        string outString = LittleEndian ? "<" : ">";

        foreach (object o in items)
        {
            byte[] theseBytes = TypeAgnosticGetBytes(o);
            if (endianFlip) Array.Reverse(theseBytes);
            outString += GetFormatSpecifierFor(o);
            outputBytes.AddRange(theseBytes);
        }

        NeededFormatStringToRecover = outString;
        return outputBytes.ToArray();
    }

    /// <summary>
    /// Packs the items as little-endian bytes and discards the recovery format string.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    public static byte[] Pack(object[] items)
    {
        string dummy;
        return Pack(items, true, out dummy);
    }
}
看看这些数字是否有意义:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.Drawing;
namespace ConsoleApplication1
{
    /// <summary>
    /// Loads an XML file, finds its first <c>peaks</c> element and decodes the Base64 payload
    /// into (first value, second value) points.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.xml";

        /// <summary>
        /// Entry point: reads <c>FILENAME</c> and decodes the peak list into a point array.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            XDocument doc = XDocument.Load(FILENAME);

            // Match on the local name anywhere in the document: the element may be nested and
            // may live in an XML namespace, in which case Element("peaks") would find nothing.
            XElement peaksElement = doc.Descendants().FirstOrDefault(e => e.Name.LocalName == "peaks");
            if (peaksElement == null)
            {
                throw new InvalidOperationException("No <peaks> element was found in " + FILENAME);
            }

            byte[] data = Convert.FromBase64String((string)peaksElement);
            PointF[] points = DecodePeaks(data);
        }

        /// <summary>
        /// Decodes a buffer of big-endian (network order) 32-bit floats into points, two floats per point.
        /// </summary>
        /// <param name="data">Raw bytes; the length must be a multiple of eight.</param>
        /// <returns>One point per pair of floats; presumably X is m/z and Y is intensity.</returns>
        /// <exception cref="ArgumentException">The buffer does not hold a whole number of float pairs.</exception>
        /// <remarks>
        /// NOTE(review): assumes 32-bit precision and uncompressed data - confirm against the
        /// attributes of the peaks element in the input file.
        /// </remarks>
        static PointF[] DecodePeaks(byte[] data)
        {
            const int FloatSize = sizeof(float);
            const int PairSize = 2 * FloatSize;

            if (data.Length % PairSize != 0)
            {
                throw new ArgumentException("Peak data length must be a multiple of " + PairSize + " bytes.");
            }

            PointF[] points = new PointF[data.Length / PairSize];
            byte[] scratch = new byte[FloatSize];
            for (int i = 0; i < points.Length; i++)
            {
                int offset = i * PairSize;
                float first = ReadNetworkSingle(data, offset, scratch);
                float second = ReadNetworkSingle(data, offset + FloatSize, scratch);
                points[i] = new PointF(first, second);
            }

            return points;
        }

        // Reads one big-endian float at the given offset, using the scratch buffer so the source is not modified.
        static float ReadNetworkSingle(byte[] data, int offset, byte[] scratch)
        {
            Array.Copy(data, offset, scratch, 0, scratch.Length);
            if (BitConverter.IsLittleEndian)
            {
                Array.Reverse(scratch);
            }

            return BitConverter.ToSingle(scratch, 0);
        }
    }
}
解决方案如下(不漂亮)
/// <summary>
/// Decodes the Base64 text in <c>DataString</c> into two lists of 32-bit floats and
/// stores them in <c>X</c> (even-indexed values, m/z) and <c>Y</c> (odd-indexed values, intensity).
/// </summary>
/// <remarks>
/// Each 4-byte group is interpreted as a big-endian (network order) single-precision float.
/// An empty payload yields two empty lists.
/// NOTE(review): despite the method name, no zlib inflation happens here; if the peak data
/// is compressed it presumably has to be inflated before this step - confirm against the caller.
/// </remarks>
/// <exception cref="System.ArgumentException">
/// The decoded payload length is not a multiple of four bytes.
/// </exception>
void Decompress()
{
    const int FloatSize = sizeof(float);

    var bytes = System.Convert.FromBase64String(DataString);
    if (bytes.Length % FloatSize != 0)
    {
        throw new System.ArgumentException(
            "The decoded peak data length (" + bytes.Length + " bytes) is not a multiple of " + FloatSize + ".");
    }

    int valueCount = bytes.Length / FloatSize;
    List<float> mz_list = new List<float>((valueCount + 1) / 2);
    List<float> intensity_list = new List<float>(valueCount / 2);

    // Scratch buffer reused for every value so the source array is never mutated.
    byte[] word = new byte[FloatSize];
    for (int idx = 0; idx < valueCount; idx++)
    {
        System.Array.Copy(bytes, idx * FloatSize, word, 0, FloatSize);

        // The payload is big-endian; BitConverter reads in host order, so swap on little-endian hosts.
        if (System.BitConverter.IsLittleEndian)
        {
            System.Array.Reverse(word);
        }

        float value = System.BitConverter.ToSingle(word, 0);
        if (idx % 2 == 0)
        {
            mz_list.Add(value);
        }
        else
        {
            intensity_list.Add(value);
        }
    }

    X = mz_list;
    Y = intensity_list;
}
pack/unpack 的 C# 移植版(原作者不是我,这是我东拼西凑改出来的"科学怪人"版本)
// This is a crude implementation of a format string based struct converter for C#.
// This is probably not the best implementation, the fastest implementation, the most bug-proof implementation, or even the most functional implementation.
// It's provided as-is for free. Enjoy.
/// <summary>
/// A small format-string driven converter between byte arrays and boxed primitive values,
/// modelled on Python's <c>struct.pack</c> / <c>struct.unpack</c>.
/// </summary>
/// <remarks>
/// Supported format characters: <c>q</c>/<c>Q</c> (64-bit signed/unsigned integer),
/// <c>i</c> and <c>l</c> (32-bit signed), <c>I</c> and <c>L</c> (32-bit unsigned),
/// <c>h</c>/<c>H</c> (16-bit signed/unsigned), <c>b</c>/<c>B</c> (signed/unsigned byte),
/// <c>f</c> (32-bit float) and <c>x</c> (skip one byte). A leading <c>&lt;</c> or <c>&gt;</c>
/// marks the data as little- or big-endian; without a marker the host byte order is used.
/// </remarks>
public class StructConverter
{
    static bool debug = false;

    // Returns the number of bytes one format character consumes; throws for unknown characters.
    private static int SizeOfFormatChar(char c)
    {
        switch (c)
        {
            case 'q':
            case 'Q':
                return 8;
            case 'i':
            case 'I':
            case 'l':
            case 'L':
            case 'f':
                return 4;
            case 'h':
            case 'H':
                return 2;
            case 'b':
            case 'B':
            case 'x':
                return 1;
            default:
                throw new ArgumentException("Invalid character found in format string.");
        }
    }

    // Type-agnostic front end for BitConverter.GetBytes; always returns a fresh array in host byte order.
    private static byte[] TypeAgnosticGetBytes(object o)
    {
        if (o is int) return BitConverter.GetBytes((int)o);
        if (o is uint) return BitConverter.GetBytes((uint)o);
        if (o is long) return BitConverter.GetBytes((long)o);
        if (o is ulong) return BitConverter.GetBytes((ulong)o);
        if (o is short) return BitConverter.GetBytes((short)o);
        if (o is ushort) return BitConverter.GetBytes((ushort)o);
        if (o is float) return BitConverter.GetBytes((float)o);
        if (o is byte) return new byte[] { (byte)o };
        // A boxed sbyte can only be unboxed as sbyte; reinterpret its bits as a byte afterwards.
        if (o is sbyte) return new byte[] { unchecked((byte)(sbyte)o) };
        throw new ArgumentException("Unsupported object type found");
    }

    // Maps a boxed value to the format character that Unpack needs to read it back.
    private static string GetFormatSpecifierFor(object o)
    {
        if (o is int) return "i";
        if (o is uint) return "I";
        if (o is long) return "q";
        if (o is ulong) return "Q";
        if (o is short) return "h";
        if (o is ushort) return "H";
        if (o is float) return "f";
        if (o is byte) return "B";
        if (o is sbyte) return "b";
        throw new ArgumentException("Unsupported object type found");
    }

    /// <summary>
    /// Convert a byte array into an array of objects based on Python's "struct.unpack" protocol.
    /// </summary>
    /// <param name="fmt">A "struct.pack"-compatible format string, optionally prefixed with
    /// <c>&lt;</c> (little endian) or <c>&gt;</c> (big endian).</param>
    /// <param name="bytes">An array of bytes to convert to objects; its length must equal the
    /// total size described by <paramref name="fmt"/>.</param>
    /// <returns>Array of boxed values, one per format character other than <c>x</c>.</returns>
    /// <remarks>You are responsible for casting the objects in the array back to their proper types.</remarks>
    /// <exception cref="ArgumentException">The format string is empty or contains an unknown
    /// character, or the byte count does not match the format.</exception>
    public static object[] Unpack(string fmt, byte[] bytes)
    {
        if (fmt == null) throw new ArgumentNullException("fmt");
        if (bytes == null) throw new ArgumentNullException("bytes");

        if (debug) Debug.WriteLine("Format string is length {0}, {1} bytes provided.", fmt.Length, bytes.Length);

        if (fmt.Length < 1) throw new ArgumentException("Format string cannot be empty.");

        // An optional first character declares the byte order of the data; a swap is needed
        // only when that order differs from the host's.
        bool endianFlip = false;
        if (fmt[0] == '<')
        {
            endianFlip = !BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }
        else if (fmt[0] == '>')
        {
            endianFlip = BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }

        // First pass: validate every format character and work out the required byte count.
        int totalByteLength = 0;
        foreach (char c in fmt)
        {
            totalByteLength += SizeOfFormatChar(c);
        }

        if (debug) Debug.WriteLine("Endianness will {0}be flipped.", (object)(endianFlip ? "" : "NOT "));
        if (debug) Debug.WriteLine("The byte array is expected to be {0} bytes long.", (object)totalByteLength);

        if (bytes.Length != totalByteLength) throw new ArgumentException("The number of bytes provided does not match the total length of the format string.");

        // Second pass: decode each field from its own copy so the caller's array is never modified.
        List<object> outputList = new List<object>(fmt.Length);
        int position = 0;
        foreach (char c in fmt)
        {
            int size = SizeOfFormatChar(c);
            if (c != 'x')
            {
                byte[] field = new byte[size];
                Array.Copy(bytes, position, field, 0, size);
                if (endianFlip) Array.Reverse(field);

                switch (c)
                {
                    case 'q':
                        outputList.Add((object)BitConverter.ToInt64(field, 0));
                        break;
                    case 'Q':
                        outputList.Add((object)BitConverter.ToUInt64(field, 0));
                        break;
                    case 'i':
                    case 'l':
                        outputList.Add((object)BitConverter.ToInt32(field, 0));
                        break;
                    case 'I':
                    case 'L':
                        outputList.Add((object)BitConverter.ToUInt32(field, 0));
                        break;
                    case 'h':
                        outputList.Add((object)BitConverter.ToInt16(field, 0));
                        break;
                    case 'H':
                        outputList.Add((object)BitConverter.ToUInt16(field, 0));
                        break;
                    case 'b':
                        outputList.Add((object)unchecked((sbyte)field[0]));
                        break;
                    case 'B':
                        outputList.Add((object)field[0]);
                        break;
                    case 'f':
                        outputList.Add((object)BitConverter.ToSingle(field, 0));
                        break;
                    default:
                        throw new ArgumentException("You should not be here.");
                }
            }

            // Every field, including a skipped 'x' byte, advances by exactly its own size.
            position += size;
        }

        return outputList.ToArray();
    }

    /// <summary>
    /// Unpacks <paramref name="len"/> unsigned 32-bit integers followed by whatever
    /// <paramref name="fmt"/> describes.
    /// </summary>
    /// <param name="len">Number of leading <c>L</c> (unsigned 32-bit) fields.</param>
    /// <param name="fmt">Additional format characters appended after the <c>L</c> run; may be empty.</param>
    /// <param name="bytes">The bytes to decode.</param>
    /// <returns>Array of boxed values.</returns>
    public static object[] Unpack(int len, string fmt, byte[] bytes)
    {
        string _fmt = new string('L', len);
        _fmt += fmt;
        return Unpack(_fmt, bytes);
    }

    /// <summary>
    /// Convert an array of objects to a byte array, along with a string that can be used with Unpack.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <param name="LittleEndian">Set to False if you want to use big endian output.</param>
    /// <param name="NeededFormatStringToRecover">Variable to place an 'Unpack'-compatible format string into.</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    /// <exception cref="ArgumentException">An item has a type that cannot be packed.</exception>
    public static byte[] Pack(object[] items, bool LittleEndian, out string NeededFormatStringToRecover)
    {
        if (items == null) throw new ArgumentNullException("items");

        List<byte> outputBytes = new List<byte>();

        // BitConverter emits host order; reverse each value when the requested order differs.
        bool endianFlip = (LittleEndian != BitConverter.IsLittleEndian);

        string outString = LittleEndian ? "<" : ">";

        foreach (object o in items)
        {
            byte[] theseBytes = TypeAgnosticGetBytes(o);
            if (endianFlip) Array.Reverse(theseBytes);
            outString += GetFormatSpecifierFor(o);
            outputBytes.AddRange(theseBytes);
        }

        NeededFormatStringToRecover = outString;
        return outputBytes.ToArray();
    }

    /// <summary>
    /// Packs the items as little-endian bytes and discards the recovery format string.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    public static byte[] Pack(object[] items)
    {
        string dummy;
        return Pack(items, true, out dummy);
    }
}
我正在尝试创建一个小型应用程序,从 mzXML 文件中读取质谱数据,并为可能发表的论文绘制一张美观的图,但是我不知道如何从 XML 文件中读取实际的数据。
据我所知,我需要使用 Base64 对其进行解码,使用 bzip 进行解压缩,将网络字节顺序转换为客户端字节顺序,最后生成某种双精度数组。我只是对编程的这方面了解不够,无法将其他在线代码片段翻译成 C#。
这是我在网上找到的一些代码,但我无法翻译它。
#!/usr/bin/perl
use strict;
use warnings;
use Compress::Zlib qw(uncompress);
use MIME::Base64;

# Decodes a peak list that is stored as Base64 text wrapping a zlib-compressed
# stream of 64-bit floats in network (big-endian) order, laid out as
# m/z, intensity, m/z, intensity, ...  Prints one "m/z intensity" pair per line.

# Data is Base64 encoded and compressed with zlib
my $data = 'ENCODED DATA STRING';#keep encoded data here or read from file

# Decode and uncompress the data. uncompress() returns undef when the input is
# not a valid zlib stream, so stop with a clear message instead of silently
# printing nothing.
my $inflated = uncompress( decode_base64($data) );
defined $inflated
    or die "Failed to zlib-uncompress the Base64-decoded data\n";

# "d>*" reads every big-endian 64-bit float directly, which avoids the detour
# through a 64-bit integer and back.
my @values = unpack( "d>*", $inflated );

# flag zero for m/z and 1 for intensity
my $flag = 0;
foreach my $val (@values) {
    if ( $flag == 0 ) {
        # m/z value: printed first, followed by a space.
        my $text = sprintf( "%.12g", $val );
        print "$text ";
        $flag = 1;
    }
    else {
        # Intensity value: an exact zero is printed as "0.0" rather than "0".
        my $text = ( $val == 0 ) ? "0.0" : sprintf( "%.12g", $val );
        print "$text\n";
        $flag = 0;
    }
}
Example of the data which I can't read: https://pastebin.com/1k51rNZc
我尝试使用我认为等效的 C# 代码来复制代码,但我不得不承认我不知道我在这方面做了什么。
谢谢。
解决方案如下(不漂亮)
/// <summary>
/// Decodes the Base64 text in <c>DataString</c> into two lists of 32-bit floats and
/// stores them in <c>X</c> (even-indexed values, m/z) and <c>Y</c> (odd-indexed values, intensity).
/// </summary>
/// <remarks>
/// Each 4-byte group is interpreted as a big-endian (network order) single-precision float.
/// An empty payload yields two empty lists.
/// NOTE(review): despite the method name, no zlib inflation happens here; if the peak data
/// is compressed it presumably has to be inflated before this step - confirm against the caller.
/// </remarks>
/// <exception cref="System.ArgumentException">
/// The decoded payload length is not a multiple of four bytes.
/// </exception>
void Decompress()
{
    const int FloatSize = sizeof(float);

    var bytes = System.Convert.FromBase64String(DataString);
    if (bytes.Length % FloatSize != 0)
    {
        throw new System.ArgumentException(
            "The decoded peak data length (" + bytes.Length + " bytes) is not a multiple of " + FloatSize + ".");
    }

    int valueCount = bytes.Length / FloatSize;
    List<float> mz_list = new List<float>((valueCount + 1) / 2);
    List<float> intensity_list = new List<float>(valueCount / 2);

    // Scratch buffer reused for every value so the source array is never mutated.
    byte[] word = new byte[FloatSize];
    for (int idx = 0; idx < valueCount; idx++)
    {
        System.Array.Copy(bytes, idx * FloatSize, word, 0, FloatSize);

        // The payload is big-endian; BitConverter reads in host order, so swap on little-endian hosts.
        if (System.BitConverter.IsLittleEndian)
        {
            System.Array.Reverse(word);
        }

        float value = System.BitConverter.ToSingle(word, 0);
        if (idx % 2 == 0)
        {
            mz_list.Add(value);
        }
        else
        {
            intensity_list.Add(value);
        }
    }

    X = mz_list;
    Y = intensity_list;
}
pack/unpack 的 C# 移植版(原作者不是我,这是我东拼西凑改出来的"科学怪人"版本)
// This is a crude implementation of a format string based struct converter for C#.
// This is probably not the best implementation, the fastest implementation, the most bug-proof implementation, or even the most functional implementation.
// It's provided as-is for free. Enjoy.
/// <summary>
/// A small format-string driven converter between byte arrays and boxed primitive values,
/// modelled on Python's <c>struct.pack</c> / <c>struct.unpack</c>.
/// </summary>
/// <remarks>
/// Supported format characters: <c>q</c>/<c>Q</c> (64-bit signed/unsigned integer),
/// <c>i</c> and <c>l</c> (32-bit signed), <c>I</c> and <c>L</c> (32-bit unsigned),
/// <c>h</c>/<c>H</c> (16-bit signed/unsigned), <c>b</c>/<c>B</c> (signed/unsigned byte),
/// <c>f</c> (32-bit float) and <c>x</c> (skip one byte). A leading <c>&lt;</c> or <c>&gt;</c>
/// marks the data as little- or big-endian; without a marker the host byte order is used.
/// </remarks>
public class StructConverter
{
    static bool debug = false;

    // Returns the number of bytes one format character consumes; throws for unknown characters.
    private static int SizeOfFormatChar(char c)
    {
        switch (c)
        {
            case 'q':
            case 'Q':
                return 8;
            case 'i':
            case 'I':
            case 'l':
            case 'L':
            case 'f':
                return 4;
            case 'h':
            case 'H':
                return 2;
            case 'b':
            case 'B':
            case 'x':
                return 1;
            default:
                throw new ArgumentException("Invalid character found in format string.");
        }
    }

    // Type-agnostic front end for BitConverter.GetBytes; always returns a fresh array in host byte order.
    private static byte[] TypeAgnosticGetBytes(object o)
    {
        if (o is int) return BitConverter.GetBytes((int)o);
        if (o is uint) return BitConverter.GetBytes((uint)o);
        if (o is long) return BitConverter.GetBytes((long)o);
        if (o is ulong) return BitConverter.GetBytes((ulong)o);
        if (o is short) return BitConverter.GetBytes((short)o);
        if (o is ushort) return BitConverter.GetBytes((ushort)o);
        if (o is float) return BitConverter.GetBytes((float)o);
        if (o is byte) return new byte[] { (byte)o };
        // A boxed sbyte can only be unboxed as sbyte; reinterpret its bits as a byte afterwards.
        if (o is sbyte) return new byte[] { unchecked((byte)(sbyte)o) };
        throw new ArgumentException("Unsupported object type found");
    }

    // Maps a boxed value to the format character that Unpack needs to read it back.
    private static string GetFormatSpecifierFor(object o)
    {
        if (o is int) return "i";
        if (o is uint) return "I";
        if (o is long) return "q";
        if (o is ulong) return "Q";
        if (o is short) return "h";
        if (o is ushort) return "H";
        if (o is float) return "f";
        if (o is byte) return "B";
        if (o is sbyte) return "b";
        throw new ArgumentException("Unsupported object type found");
    }

    /// <summary>
    /// Convert a byte array into an array of objects based on Python's "struct.unpack" protocol.
    /// </summary>
    /// <param name="fmt">A "struct.pack"-compatible format string, optionally prefixed with
    /// <c>&lt;</c> (little endian) or <c>&gt;</c> (big endian).</param>
    /// <param name="bytes">An array of bytes to convert to objects; its length must equal the
    /// total size described by <paramref name="fmt"/>.</param>
    /// <returns>Array of boxed values, one per format character other than <c>x</c>.</returns>
    /// <remarks>You are responsible for casting the objects in the array back to their proper types.</remarks>
    /// <exception cref="ArgumentException">The format string is empty or contains an unknown
    /// character, or the byte count does not match the format.</exception>
    public static object[] Unpack(string fmt, byte[] bytes)
    {
        if (fmt == null) throw new ArgumentNullException("fmt");
        if (bytes == null) throw new ArgumentNullException("bytes");

        if (debug) Debug.WriteLine("Format string is length {0}, {1} bytes provided.", fmt.Length, bytes.Length);

        if (fmt.Length < 1) throw new ArgumentException("Format string cannot be empty.");

        // An optional first character declares the byte order of the data; a swap is needed
        // only when that order differs from the host's.
        bool endianFlip = false;
        if (fmt[0] == '<')
        {
            endianFlip = !BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }
        else if (fmt[0] == '>')
        {
            endianFlip = BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }

        // First pass: validate every format character and work out the required byte count.
        int totalByteLength = 0;
        foreach (char c in fmt)
        {
            totalByteLength += SizeOfFormatChar(c);
        }

        if (debug) Debug.WriteLine("Endianness will {0}be flipped.", (object)(endianFlip ? "" : "NOT "));
        if (debug) Debug.WriteLine("The byte array is expected to be {0} bytes long.", (object)totalByteLength);

        if (bytes.Length != totalByteLength) throw new ArgumentException("The number of bytes provided does not match the total length of the format string.");

        // Second pass: decode each field from its own copy so the caller's array is never modified.
        List<object> outputList = new List<object>(fmt.Length);
        int position = 0;
        foreach (char c in fmt)
        {
            int size = SizeOfFormatChar(c);
            if (c != 'x')
            {
                byte[] field = new byte[size];
                Array.Copy(bytes, position, field, 0, size);
                if (endianFlip) Array.Reverse(field);

                switch (c)
                {
                    case 'q':
                        outputList.Add((object)BitConverter.ToInt64(field, 0));
                        break;
                    case 'Q':
                        outputList.Add((object)BitConverter.ToUInt64(field, 0));
                        break;
                    case 'i':
                    case 'l':
                        outputList.Add((object)BitConverter.ToInt32(field, 0));
                        break;
                    case 'I':
                    case 'L':
                        outputList.Add((object)BitConverter.ToUInt32(field, 0));
                        break;
                    case 'h':
                        outputList.Add((object)BitConverter.ToInt16(field, 0));
                        break;
                    case 'H':
                        outputList.Add((object)BitConverter.ToUInt16(field, 0));
                        break;
                    case 'b':
                        outputList.Add((object)unchecked((sbyte)field[0]));
                        break;
                    case 'B':
                        outputList.Add((object)field[0]);
                        break;
                    case 'f':
                        outputList.Add((object)BitConverter.ToSingle(field, 0));
                        break;
                    default:
                        throw new ArgumentException("You should not be here.");
                }
            }

            // Every field, including a skipped 'x' byte, advances by exactly its own size.
            position += size;
        }

        return outputList.ToArray();
    }

    /// <summary>
    /// Unpacks <paramref name="len"/> unsigned 32-bit integers followed by whatever
    /// <paramref name="fmt"/> describes.
    /// </summary>
    /// <param name="len">Number of leading <c>L</c> (unsigned 32-bit) fields.</param>
    /// <param name="fmt">Additional format characters appended after the <c>L</c> run; may be empty.</param>
    /// <param name="bytes">The bytes to decode.</param>
    /// <returns>Array of boxed values.</returns>
    public static object[] Unpack(int len, string fmt, byte[] bytes)
    {
        string _fmt = new string('L', len);
        _fmt += fmt;
        return Unpack(_fmt, bytes);
    }

    /// <summary>
    /// Convert an array of objects to a byte array, along with a string that can be used with Unpack.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <param name="LittleEndian">Set to False if you want to use big endian output.</param>
    /// <param name="NeededFormatStringToRecover">Variable to place an 'Unpack'-compatible format string into.</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    /// <exception cref="ArgumentException">An item has a type that cannot be packed.</exception>
    public static byte[] Pack(object[] items, bool LittleEndian, out string NeededFormatStringToRecover)
    {
        if (items == null) throw new ArgumentNullException("items");

        List<byte> outputBytes = new List<byte>();

        // BitConverter emits host order; reverse each value when the requested order differs.
        bool endianFlip = (LittleEndian != BitConverter.IsLittleEndian);

        string outString = LittleEndian ? "<" : ">";

        foreach (object o in items)
        {
            byte[] theseBytes = TypeAgnosticGetBytes(o);
            if (endianFlip) Array.Reverse(theseBytes);
            outString += GetFormatSpecifierFor(o);
            outputBytes.AddRange(theseBytes);
        }

        NeededFormatStringToRecover = outString;
        return outputBytes.ToArray();
    }

    /// <summary>
    /// Packs the items as little-endian bytes and discards the recovery format string.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    public static byte[] Pack(object[] items)
    {
        string dummy;
        return Pack(items, true, out dummy);
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.Drawing;
namespace ConsoleApplication1
{
    /// <summary>
    /// Loads an XML file, finds its first <c>peaks</c> element and decodes the Base64 payload
    /// into (first value, second value) points.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.xml";

        /// <summary>
        /// Entry point: reads <c>FILENAME</c> and decodes the peak list into a point array.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            XDocument doc = XDocument.Load(FILENAME);

            // Match on the local name anywhere in the document: the element may be nested and
            // may live in an XML namespace, in which case Element("peaks") would find nothing.
            XElement peaksElement = doc.Descendants().FirstOrDefault(e => e.Name.LocalName == "peaks");
            if (peaksElement == null)
            {
                throw new InvalidOperationException("No <peaks> element was found in " + FILENAME);
            }

            byte[] data = Convert.FromBase64String((string)peaksElement);
            PointF[] points = DecodePeaks(data);
        }

        /// <summary>
        /// Decodes a buffer of big-endian (network order) 32-bit floats into points, two floats per point.
        /// </summary>
        /// <param name="data">Raw bytes; the length must be a multiple of eight.</param>
        /// <returns>One point per pair of floats; presumably X is m/z and Y is intensity.</returns>
        /// <exception cref="ArgumentException">The buffer does not hold a whole number of float pairs.</exception>
        /// <remarks>
        /// NOTE(review): assumes 32-bit precision and uncompressed data - confirm against the
        /// attributes of the peaks element in the input file.
        /// </remarks>
        static PointF[] DecodePeaks(byte[] data)
        {
            const int FloatSize = sizeof(float);
            const int PairSize = 2 * FloatSize;

            if (data.Length % PairSize != 0)
            {
                throw new ArgumentException("Peak data length must be a multiple of " + PairSize + " bytes.");
            }

            PointF[] points = new PointF[data.Length / PairSize];
            byte[] scratch = new byte[FloatSize];
            for (int i = 0; i < points.Length; i++)
            {
                int offset = i * PairSize;
                float first = ReadNetworkSingle(data, offset, scratch);
                float second = ReadNetworkSingle(data, offset + FloatSize, scratch);
                points[i] = new PointF(first, second);
            }

            return points;
        }

        // Reads one big-endian float at the given offset, using the scratch buffer so the source is not modified.
        static float ReadNetworkSingle(byte[] data, int offset, byte[] scratch)
        {
            Array.Copy(data, offset, scratch, 0, scratch.Length);
            if (BitConverter.IsLittleEndian)
            {
                Array.Reverse(scratch);
            }

            return BitConverter.ToSingle(scratch, 0);
        }
    }
}
解决方案如下(不漂亮)
/// <summary>
/// Decodes the Base64 text in <c>DataString</c> into two lists of 32-bit floats and
/// stores them in <c>X</c> (even-indexed values, m/z) and <c>Y</c> (odd-indexed values, intensity).
/// </summary>
/// <remarks>
/// Each 4-byte group is interpreted as a big-endian (network order) single-precision float.
/// An empty payload yields two empty lists.
/// NOTE(review): despite the method name, no zlib inflation happens here; if the peak data
/// is compressed it presumably has to be inflated before this step - confirm against the caller.
/// </remarks>
/// <exception cref="System.ArgumentException">
/// The decoded payload length is not a multiple of four bytes.
/// </exception>
void Decompress()
{
    const int FloatSize = sizeof(float);

    var bytes = System.Convert.FromBase64String(DataString);
    if (bytes.Length % FloatSize != 0)
    {
        throw new System.ArgumentException(
            "The decoded peak data length (" + bytes.Length + " bytes) is not a multiple of " + FloatSize + ".");
    }

    int valueCount = bytes.Length / FloatSize;
    List<float> mz_list = new List<float>((valueCount + 1) / 2);
    List<float> intensity_list = new List<float>(valueCount / 2);

    // Scratch buffer reused for every value so the source array is never mutated.
    byte[] word = new byte[FloatSize];
    for (int idx = 0; idx < valueCount; idx++)
    {
        System.Array.Copy(bytes, idx * FloatSize, word, 0, FloatSize);

        // The payload is big-endian; BitConverter reads in host order, so swap on little-endian hosts.
        if (System.BitConverter.IsLittleEndian)
        {
            System.Array.Reverse(word);
        }

        float value = System.BitConverter.ToSingle(word, 0);
        if (idx % 2 == 0)
        {
            mz_list.Add(value);
        }
        else
        {
            intensity_list.Add(value);
        }
    }

    X = mz_list;
    Y = intensity_list;
}
pack/unpack 的 C# 移植版(原作者不是我,这是我东拼西凑改出来的"科学怪人"版本)
// This is a crude implementation of a format string based struct converter for C#.
// This is probably not the best implementation, the fastest implementation, the most bug-proof implementation, or even the most functional implementation.
// It's provided as-is for free. Enjoy.
/// <summary>
/// A small format-string driven converter between byte arrays and boxed primitive values,
/// modelled on Python's <c>struct.pack</c> / <c>struct.unpack</c>.
/// </summary>
/// <remarks>
/// Supported format characters: <c>q</c>/<c>Q</c> (64-bit signed/unsigned integer),
/// <c>i</c> and <c>l</c> (32-bit signed), <c>I</c> and <c>L</c> (32-bit unsigned),
/// <c>h</c>/<c>H</c> (16-bit signed/unsigned), <c>b</c>/<c>B</c> (signed/unsigned byte),
/// <c>f</c> (32-bit float) and <c>x</c> (skip one byte). A leading <c>&lt;</c> or <c>&gt;</c>
/// marks the data as little- or big-endian; without a marker the host byte order is used.
/// </remarks>
public class StructConverter
{
    static bool debug = false;

    // Returns the number of bytes one format character consumes; throws for unknown characters.
    private static int SizeOfFormatChar(char c)
    {
        switch (c)
        {
            case 'q':
            case 'Q':
                return 8;
            case 'i':
            case 'I':
            case 'l':
            case 'L':
            case 'f':
                return 4;
            case 'h':
            case 'H':
                return 2;
            case 'b':
            case 'B':
            case 'x':
                return 1;
            default:
                throw new ArgumentException("Invalid character found in format string.");
        }
    }

    // Type-agnostic front end for BitConverter.GetBytes; always returns a fresh array in host byte order.
    private static byte[] TypeAgnosticGetBytes(object o)
    {
        if (o is int) return BitConverter.GetBytes((int)o);
        if (o is uint) return BitConverter.GetBytes((uint)o);
        if (o is long) return BitConverter.GetBytes((long)o);
        if (o is ulong) return BitConverter.GetBytes((ulong)o);
        if (o is short) return BitConverter.GetBytes((short)o);
        if (o is ushort) return BitConverter.GetBytes((ushort)o);
        if (o is float) return BitConverter.GetBytes((float)o);
        if (o is byte) return new byte[] { (byte)o };
        // A boxed sbyte can only be unboxed as sbyte; reinterpret its bits as a byte afterwards.
        if (o is sbyte) return new byte[] { unchecked((byte)(sbyte)o) };
        throw new ArgumentException("Unsupported object type found");
    }

    // Maps a boxed value to the format character that Unpack needs to read it back.
    private static string GetFormatSpecifierFor(object o)
    {
        if (o is int) return "i";
        if (o is uint) return "I";
        if (o is long) return "q";
        if (o is ulong) return "Q";
        if (o is short) return "h";
        if (o is ushort) return "H";
        if (o is float) return "f";
        if (o is byte) return "B";
        if (o is sbyte) return "b";
        throw new ArgumentException("Unsupported object type found");
    }

    /// <summary>
    /// Convert a byte array into an array of objects based on Python's "struct.unpack" protocol.
    /// </summary>
    /// <param name="fmt">A "struct.pack"-compatible format string, optionally prefixed with
    /// <c>&lt;</c> (little endian) or <c>&gt;</c> (big endian).</param>
    /// <param name="bytes">An array of bytes to convert to objects; its length must equal the
    /// total size described by <paramref name="fmt"/>.</param>
    /// <returns>Array of boxed values, one per format character other than <c>x</c>.</returns>
    /// <remarks>You are responsible for casting the objects in the array back to their proper types.</remarks>
    /// <exception cref="ArgumentException">The format string is empty or contains an unknown
    /// character, or the byte count does not match the format.</exception>
    public static object[] Unpack(string fmt, byte[] bytes)
    {
        if (fmt == null) throw new ArgumentNullException("fmt");
        if (bytes == null) throw new ArgumentNullException("bytes");

        if (debug) Debug.WriteLine("Format string is length {0}, {1} bytes provided.", fmt.Length, bytes.Length);

        if (fmt.Length < 1) throw new ArgumentException("Format string cannot be empty.");

        // An optional first character declares the byte order of the data; a swap is needed
        // only when that order differs from the host's.
        bool endianFlip = false;
        if (fmt[0] == '<')
        {
            endianFlip = !BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }
        else if (fmt[0] == '>')
        {
            endianFlip = BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }

        // First pass: validate every format character and work out the required byte count.
        int totalByteLength = 0;
        foreach (char c in fmt)
        {
            totalByteLength += SizeOfFormatChar(c);
        }

        if (debug) Debug.WriteLine("Endianness will {0}be flipped.", (object)(endianFlip ? "" : "NOT "));
        if (debug) Debug.WriteLine("The byte array is expected to be {0} bytes long.", (object)totalByteLength);

        if (bytes.Length != totalByteLength) throw new ArgumentException("The number of bytes provided does not match the total length of the format string.");

        // Second pass: decode each field from its own copy so the caller's array is never modified.
        List<object> outputList = new List<object>(fmt.Length);
        int position = 0;
        foreach (char c in fmt)
        {
            int size = SizeOfFormatChar(c);
            if (c != 'x')
            {
                byte[] field = new byte[size];
                Array.Copy(bytes, position, field, 0, size);
                if (endianFlip) Array.Reverse(field);

                switch (c)
                {
                    case 'q':
                        outputList.Add((object)BitConverter.ToInt64(field, 0));
                        break;
                    case 'Q':
                        outputList.Add((object)BitConverter.ToUInt64(field, 0));
                        break;
                    case 'i':
                    case 'l':
                        outputList.Add((object)BitConverter.ToInt32(field, 0));
                        break;
                    case 'I':
                    case 'L':
                        outputList.Add((object)BitConverter.ToUInt32(field, 0));
                        break;
                    case 'h':
                        outputList.Add((object)BitConverter.ToInt16(field, 0));
                        break;
                    case 'H':
                        outputList.Add((object)BitConverter.ToUInt16(field, 0));
                        break;
                    case 'b':
                        outputList.Add((object)unchecked((sbyte)field[0]));
                        break;
                    case 'B':
                        outputList.Add((object)field[0]);
                        break;
                    case 'f':
                        outputList.Add((object)BitConverter.ToSingle(field, 0));
                        break;
                    default:
                        throw new ArgumentException("You should not be here.");
                }
            }

            // Every field, including a skipped 'x' byte, advances by exactly its own size.
            position += size;
        }

        return outputList.ToArray();
    }

    /// <summary>
    /// Unpacks <paramref name="len"/> unsigned 32-bit integers followed by whatever
    /// <paramref name="fmt"/> describes.
    /// </summary>
    /// <param name="len">Number of leading <c>L</c> (unsigned 32-bit) fields.</param>
    /// <param name="fmt">Additional format characters appended after the <c>L</c> run; may be empty.</param>
    /// <param name="bytes">The bytes to decode.</param>
    /// <returns>Array of boxed values.</returns>
    public static object[] Unpack(int len, string fmt, byte[] bytes)
    {
        string _fmt = new string('L', len);
        _fmt += fmt;
        return Unpack(_fmt, bytes);
    }

    /// <summary>
    /// Convert an array of objects to a byte array, along with a string that can be used with Unpack.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <param name="LittleEndian">Set to False if you want to use big endian output.</param>
    /// <param name="NeededFormatStringToRecover">Variable to place an 'Unpack'-compatible format string into.</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    /// <exception cref="ArgumentException">An item has a type that cannot be packed.</exception>
    public static byte[] Pack(object[] items, bool LittleEndian, out string NeededFormatStringToRecover)
    {
        if (items == null) throw new ArgumentNullException("items");

        List<byte> outputBytes = new List<byte>();

        // BitConverter emits host order; reverse each value when the requested order differs.
        bool endianFlip = (LittleEndian != BitConverter.IsLittleEndian);

        string outString = LittleEndian ? "<" : ">";

        foreach (object o in items)
        {
            byte[] theseBytes = TypeAgnosticGetBytes(o);
            if (endianFlip) Array.Reverse(theseBytes);
            outString += GetFormatSpecifierFor(o);
            outputBytes.AddRange(theseBytes);
        }

        NeededFormatStringToRecover = outString;
        return outputBytes.ToArray();
    }

    /// <summary>
    /// Packs the items as little-endian bytes and discards the recovery format string.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    public static byte[] Pack(object[] items)
    {
        string dummy;
        return Pack(items, true, out dummy);
    }
}