UTF8 字节到字符串 & Winsock GetStream
UTF8 Byte to String & Winsock GetStream
好吧,我正在尝试将以字节为单位的大量信息转换为字符串。 (11076 长度)
到底问题出在哪里,信息是漏字了。 (长度 10996)
看:
Winsock连接接收信息,查看过程:
public static void UpdateClient(UserConnection client)
{
string data = null;
Decoder utf8Decoder = Encoding.UTF8.GetDecoder();
Console.WriteLine("Iniciando");
byte[] buffer = ReadFully(client.TCPClient.GetStream(), 0);
int charCount = utf8Decoder.GetCharCount(buffer, 0, buffer.Length);
Char[] chars = new Char[charCount];
int charsDecodedCount = utf8Decoder.GetChars(buffer, 0, buffer.Length, chars, 0);
foreach (Char c in chars)
{
data = data + String.Format("{0}", c);
}
int buffersize = buffer.Length;
Console.WriteLine("Chars is: " + chars.Length);
Console.WriteLine("Data is: " + data);
Console.WriteLine("Byte is: " + buffer.Length);
Console.WriteLine("Size is: " + data.Length);
Server.Network.ReceiveData.SelectPacket(client.Index, data);
}
public static byte[] ReadFully(Stream stream, int initialLength)
{
if (initialLength < 1)
{
initialLength = 32768;
}
byte[] buffer = new byte[initialLength];
int read = 0;
int chunk;
chunk = stream.Read(buffer, read, buffer.Length - read);
checkreach:
read += chunk;
if (read == buffer.Length)
{
int nextByte = stream.ReadByte();
if (nextByte == -1)
{
return buffer;
}
byte[] newBuffer = new byte[buffer.Length * 2];
Array.Copy(buffer, newBuffer, buffer.Length);
newBuffer[read] = (byte)nextByte;
buffer = newBuffer;
read++;
goto checkreach;
}
byte[] ret = new byte[read];
Array.Copy(buffer, ret, read);
return ret;
}
有人有提示或解决方案吗?
UTF-8 编码的文本字节数多于字符数是完全正常的。在 UTF-8 中,一些字符(例如 á and ã)被编码为两个或更多字节。
作为 ReadFully
方法 returns 垃圾,如果你试图用它来读取超过初始缓冲区的容量,或者如果它不能用一个 Read
打电话,你不应该使用它。此外,将 char 数组转换为字符串的方式非常慢。只需使用 StreamReader
读取流并将其解码为字符串:
public static void UpdateClient(UserConnection client) {
string data;
using (StreamReader reader = new StreamReader(client.TCPClient.GetStream(), Encoding.UTF8)) {
data = reader.ReadToEnd();
}
Console.WriteLine("Data is: " + data);
Console.WriteLine("Size is: " + data.Length);
Server.Network.ReceiveData.SelectPacket(client.Index, data);
}
好吧,我正在尝试将以字节为单位的大量信息转换为字符串。 (11076 长度)
到底问题出在哪里,信息是漏字了。 (长度 10996)
看:
Winsock连接接收信息,查看过程:
public static void UpdateClient(UserConnection client) { string data = null; Decoder utf8Decoder = Encoding.UTF8.GetDecoder(); Console.WriteLine("Iniciando"); byte[] buffer = ReadFully(client.TCPClient.GetStream(), 0); int charCount = utf8Decoder.GetCharCount(buffer, 0, buffer.Length); Char[] chars = new Char[charCount]; int charsDecodedCount = utf8Decoder.GetChars(buffer, 0, buffer.Length, chars, 0); foreach (Char c in chars) { data = data + String.Format("{0}", c); } int buffersize = buffer.Length; Console.WriteLine("Chars is: " + chars.Length); Console.WriteLine("Data is: " + data); Console.WriteLine("Byte is: " + buffer.Length); Console.WriteLine("Size is: " + data.Length); Server.Network.ReceiveData.SelectPacket(client.Index, data); } public static byte[] ReadFully(Stream stream, int initialLength) { if (initialLength < 1) { initialLength = 32768; } byte[] buffer = new byte[initialLength]; int read = 0; int chunk; chunk = stream.Read(buffer, read, buffer.Length - read); checkreach: read += chunk; if (read == buffer.Length) { int nextByte = stream.ReadByte(); if (nextByte == -1) { return buffer; } byte[] newBuffer = new byte[buffer.Length * 2]; Array.Copy(buffer, newBuffer, buffer.Length); newBuffer[read] = (byte)nextByte; buffer = newBuffer; read++; goto checkreach; } byte[] ret = new byte[read]; Array.Copy(buffer, ret, read); return ret; }
有人有提示或解决方案吗?
UTF-8 编码的文本字节数多于字符数是完全正常的。在 UTF-8 中,一些字符(例如 á and ã)被编码为两个或更多字节。
作为 ReadFully
方法 returns 垃圾,如果你试图用它来读取超过初始缓冲区的容量,或者如果它不能用一个 Read
打电话,你不应该使用它。此外,将 char 数组转换为字符串的方式非常慢。只需使用 StreamReader
读取流并将其解码为字符串:
public static void UpdateClient(UserConnection client) {
string data;
using (StreamReader reader = new StreamReader(client.TCPClient.GetStream(), Encoding.UTF8)) {
data = reader.ReadToEnd();
}
Console.WriteLine("Data is: " + data);
Console.WriteLine("Size is: " + data.Length);
Server.Network.ReceiveData.SelectPacket(client.Index, data);
}