最后一个符号在 LZW 中重复
Last symbol is duplicated in LZW
我尝试实施 LZW encoding/decoding 并以以下代码结束
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
using Index = std::int16_t;
void encode(std::istream &input, std::ostream &output) {
Index index{0};
std::unordered_map<std::string, Index> dictionary{};
for (int i = 0; i < 256; ++i) {
dictionary[{static_cast<char>(i & 0xFF)}] = index++;
}
char k;
input.read(&k, sizeof(char));
std::string buffer{""};
while (input) {
const auto tmp = buffer + k;
if (dictionary.contains(tmp))
buffer = tmp;
else {
dictionary[tmp] = index++;
output.write(reinterpret_cast<const char *>(&dictionary[buffer]),
sizeof(Index));
buffer = {k};
}
input.read(&k, sizeof(char));
}
output.write(reinterpret_cast<const char *>(&dictionary[buffer]),
sizeof(Index));
}
void decode(std::istream &input, std::ostream &output) {
Index index{0};
std::unordered_map<Index, std::string> dictionary{};
for (int i = 0; i < 256; ++i) {
dictionary[index++] = {static_cast<char>(i & 0xFF)};
}
Index k;
input.read(reinterpret_cast<char *>(&k), sizeof(Index));
output << dictionary[k];
Index old{k};
std::string buffer;
while (input) {
input.read(reinterpret_cast<char *>(&k), sizeof(Index));
buffer = dictionary[old];
std::string tmp;
if (dictionary.contains(k)) {
const auto &entry = dictionary[k];
tmp = buffer + entry.front();
output << entry;
} else {
tmp = buffer + buffer.front();
output << tmp;
}
dictionary[index++] = tmp;
old = k;
}
}
有点效果,但解码后的最后一个字符重复:
int main() {
std::string input{"hello world!"};
std::istringstream iss{input};
std::stringstream ss{};
encode(iss, ss);
decode(ss, std::cout);
}
输出 hello world!!
而不是 hello world!
。我找不到我的错误,也许其他人可以?
正如@NathanOliver 在评论中指出的那样
将解码循环更改为
while (input.read(reinterpret_cast<char *>(&k), sizeof(Index))) {
修复它。
我尝试实施 LZW encoding/decoding 并以以下代码结束
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
using Index = std::int16_t;
void encode(std::istream &input, std::ostream &output) {
Index index{0};
std::unordered_map<std::string, Index> dictionary{};
for (int i = 0; i < 256; ++i) {
dictionary[{static_cast<char>(i & 0xFF)}] = index++;
}
char k;
input.read(&k, sizeof(char));
std::string buffer{""};
while (input) {
const auto tmp = buffer + k;
if (dictionary.contains(tmp))
buffer = tmp;
else {
dictionary[tmp] = index++;
output.write(reinterpret_cast<const char *>(&dictionary[buffer]),
sizeof(Index));
buffer = {k};
}
input.read(&k, sizeof(char));
}
output.write(reinterpret_cast<const char *>(&dictionary[buffer]),
sizeof(Index));
}
void decode(std::istream &input, std::ostream &output) {
Index index{0};
std::unordered_map<Index, std::string> dictionary{};
for (int i = 0; i < 256; ++i) {
dictionary[index++] = {static_cast<char>(i & 0xFF)};
}
Index k;
input.read(reinterpret_cast<char *>(&k), sizeof(Index));
output << dictionary[k];
Index old{k};
std::string buffer;
while (input) {
input.read(reinterpret_cast<char *>(&k), sizeof(Index));
buffer = dictionary[old];
std::string tmp;
if (dictionary.contains(k)) {
const auto &entry = dictionary[k];
tmp = buffer + entry.front();
output << entry;
} else {
tmp = buffer + buffer.front();
output << tmp;
}
dictionary[index++] = tmp;
old = k;
}
}
有点效果,但解码后的最后一个字符重复:
int main() {
std::string input{"hello world!"};
std::istringstream iss{input};
std::stringstream ss{};
encode(iss, ss);
decode(ss, std::cout);
}
输出 hello world!!
而不是 hello world!
。我找不到我的错误,也许其他人可以?
正如@NathanOliver 在评论中指出的那样 将解码循环更改为
while (input.read(reinterpret_cast<char *>(&k), sizeof(Index))) {
修复它。