最后一个符号在 LZW 中重复

Last symbol is duplicated in LZW

我尝试实施 LZW encoding/decoding 并以以下代码结束

#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>

using Index = std::int16_t;

void encode(std::istream &input, std::ostream &output) {
  Index index{0};
  std::unordered_map<std::string, Index> dictionary{};

  for (int i = 0; i < 256; ++i) {
    dictionary[{static_cast<char>(i & 0xFF)}] = index++;
  }

  char k;
  input.read(&k, sizeof(char));
  std::string buffer{""};
  while (input) {
    const auto tmp = buffer + k;
    if (dictionary.contains(tmp))
      buffer = tmp;
    else {
      dictionary[tmp] = index++;
      output.write(reinterpret_cast<const char *>(&dictionary[buffer]),
                   sizeof(Index));
      buffer = {k};
    }
    input.read(&k, sizeof(char));
  }
  output.write(reinterpret_cast<const char *>(&dictionary[buffer]),
               sizeof(Index));
}

void decode(std::istream &input, std::ostream &output) {
  Index index{0};
  std::unordered_map<Index, std::string> dictionary{};

  for (int i = 0; i < 256; ++i) {
    dictionary[index++] = {static_cast<char>(i & 0xFF)};
  }

  Index k;
  input.read(reinterpret_cast<char *>(&k), sizeof(Index));
  output << dictionary[k];

  Index old{k};
  std::string buffer;
  while (input) {
    input.read(reinterpret_cast<char *>(&k), sizeof(Index));
    buffer = dictionary[old];

    std::string tmp;
    if (dictionary.contains(k)) {
      const auto &entry = dictionary[k];
      tmp = buffer + entry.front();
      output << entry;
    } else {
      tmp = buffer + buffer.front();
      output << tmp;
    }
    dictionary[index++] = tmp;
    old = k;
  }
}

有点效果,但解码后的最后一个字符重复:

int main() {
  std::string input{"hello world!"};
  std::istringstream iss{input};
  std::stringstream ss{};
  encode(iss, ss);
  decode(ss, std::cout);
}

输出 hello world!! 而不是 hello world!。我找不到我的错误,也许其他人可以?

正如@NathanOliver 在评论中指出的那样 将解码循环更改为

while (input.read(reinterpret_cast<char *>(&k), sizeof(Index))) { 

修复它。