简单的字数统计 Rust 程序输出有效的标准输出,但是当通过管道传输到具有特定内容的 head 程序时会出现恐慌
Simple word count Rust program outputs valid stdout but panics when piped to the head program with specific content
我得到了如下 Rust 回溯(trace)信息:
thread 'main' panicked at 'failed printing to stdout: Broken pipe (os error 32)', library/std/src/io/stdio.rs:993:9
stack backtrace:
0: 0x559ffa959dc0 - std::backtrace_rs::backtrace::libunwind::trace::h72c2fb8038f1bbee
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/../../backtrace/src/backtrace/libunwind.rs:96
1: 0x559ffa959dc0 - std::backtrace_rs::backtrace::trace_unsynchronized::h1e3b084883f1e78c
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/../../backtrace/src/backtrace/mod.rs:66
2: 0x559ffa959dc0 - std::sys_common::backtrace::_print_fmt::h3bf6a7ebf7f0394a
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:79
3: 0x559ffa959dc0 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h2e8cb764b7fe02e7
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:58
4: 0x559ffa972f6c - core::fmt::write::h7a1184eaee6a8644
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/fmt/mod.rs:1080
5: 0x559ffa957b12 - std::io::Write::write_fmt::haeeb374d93a67eac
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/mod.rs:1516
6: 0x559ffa95beed - std::sys_common::backtrace::_print::h1d14a7f6ad632dc8
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:61
7: 0x559ffa95beed - std::sys_common::backtrace::print::h301abac8bb2e3e81
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:48
8: 0x559ffa95beed - std::panicking::default_hook::{{closure}}::hde0cb80358a6920a
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:208
9: 0x559ffa95bb98 - std::panicking::default_hook::h9b1a691049a0ec8f
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:227
10: 0x559ffa95c5d1 - std::panicking::rust_panic_with_hook::h2bdec87b60580584
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:577
11: 0x559ffa95c179 - std::panicking::begin_panic_handler::{{closure}}::h101ca09d9df5db47
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:484
12: 0x559ffa95a22c - std::sys_common::backtrace::__rust_end_short_backtrace::h3bb85654c20113ca
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:153
13: 0x559ffa95c139 - rust_begin_unwind
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:483
14: 0x559ffa95c0eb - std::panicking::begin_panic_fmt::hf0503558fbe5b251
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:437
15: 0x559ffa957022 - std::io::stdio::print_to::h9435376f36962f3f
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/stdio.rs:993
16: 0x559ffa957022 - std::io::stdio::_print::h0d31d4b9faa6e1ec
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/stdio.rs:1005
17: 0x559ffa944807 - wordstats::main::h1c2ea6400047a5eb
18: 0x559ffa942e73 - std::sys_common::backtrace::__rust_begin_short_backtrace::h9e31cf87ddc88116
19: 0x559ffa942e49 - std::rt::lang_start::{{closure}}::h6c6491f05894818f
20: 0x559ffa95c9f7 - core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once::he179d32a5d10d957
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/ops/function.rs:259
21: 0x559ffa95c9f7 - std::panicking::try::do_call::hcb3d5e7be089b2b4
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:381
22: 0x559ffa95c9f7 - std::panicking::try::h7ac93b0cd56fb701
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:345
23: 0x559ffa95c9f7 - std::panic::catch_unwind::h7b40e396c93a4fcd
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panic.rs:382
24: 0x559ffa95c9f7 - std::rt::lang_start_internal::h142b9cc66267fea1
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/rt.rs:51
25: 0x559ffa944ae2 - main
26: 0x7f6223a380b3 - __libc_start_main
27: 0x559ffa94209e - _start
28: 0x0 - <unknown>
当我编译这个程序时
use diacritics;
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;
/// A word paired with the number of times it was counted.
#[derive(Debug)]
struct Entry {
/// The word text.
word: String,
/// How many times `word` was counted.
count: u32,
}
/// Characters that terminate the current word when scanning a line.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];
/// Entry point: counts the words of every line read from stdin, then prints
/// the tally.
///
/// # Panics
/// Panics (via `unwrap`) if a line cannot be read from stdin.
fn main() {
    let mut counts = HashMap::<String, u32>::new();
    let input = io::stdin();
    input
        .lock()
        .lines()
        .map(|line| line.unwrap())
        .for_each(|text| line_processor(text, &mut counts));
    output(&mut counts);
}
/// Splits one input line into words and counts each of them in `words`.
///
/// The line is first stripped of diacritics and lowercased, then cut at every
/// character listed in `SEPARATORS`.
fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    let formatted_line = diacritics::remove_diacritics(&line).to_lowercase();
    // `split` also yields the text after the last separator. A loop that only
    // flushes a word when it meets a separator would drop the final word of
    // any line that does not end with one.
    // Empty pieces (adjacent separators) are rejected by `add_word`.
    for piece in formatted_line.split(SEPARATORS) {
        add_word(piece.to_string(), words);
    }
}
/// Increments the counter for `word` in `words`, inserting it at 1 if new.
///
/// Empty strings are ignored.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API performs a single lookup and reuses the owned key instead
    // of a `contains_key` + `get` + `insert` sequence with a cloned key.
    *words.entry(word).or_insert(0) += 1;
}
/// Prints every word as a `count<TAB>word` line, highest count first.
///
/// # Panics
/// `println!` panics if writing to stdout fails (for example when the pipe
/// reading our output has been closed).
fn output(words: &mut HashMap<String, u32>) {
    let mut ranked: Vec<Entry> = words
        .iter()
        .map(|(text, total)| Entry {
            word: text.to_string(),
            count: *total,
        })
        .collect();
    // Stable descending sort; walking it front to back gives the same order
    // as reversing the vector and popping from its end.
    ranked.sort_by(|left, right| right.count.cmp(&left.count));
    for entry in ranked {
        println!("{}\t{}", entry.count, entry.word);
    }
}
这样:
cargo build --release
我运行程序是这样的:
cat src/sample.txt | ./target/release/wordstats | head -n 50
这个程序应该只显示类似下面的内容(出现次数最多的单词),而不应出现任何回溯信息:
15 the
14 in
11 are
10 and
10 of
9 species
9 bats
8 horseshoe
8 is
6 or
6 as
5 which
5 their
对于某些 echo 输出的内容或其他一些文件(例如 cat src/main.rs | ...
),程序的表现符合预期,
但对于这个文件的内容却不行——它是某个随机维基百科页面的一部分。
我的程序是一个愚蠢的字数统计程序,它只打印表格排序的键值列表。
当我将结果通过管道传输到 head -n 50
程序时会出现问题,但当我打印完整输出时不会出现问题
知道为什么我会得到这样的回溯吗?是我的程序中某些地方处理方式不对,还是与其他因素有关(Rust 标准库 / Unix 的异常行为)?
我的 rustc 版本是:rustc 1.48.0 (7eac88abb 2020-11-16)
编辑:
补充缺失的 Cargo.toml:
[package]
name = "wordstats"
version = "0.1.0"
authors = ["Eric Régnier <utopman@gmail.com>"]
edition = "2018"
[dependencies]
diacritics = "0.1.1"
首先,您没有提供足够的信息来重现您的问题。您提供了使用第三方依赖项的源代码,但忽略了提供 Cargo.toml
。在您的情况下,在不影响手头问题的情况下删除依赖项的使用非常容易,所以这就是我所做的。
其次,我强烈建议不要在非玩具命令行程序中使用 println!
,正是出于这个原因。具体来说,有两个因素结合在一起导致了这种不良行为:
println!
如果在写入 stdout 时发生任何错误,将会出现 panic。
- Rust 的运行时所做的少数事情之一是忽略 SIGPIPE,这意味着您的应用程序不会收到
PIPE
信号,而是对已关闭的文件描述符的相应写入会返回一个错误。(在那个链接中,您可以看到我公开主张改变这种行为。)
在典型的 C 程序中,SIGPIPE
不会 被忽略。它通常也没有明确处理。当进程收到它不处理的信号时,进程终止。在这种情况下,这正是您想要的。一旦 head
停止读取它的标准输入(你的标准输出),你 想要 你的程序停止,但你也希望它优雅地停止而不是恐慌或打印错误。因为那是 Unix CLI 实用程序所做的。
你有两种方法可以解决这个问题。一种方法是更改代码以显式处理 BrokenPipe
错误。您的代码是按照“不会发生错误”的假设编写的,因为您直接 unwrap
了读取标准输入的结果。所以你的程序不够惯用,也没有为处理错误做好准备。因此,为了正确处理 BrokenPipe
,我不得不做一些小改动,以便错误能够正确地向上传播:
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;
/// A word paired with the number of times it was counted.
#[derive(Debug)]
struct Entry {
/// The word text.
word: String,
/// How many times `word` was counted.
count: u32,
}
/// Characters that terminate the current word when scanning a line.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];
/// Entry point: runs `try_main` and reports any failure.
///
/// A `BrokenPipe` error means whoever was reading our stdout (such as `head`)
/// has stopped reading; that is a normal way to finish, so it exits quietly.
/// Any other error is printed to stderr and the process exits with status 1.
fn main() {
    let err = match try_main() {
        Ok(()) => return,
        Err(err) => err,
    };
    if err.kind() == std::io::ErrorKind::BrokenPipe {
        return;
    }
    // Ignore any error that may occur while writing to stderr.
    let _ = writeln!(std::io::stderr(), "{}", err);
    // Exit non-zero so a calling shell or script can detect the failure.
    std::process::exit(1);
}
/// Reads every line from stdin, counts its words, then prints the tally.
///
/// # Errors
/// Returns the first I/O error hit while reading stdin or writing the report.
fn try_main() -> Result<(), std::io::Error> {
    let mut counts = HashMap::<String, u32>::new();
    let input = io::stdin();
    for text in input.lock().lines() {
        line_processor(text?, &mut counts);
    }
    output(&mut counts)
}
/// Splits one input line at every character listed in `SEPARATORS` and counts
/// each resulting word in `words`.
fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    // `split` also yields the text after the last separator. A loop that only
    // flushes a word when it meets a separator would drop the final word of
    // any line that does not end with one.
    // Empty pieces (adjacent separators) are rejected by `add_word`.
    for piece in line.split(SEPARATORS) {
        add_word(piece.to_string(), words);
    }
}
/// Increments the counter for `word` in `words`, inserting it at 1 if new.
///
/// Empty strings are ignored.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API performs a single lookup and reuses the owned key instead
    // of a `contains_key` + `get` + `insert` sequence with a cloned key.
    *words.entry(word).or_insert(0) += 1;
}
/// Writes every word as a `count<TAB>word` line to stdout, highest count
/// first.
///
/// # Errors
/// Returns the first error reported while writing to stdout.
fn output(words: &mut HashMap<String, u32>) -> Result<(), std::io::Error> {
    let mut ranked: Vec<Entry> = words
        .iter()
        .map(|(text, total)| Entry {
            word: text.to_string(),
            count: *total,
        })
        .collect();
    // Stable descending sort; walking it front to back gives the same order
    // as reversing the vector and popping from its end.
    ranked.sort_by(|left, right| right.count.cmp(&left.count));
    let handle = io::stdout();
    let mut sink = handle.lock();
    for entry in ranked {
        writeln!(sink, "{}\t{}", entry.count, entry.word)?;
    }
    Ok(())
}
第二种处理方法是返回到 SIGPIPE
的默认行为。这将使您的 Rust 应用程序表现得像 C 应用程序。这可以通过定义一个函数来将 SIGPIPE
的信号处理程序重置为 SIG_DFL
来实现:
/// Restores the default `SIGPIPE` disposition (`SIG_DFL`) on Unix.
///
/// With the default action in place, writing to a closed pipe terminates the
/// process through the signal instead of surfacing as a `BrokenPipe` error.
#[cfg(unix)]
fn reset_sigpipe() {
// SAFETY: NOTE(review) — `signal` is given a constant signal number and the
// default handler; presumably sound when called before other threads exist — confirm.
unsafe {
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
}
/// Non-Unix build of `reset_sigpipe`: does nothing.
#[cfg(not(unix))]
fn reset_sigpipe() {
// no-op
}
然后在main
中将其作为第一件事调用。然后您可以删除对 BrokenPipe
错误的任何特定处理,因为它不会发生。相反,您的进程将收到一个 PIPE
信号,然后它将终止。这是完整的代码:
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;
/// A word paired with the number of times it was counted.
#[derive(Debug)]
struct Entry {
/// The word text.
word: String,
/// How many times `word` was counted.
count: u32,
}
/// Characters that terminate the current word when scanning a line.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];
/// Entry point: runs `try_main`; on failure prints the error to stderr and
/// exits with status 1.
fn main() {
    if let Err(err) = try_main() {
        // Ignore any error that may occur while writing to stderr.
        let _ = writeln!(std::io::stderr(), "{}", err);
        // Exit non-zero so a calling shell or script can detect the failure.
        std::process::exit(1);
    }
}
/// Resets `SIGPIPE` handling, reads every line from stdin, counts its words,
/// then prints the tally.
///
/// # Errors
/// Returns the first I/O error hit while reading stdin or writing the report.
fn try_main() -> Result<(), std::io::Error> {
    // Must run before any output is written.
    reset_sigpipe();
    let mut counts = HashMap::<String, u32>::new();
    let input = io::stdin();
    for text in input.lock().lines() {
        line_processor(text?, &mut counts);
    }
    output(&mut counts)
}
/// Splits one input line at every character listed in `SEPARATORS` and counts
/// each resulting word in `words`.
fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    // `split` also yields the text after the last separator. A loop that only
    // flushes a word when it meets a separator would drop the final word of
    // any line that does not end with one.
    // Empty pieces (adjacent separators) are rejected by `add_word`.
    for piece in line.split(SEPARATORS) {
        add_word(piece.to_string(), words);
    }
}
/// Increments the counter for `word` in `words`, inserting it at 1 if new.
///
/// Empty strings are ignored.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API performs a single lookup and reuses the owned key instead
    // of a `contains_key` + `get` + `insert` sequence with a cloned key.
    *words.entry(word).or_insert(0) += 1;
}
/// Writes every word as a `count<TAB>word` line to stdout, highest count
/// first.
///
/// # Errors
/// Returns the first error reported while writing to stdout.
fn output(words: &mut HashMap<String, u32>) -> Result<(), std::io::Error> {
    let mut ranked: Vec<Entry> = words
        .iter()
        .map(|(text, total)| Entry {
            word: text.to_string(),
            count: *total,
        })
        .collect();
    // Stable descending sort; walking it front to back gives the same order
    // as reversing the vector and popping from its end.
    ranked.sort_by(|left, right| right.count.cmp(&left.count));
    let handle = io::stdout();
    let mut sink = handle.lock();
    for entry in ranked {
        writeln!(sink, "{}\t{}", entry.count, entry.word)?;
    }
    Ok(())
}
/// Restores the default `SIGPIPE` disposition (`SIG_DFL`) on Unix.
///
/// With the default action in place, writing to a closed pipe terminates the
/// process through the signal instead of surfacing as a `BrokenPipe` error.
#[cfg(unix)]
fn reset_sigpipe() {
// SAFETY: NOTE(review) — `signal` is given a constant signal number and the
// default handler; presumably sound when called before other threads exist — confirm.
unsafe {
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
}
/// Non-Unix build of `reset_sigpipe`: does nothing.
#[cfg(not(unix))]
fn reset_sigpipe() {
// no-op
}
我得到了如下 Rust 回溯(trace)信息:
thread 'main' panicked at 'failed printing to stdout: Broken pipe (os error 32)', library/std/src/io/stdio.rs:993:9
stack backtrace:
0: 0x559ffa959dc0 - std::backtrace_rs::backtrace::libunwind::trace::h72c2fb8038f1bbee
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/../../backtrace/src/backtrace/libunwind.rs:96
1: 0x559ffa959dc0 - std::backtrace_rs::backtrace::trace_unsynchronized::h1e3b084883f1e78c
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/../../backtrace/src/backtrace/mod.rs:66
2: 0x559ffa959dc0 - std::sys_common::backtrace::_print_fmt::h3bf6a7ebf7f0394a
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:79
3: 0x559ffa959dc0 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h2e8cb764b7fe02e7
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:58
4: 0x559ffa972f6c - core::fmt::write::h7a1184eaee6a8644
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/fmt/mod.rs:1080
5: 0x559ffa957b12 - std::io::Write::write_fmt::haeeb374d93a67eac
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/mod.rs:1516
6: 0x559ffa95beed - std::sys_common::backtrace::_print::h1d14a7f6ad632dc8
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:61
7: 0x559ffa95beed - std::sys_common::backtrace::print::h301abac8bb2e3e81
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:48
8: 0x559ffa95beed - std::panicking::default_hook::{{closure}}::hde0cb80358a6920a
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:208
9: 0x559ffa95bb98 - std::panicking::default_hook::h9b1a691049a0ec8f
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:227
10: 0x559ffa95c5d1 - std::panicking::rust_panic_with_hook::h2bdec87b60580584
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:577
11: 0x559ffa95c179 - std::panicking::begin_panic_handler::{{closure}}::h101ca09d9df5db47
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:484
12: 0x559ffa95a22c - std::sys_common::backtrace::__rust_end_short_backtrace::h3bb85654c20113ca
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:153
13: 0x559ffa95c139 - rust_begin_unwind
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:483
14: 0x559ffa95c0eb - std::panicking::begin_panic_fmt::hf0503558fbe5b251
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:437
15: 0x559ffa957022 - std::io::stdio::print_to::h9435376f36962f3f
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/stdio.rs:993
16: 0x559ffa957022 - std::io::stdio::_print::h0d31d4b9faa6e1ec
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/stdio.rs:1005
17: 0x559ffa944807 - wordstats::main::h1c2ea6400047a5eb
18: 0x559ffa942e73 - std::sys_common::backtrace::__rust_begin_short_backtrace::h9e31cf87ddc88116
19: 0x559ffa942e49 - std::rt::lang_start::{{closure}}::h6c6491f05894818f
20: 0x559ffa95c9f7 - core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once::he179d32a5d10d957
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/ops/function.rs:259
21: 0x559ffa95c9f7 - std::panicking::try::do_call::hcb3d5e7be089b2b4
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:381
22: 0x559ffa95c9f7 - std::panicking::try::h7ac93b0cd56fb701
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:345
23: 0x559ffa95c9f7 - std::panic::catch_unwind::h7b40e396c93a4fcd
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panic.rs:382
24: 0x559ffa95c9f7 - std::rt::lang_start_internal::h142b9cc66267fea1
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/rt.rs:51
25: 0x559ffa944ae2 - main
26: 0x7f6223a380b3 - __libc_start_main
27: 0x559ffa94209e - _start
28: 0x0 - <unknown>
当我编译这个程序时
use diacritics;
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;
/// A word paired with the number of times it was counted.
#[derive(Debug)]
struct Entry {
/// The word text.
word: String,
/// How many times `word` was counted.
count: u32,
}
/// Characters that terminate the current word when scanning a line.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];
/// Entry point: counts the words of every line read from stdin, then prints
/// the tally.
///
/// # Panics
/// Panics (via `unwrap`) if a line cannot be read from stdin.
fn main() {
    let mut counts = HashMap::<String, u32>::new();
    let input = io::stdin();
    input
        .lock()
        .lines()
        .map(|line| line.unwrap())
        .for_each(|text| line_processor(text, &mut counts));
    output(&mut counts);
}
/// Splits one input line into words and counts each of them in `words`.
///
/// The line is first stripped of diacritics and lowercased, then cut at every
/// character listed in `SEPARATORS`.
fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    let formatted_line = diacritics::remove_diacritics(&line).to_lowercase();
    // `split` also yields the text after the last separator. A loop that only
    // flushes a word when it meets a separator would drop the final word of
    // any line that does not end with one.
    // Empty pieces (adjacent separators) are rejected by `add_word`.
    for piece in formatted_line.split(SEPARATORS) {
        add_word(piece.to_string(), words);
    }
}
/// Increments the counter for `word` in `words`, inserting it at 1 if new.
///
/// Empty strings are ignored.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API performs a single lookup and reuses the owned key instead
    // of a `contains_key` + `get` + `insert` sequence with a cloned key.
    *words.entry(word).or_insert(0) += 1;
}
/// Prints every word as a `count<TAB>word` line, highest count first.
///
/// # Panics
/// `println!` panics if writing to stdout fails (for example when the pipe
/// reading our output has been closed).
fn output(words: &mut HashMap<String, u32>) {
    let mut ranked: Vec<Entry> = words
        .iter()
        .map(|(text, total)| Entry {
            word: text.to_string(),
            count: *total,
        })
        .collect();
    // Stable descending sort; walking it front to back gives the same order
    // as reversing the vector and popping from its end.
    ranked.sort_by(|left, right| right.count.cmp(&left.count));
    for entry in ranked {
        println!("{}\t{}", entry.count, entry.word);
    }
}
这样:
cargo build --release
我运行程序是这样的:
cat src/sample.txt | ./target/release/wordstats | head -n 50
这个程序应该只显示类似下面的内容(出现次数最多的单词),而不应出现任何回溯信息:
15 the
14 in
11 are
10 and
10 of
9 species
9 bats
8 horseshoe
8 is
6 or
6 as
5 which
5 their
对于某些 echo 输出的内容或其他一些文件(例如 cat src/main.rs | ...
),程序的表现符合预期,
但对于这个文件的内容却不行——它是某个随机维基百科页面的一部分。
我的程序是一个愚蠢的字数统计程序,它只打印表格排序的键值列表。
当我将结果通过管道传输到 head -n 50
程序时会出现问题,但当我打印完整输出时不会出现问题
知道为什么我会得到这样的回溯吗?是我的程序中某些地方处理方式不对,还是与其他因素有关(Rust 标准库 / Unix 的异常行为)?
我的 rustc 版本是:rustc 1.48.0 (7eac88abb 2020-11-16)
编辑:
补充缺失的 Cargo.toml:
[package]
name = "wordstats"
version = "0.1.0"
authors = ["Eric Régnier <utopman@gmail.com>"]
edition = "2018"
[dependencies]
diacritics = "0.1.1"
首先,您没有提供足够的信息来重现您的问题。您提供了使用第三方依赖项的源代码,但忽略了提供 Cargo.toml
。在您的情况下,在不影响手头问题的情况下删除依赖项的使用非常容易,所以这就是我所做的。
其次,我强烈建议不要在非玩具命令行程序中使用 println!
,正是出于这个原因。具体来说,有两个因素结合在一起导致了这种不良行为:
println!
如果在写入 stdout 时发生任何错误,将会出现 panic。
- Rust 的运行时所做的少数事情之一是忽略 SIGPIPE,这意味着您的应用程序不会收到
PIPE
信号,而是对已关闭的文件描述符的相应写入会返回一个错误。(在那个链接中,您可以看到我公开主张改变这种行为。)
在典型的 C 程序中,SIGPIPE
不会 被忽略。它通常也没有明确处理。当进程收到它不处理的信号时,进程终止。在这种情况下,这正是您想要的。一旦 head
停止读取它的标准输入(你的标准输出),你 想要 你的程序停止,但你也希望它优雅地停止而不是恐慌或打印错误。因为那是 Unix CLI 实用程序所做的。
你有两种方法可以解决这个问题。一种方法是更改代码以显式处理 BrokenPipe
错误。您的代码是按照“不会发生错误”的假设编写的,因为您直接 unwrap
了读取标准输入的结果。所以你的程序不够惯用,也没有为处理错误做好准备。因此,为了正确处理 BrokenPipe
,我不得不做一些小改动,以便错误能够正确地向上传播:
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;
/// A word paired with the number of times it was counted.
#[derive(Debug)]
struct Entry {
/// The word text.
word: String,
/// How many times `word` was counted.
count: u32,
}
/// Characters that terminate the current word when scanning a line.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];
/// Entry point: runs `try_main` and reports any failure.
///
/// A `BrokenPipe` error means whoever was reading our stdout (such as `head`)
/// has stopped reading; that is a normal way to finish, so it exits quietly.
/// Any other error is printed to stderr and the process exits with status 1.
fn main() {
    let err = match try_main() {
        Ok(()) => return,
        Err(err) => err,
    };
    if err.kind() == std::io::ErrorKind::BrokenPipe {
        return;
    }
    // Ignore any error that may occur while writing to stderr.
    let _ = writeln!(std::io::stderr(), "{}", err);
    // Exit non-zero so a calling shell or script can detect the failure.
    std::process::exit(1);
}
/// Reads every line from stdin, counts its words, then prints the tally.
///
/// # Errors
/// Returns the first I/O error hit while reading stdin or writing the report.
fn try_main() -> Result<(), std::io::Error> {
    let mut counts = HashMap::<String, u32>::new();
    let input = io::stdin();
    for text in input.lock().lines() {
        line_processor(text?, &mut counts);
    }
    output(&mut counts)
}
/// Splits one input line at every character listed in `SEPARATORS` and counts
/// each resulting word in `words`.
fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    // `split` also yields the text after the last separator. A loop that only
    // flushes a word when it meets a separator would drop the final word of
    // any line that does not end with one.
    // Empty pieces (adjacent separators) are rejected by `add_word`.
    for piece in line.split(SEPARATORS) {
        add_word(piece.to_string(), words);
    }
}
/// Increments the counter for `word` in `words`, inserting it at 1 if new.
///
/// Empty strings are ignored.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API performs a single lookup and reuses the owned key instead
    // of a `contains_key` + `get` + `insert` sequence with a cloned key.
    *words.entry(word).or_insert(0) += 1;
}
/// Writes every word as a `count<TAB>word` line to stdout, highest count
/// first.
///
/// # Errors
/// Returns the first error reported while writing to stdout.
fn output(words: &mut HashMap<String, u32>) -> Result<(), std::io::Error> {
    let mut ranked: Vec<Entry> = words
        .iter()
        .map(|(text, total)| Entry {
            word: text.to_string(),
            count: *total,
        })
        .collect();
    // Stable descending sort; walking it front to back gives the same order
    // as reversing the vector and popping from its end.
    ranked.sort_by(|left, right| right.count.cmp(&left.count));
    let handle = io::stdout();
    let mut sink = handle.lock();
    for entry in ranked {
        writeln!(sink, "{}\t{}", entry.count, entry.word)?;
    }
    Ok(())
}
第二种处理方法是返回到 SIGPIPE
的默认行为。这将使您的 Rust 应用程序表现得像 C 应用程序。这可以通过定义一个函数来将 SIGPIPE
的信号处理程序重置为 SIG_DFL
来实现:
/// Restores the default `SIGPIPE` disposition (`SIG_DFL`) on Unix.
///
/// With the default action in place, writing to a closed pipe terminates the
/// process through the signal instead of surfacing as a `BrokenPipe` error.
#[cfg(unix)]
fn reset_sigpipe() {
// SAFETY: NOTE(review) — `signal` is given a constant signal number and the
// default handler; presumably sound when called before other threads exist — confirm.
unsafe {
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
}
/// Non-Unix build of `reset_sigpipe`: does nothing.
#[cfg(not(unix))]
fn reset_sigpipe() {
// no-op
}
然后在main
中将其作为第一件事调用。然后您可以删除对 BrokenPipe
错误的任何特定处理,因为它不会发生。相反,您的进程将收到一个 PIPE
信号,然后它将终止。这是完整的代码:
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;
/// A word paired with the number of times it was counted.
#[derive(Debug)]
struct Entry {
/// The word text.
word: String,
/// How many times `word` was counted.
count: u32,
}
/// Characters that terminate the current word when scanning a line.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];
/// Entry point: runs `try_main`; on failure prints the error to stderr and
/// exits with status 1.
fn main() {
    if let Err(err) = try_main() {
        // Ignore any error that may occur while writing to stderr.
        let _ = writeln!(std::io::stderr(), "{}", err);
        // Exit non-zero so a calling shell or script can detect the failure.
        std::process::exit(1);
    }
}
/// Resets `SIGPIPE` handling, reads every line from stdin, counts its words,
/// then prints the tally.
///
/// # Errors
/// Returns the first I/O error hit while reading stdin or writing the report.
fn try_main() -> Result<(), std::io::Error> {
    // Must run before any output is written.
    reset_sigpipe();
    let mut counts = HashMap::<String, u32>::new();
    let input = io::stdin();
    for text in input.lock().lines() {
        line_processor(text?, &mut counts);
    }
    output(&mut counts)
}
/// Splits one input line at every character listed in `SEPARATORS` and counts
/// each resulting word in `words`.
fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    // `split` also yields the text after the last separator. A loop that only
    // flushes a word when it meets a separator would drop the final word of
    // any line that does not end with one.
    // Empty pieces (adjacent separators) are rejected by `add_word`.
    for piece in line.split(SEPARATORS) {
        add_word(piece.to_string(), words);
    }
}
/// Increments the counter for `word` in `words`, inserting it at 1 if new.
///
/// Empty strings are ignored.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API performs a single lookup and reuses the owned key instead
    // of a `contains_key` + `get` + `insert` sequence with a cloned key.
    *words.entry(word).or_insert(0) += 1;
}
/// Writes every word as a `count<TAB>word` line to stdout, highest count
/// first.
///
/// # Errors
/// Returns the first error reported while writing to stdout.
fn output(words: &mut HashMap<String, u32>) -> Result<(), std::io::Error> {
    let mut ranked: Vec<Entry> = words
        .iter()
        .map(|(text, total)| Entry {
            word: text.to_string(),
            count: *total,
        })
        .collect();
    // Stable descending sort; walking it front to back gives the same order
    // as reversing the vector and popping from its end.
    ranked.sort_by(|left, right| right.count.cmp(&left.count));
    let handle = io::stdout();
    let mut sink = handle.lock();
    for entry in ranked {
        writeln!(sink, "{}\t{}", entry.count, entry.word)?;
    }
    Ok(())
}
/// Restores the default `SIGPIPE` disposition (`SIG_DFL`) on Unix.
///
/// With the default action in place, writing to a closed pipe terminates the
/// process through the signal instead of surfacing as a `BrokenPipe` error.
#[cfg(unix)]
fn reset_sigpipe() {
// SAFETY: NOTE(review) — `signal` is given a constant signal number and the
// default handler; presumably sound when called before other threads exist — confirm.
unsafe {
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
}
/// Non-Unix build of `reset_sigpipe`: does nothing.
#[cfg(not(unix))]
fn reset_sigpipe() {
// no-op
}