简单的字数统计 Rust 程序输出有效的标准输出,但是当通过管道传输到具有特定内容的 head 程序时会出现恐慌

Simple word count Rust program outputs valid stdout but panics when piped to the head program with specific content


thread 'main' panicked at 'failed printing to stdout: Broken pipe (os error 32)', library/std/src/io/stdio.rs:993:9
stack backtrace:
   0:     0x559ffa959dc0 - std::backtrace_rs::backtrace::libunwind::trace::h72c2fb8038f1bbee
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/../../backtrace/src/backtrace/libunwind.rs:96
   1:     0x559ffa959dc0 - std::backtrace_rs::backtrace::trace_unsynchronized::h1e3b084883f1e78c
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/../../backtrace/src/backtrace/mod.rs:66
   2:     0x559ffa959dc0 - std::sys_common::backtrace::_print_fmt::h3bf6a7ebf7f0394a
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:79
   3:     0x559ffa959dc0 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h2e8cb764b7fe02e7
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:58
   4:     0x559ffa972f6c - core::fmt::write::h7a1184eaee6a8644
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/fmt/mod.rs:1080
   5:     0x559ffa957b12 - std::io::Write::write_fmt::haeeb374d93a67eac
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/mod.rs:1516
   6:     0x559ffa95beed - std::sys_common::backtrace::_print::h1d14a7f6ad632dc8
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:61
   7:     0x559ffa95beed - std::sys_common::backtrace::print::h301abac8bb2e3e81
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:48
   8:     0x559ffa95beed - std::panicking::default_hook::{{closure}}::hde0cb80358a6920a
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:208
   9:     0x559ffa95bb98 - std::panicking::default_hook::h9b1a691049a0ec8f
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:227
  10:     0x559ffa95c5d1 - std::panicking::rust_panic_with_hook::h2bdec87b60580584
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:577
  11:     0x559ffa95c179 - std::panicking::begin_panic_handler::{{closure}}::h101ca09d9df5db47
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:484
  12:     0x559ffa95a22c - std::sys_common::backtrace::__rust_end_short_backtrace::h3bb85654c20113ca
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:153
  13:     0x559ffa95c139 - rust_begin_unwind
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:483
  14:     0x559ffa95c0eb - std::panicking::begin_panic_fmt::hf0503558fbe5b251
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:437
  15:     0x559ffa957022 - std::io::stdio::print_to::h9435376f36962f3f
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/stdio.rs:993
  16:     0x559ffa957022 - std::io::stdio::_print::h0d31d4b9faa6e1ec
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/stdio.rs:1005
  17:     0x559ffa944807 - wordstats::main::h1c2ea6400047a5eb
  18:     0x559ffa942e73 - std::sys_common::backtrace::__rust_begin_short_backtrace::h9e31cf87ddc88116
  19:     0x559ffa942e49 - std::rt::lang_start::{{closure}}::h6c6491f05894818f
  20:     0x559ffa95c9f7 - core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once::he179d32a5d10d957
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/ops/function.rs:259
  21:     0x559ffa95c9f7 - std::panicking::try::do_call::hcb3d5e7be089b2b4
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:381
  22:     0x559ffa95c9f7 - std::panicking::try::h7ac93b0cd56fb701
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:345
  23:     0x559ffa95c9f7 - std::panic::catch_unwind::h7b40e396c93a4fcd
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panic.rs:382
  24:     0x559ffa95c9f7 - std::rt::lang_start_internal::h142b9cc66267fea1
                               at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/rt.rs:51
  25:     0x559ffa944ae2 - main
  26:     0x7f6223a380b3 - __libc_start_main
  27:     0x559ffa94209e - _start
  28:                0x0 - <unknown>


use diacritics;
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;

/// A word paired with the number of times it was counted in the input.
struct Entry {
    /// The word itself.
    word: String,
    /// How many times `word` was counted.
    count: u32,
}

/// Characters that terminate the current word while a line is being scanned.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];

fn main() {
    let mut words: HashMap<String, u32> = HashMap::new();
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        line_processor(line.unwrap(), &mut words)
    output(&mut words);

fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    let formatted_line;
    let mut word = String::new();
    formatted_line = diacritics::remove_diacritics(&line).to_lowercase();

    for c in formatted_line.chars() {
        if SEPARATORS.contains(&c) {
            add_word(word, words);
            word = String::new();
        } else {

/// Increments the count stored for `word` in `words`, inserting it with a
/// count of 1 the first time it is seen.
///
/// Empty strings are ignored; they arise when two separators are adjacent.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if !word.is_empty() {
        // The entry API performs a single lookup and consumes `word` without cloning.
        *words.entry(word).or_insert(0) += 1;
    }
}

fn output(words: &mut HashMap<String, u32>) {
    let mut stack = Vec::<Entry>::new();

    for (k, v) in words {
        stack.push(Entry {
            word: k.to_string(),
            count: *v,

    stack.sort_by(|a, b| b.count.cmp(&a.count));

    while let Some(entry) = stack.pop() {
        println!("{}\t{}", entry.count, entry.word);


cargo build --release


cat src/sample.txt | ./target/release/wordstats  | head -n 50


15  the
14  in
11  are
10  and
10  of
9   species
9   bats
8   horseshoe
8   is
6   or
6   as
5   which
5   their

对于一些回显内容或其他一些文件(例如 cat src/main.rs | ...),情况确实如此(输出正常,没有任何恐慌痕迹),但对于这个文件的内容却不是这样——它是某个随机维基百科页面的一部分。


当我将结果通过管道传输到 head -n 50 程序时会出现问题,但当我打印完整输出时不会出现问题

知道为什么我得到这样的痕迹吗?我是不是在我的程序中以错误的方式处理某些事情,或者它可能与其他事情有关(rust lib / unix 错误行为)

我的 rustc 版本是:rustc 1.48.0 (7eac88abb 2020-11-16)



[package]
name = "wordstats"
version = "0.1.0"
authors = ["Eric Régnier <utopman@gmail.com>"]
edition = "2018"

[dependencies]
diacritics = "0.1.1"

首先,您没有提供足够的信息来重现您的问题。您提供了使用第三方依赖项的源代码,但忽略了提供 Cargo.toml。在您的情况下,在不影响手头问题的情况下删除依赖项的使用非常容易,所以这就是我所做的。

其次,强烈不建议在非玩具命令行程序中使用 println!,原因正在于此。具体来说,有两个问题结合在一起,导致了这种不良行为:

  1. println! 如果在写入 stdout 时发生任何错误,将会出现 panic。
  2. Rust 的运行时所做的少数事情之一是忽略 SIGPIPE,这意味着您的应用程序不会收到 PIPE 信号,而是对已关闭的文件描述符的相应写入会返回一个错误。(在那个链接中,您可以看到我公开主张改变这种行为。)

在典型的 C 程序中,SIGPIPE 不会 被忽略。它通常也没有明确处理。当进程收到它不处理的信号时,进程终止。在这种情况下,这正是您想要的。一旦 head 停止读取它的标准输入(你的标准输出),你 想要 你的程序停止,但你也希望它优雅地停止而不是恐慌或打印错误。因为那是 Unix CLI 实用程序所做的。

你有两种方法可以解决这个问题。一种方法是更改代码以显式处理 BrokenPipe 错误。您的代码是按照不会发生错误的假设编写的,因为您直接 unwrap 了读取标准输入的结果。所以你的程序不是惯用的,也没有为处理错误做好准备。因此,为了正确处理 BrokenPipe,我不得不做一些小改动,以便错误能够正确地向上传播:

use std::collections::HashMap;
use std::io;
use std::io::prelude::*;

/// A word paired with the number of times it was counted in the input.
struct Entry {
    /// The word itself.
    word: String,
    /// How many times `word` was counted.
    count: u32,
}

/// Characters that terminate the current word while a line is being scanned.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];

fn main() {
    if let Err(err) = try_main() {
        if err.kind() == std::io::ErrorKind::BrokenPipe {
        // Ignore any error that may occur while writing to stderr.
        let _ = writeln!(std::io::stderr(), "{}", err);

fn try_main() -> Result<(), std::io::Error> {
    let mut words: HashMap<String, u32> = HashMap::new();
    let stdin = io::stdin();
    for result in stdin.lock().lines() {
        let line = result?;
        line_processor(line, &mut words)
    output(&mut words)?;

fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    let mut word = String::new();

    for c in line.chars() {
        if SEPARATORS.contains(&c) {
            add_word(word, words);
            word = String::new();
        } else {

/// Increments the count stored for `word` in `words`, inserting it with a
/// count of 1 the first time it is seen.
///
/// Empty strings are ignored; they arise when two separators are adjacent.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if !word.is_empty() {
        // The entry API performs a single lookup and consumes `word` without cloning.
        *words.entry(word).or_insert(0) += 1;
    }
}

fn output(words: &mut HashMap<String, u32>) -> Result<(), std::io::Error> {
    let mut stack = Vec::<Entry>::new();

    for (k, v) in words {
        stack.push(Entry {
            word: k.to_string(),
            count: *v,

    stack.sort_by(|a, b| b.count.cmp(&a.count));

    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    while let Some(entry) = stack.pop() {
        writeln!(stdout, "{}\t{}", entry.count, entry.word)?;

第二种处理方法是返回到 SIGPIPE 的默认行为。这将使您的 Rust 应用程序表现得像 C 应用程序。这可以通过定义一个函数来将 SIGPIPE 的信号处理程序重置为 SIG_DFL:

fn reset_sigpipe() {
    unsafe {
        libc::signal(libc::SIGPIPE, libc::SIG_DFL);

fn reset_sigpipe() {
    // no-op

然后在main中将其作为第一件事调用。然后您可以删除对 BrokenPipe 错误的任何特定处理,因为它不会发生。相反,您的进程将收到一个 PIPE 信号,然后它将终止。这是完整的代码:

use std::collections::HashMap;
use std::io;
use std::io::prelude::*;

/// A word paired with the number of times it was counted in the input.
struct Entry {
    /// The word itself.
    word: String,
    /// How many times `word` was counted.
    count: u32,
}

/// Characters that terminate the current word while a line is being scanned.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];

fn main() {
    if let Err(err) = try_main() {
        let _ = writeln!(std::io::stderr(), "{}", err);

fn try_main() -> Result<(), std::io::Error> {
    let mut words: HashMap<String, u32> = HashMap::new();
    let stdin = io::stdin();
    for result in stdin.lock().lines() {
        let line = result?;
        line_processor(line, &mut words)
    output(&mut words)?;

fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    let mut word = String::new();

    for c in line.chars() {
        if SEPARATORS.contains(&c) {
            add_word(word, words);
            word = String::new();
        } else {

/// Increments the count stored for `word` in `words`, inserting it with a
/// count of 1 the first time it is seen.
///
/// Empty strings are ignored; they arise when two separators are adjacent.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if !word.is_empty() {
        // The entry API performs a single lookup and consumes `word` without cloning.
        *words.entry(word).or_insert(0) += 1;
    }
}

fn output(words: &mut HashMap<String, u32>) -> Result<(), std::io::Error> {
    let mut stack = Vec::<Entry>::new();

    for (k, v) in words {
        stack.push(Entry {
            word: k.to_string(),
            count: *v,

    stack.sort_by(|a, b| b.count.cmp(&a.count));

    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    while let Some(entry) = stack.pop() {
        writeln!(stdout, "{}\t{}", entry.count, entry.word)?;

fn reset_sigpipe() {
    unsafe {
        libc::signal(libc::SIGPIPE, libc::SIG_DFL);

fn reset_sigpipe() {
    // no-op