为什么 scanf 跳过文件重定向的大部分输入? [C]

why is scanf skipping large part of input from file redirect? [C]

我有一个名为 simplechain.c 的程序:进程先 fork 一次,子进程再做同样的事情,如此重复指定的次数,形成一条进程链;随后每个进程(由于 wait() 的缘故,按与创建相反的顺序)读取指定数量的字符,读够之后把它们打印出来:

#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

// Check whether a string consists solely of decimal digits.
//
// input: NUL-terminated string to examine.
//
// Returns 0 (fail) as soon as a character that is not a decimal digit is
// found, 1 (success) if every character is a digit.  An empty string contains
// no offending character and therefore also yields 1.
int isnumber(char * input){
        const char *p;

        // Walk the string up to its terminator; no separate length pass needed.
        for (p = input; *p != '\0'; p++)
        {
                // The <ctype.h> classifiers are only defined for values
                // representable as unsigned char (or EOF); plain char may be
                // negative, so convert before the call.
                if (!isdigit((unsigned char)*p))
                {
                        return 0;
                }
        }

        return 1;
}


// Build a chain of 'nprocs' processes (every process forks at most one child),
// then let each process read 'nchars' characters from standard input and print
// them on standard error, 'niters' times.
//
// Options:
//   -h      exit immediately with status 0 (no help text is printed)
//   -p N    number of processes in the chain     (default 4)
//   -c N    characters to read per iteration     (default 80)
//   -s N    seconds to sleep per iteration       (default 3)
//   -i N    iterations of the read/print cycle   (default 1)
// An option argument that is not all digits ends the program with exit(-1).
//
// Returns 0 when all iterations have been completed.
int main (int argc, char *argv[]) {
        pid_t childpid = 0;
        int p;
        int nprocs = 4;
        int nchars = 80;
        int sleep_time = 3;
        int niters = 1;
        int opt;

        // argument handler
        while((opt = getopt(argc, argv, "hp:c:s:i:")) != -1)
        {
                switch(opt)
                {
                case 'h':       // help info
                        exit(0);
                        break;
                case 'p':       // set the number of processes
                        if(!isnumber(optarg)){
                                exit(-1);
                        }
                        nprocs = atoi(optarg); // store the requested number of processes
                        break;
                case 'c':       // set the number of characters to be read
                        if(!isnumber(optarg)){
                                exit(-1);
                        }
                        nchars = atoi(optarg); // store the number of characters read per iteration
                        break;
                case 's':       // set the sleep time per iteration
                        if(!isnumber(optarg)){
                                exit(-1);
                        }
                        sleep_time = atoi(optarg); // store the sleep time in seconds per iteration
                        break;
                case 'i':       // set the number of iterations
                        if(!isnumber(optarg)){
                                exit(-1);
                        }
                        niters = atoi(optarg); // store the number of read/print iterations
                        break;
                default:        // unrecognised option: ignored, as before
                        break;
                }
        }

        // Fork while the current process number 'p' is below 'nprocs'.  The
        // process that receives a non-zero value from fork() leaves the loop,
        // so only the newest child goes on to fork again.
        for (p = 1; p < nprocs; p++)
        {
                if ( (childpid = fork()) )
                {
                        break;
                }
        }

        // sleep for 10 seconds
        sleep(10);

        // now print the process info 'niters' times
        int i;
        for(i = 0; i < niters; i++){
                // Wait for the child (if any) before printing; this is what
                // makes the chain report in reverse order of creation.  The
                // exit status is not needed, hence NULL.
                wait(NULL);

                // pid_t has no printf conversion of its own: convert to long
                // and print with %ld so specifier and argument agree.
                fprintf(stderr, "i:%d ",p);                             // current process number (not to be confused with its ID)
                fprintf(stderr, " parent ID:%ld ", (long)getppid());    // parent process ID
                fprintf(stderr, " child ID:%ld ", (long)childpid);      // child process ID

                // now prompt the user for 'nchars' characters
                fprintf(stderr, "\nPlease enter %d characters pressing enter after each one: ", nchars);

                // read the characters one by one into the myBuf array
                char myBuf[nchars + 1];
                int c;

                for(c = 0; c < nchars; c++){
                        // Stop at end of input or on a read error; the
                        // remaining elements would otherwise stay uninitialised.
                        if (scanf(" %c", &myBuf[c]) != 1){
                                break;
                        }
                }
                myBuf[c] = '\0';        // terminate after the characters actually read

                sleep(sleep_time);      // sleep for 'sleep_time' seconds

                // finally print the current process ID followed by the string 'myBuf'
                fprintf(stderr, "process ID:%ld;  '%s' \n", (long)getpid(), myBuf);
        }

        return 0;
}

值得关注的是使用 scanf() 的那个循环。它非常简单:只读取指定数量的字符,并把这些字符存入 myBuf 数组。当然可以直接从标准输入(键盘)读取(顺便说一句,这样工作正常),但我想用文件作为输入,所以用下面这个文件做了输入重定向:

In_Congress,_July_4,_1776_The_unanimous_Declaration_of_da_thirteen_united_States_of_America,_When_in_the_Course_of_human_events,_it_becomes_necessary_for_one_people_to_dissolve_the_political_bands_which_have_connected_them_with_another,_and_to_assume_among_the_powers_of_the_earth,_the_separate_and_equal_station_to_which_the_Laws_of_Nature_and_of_Nature's_God_entitle_them,_a_decent_respect_to_the_opinions_of_mankind_requires_that_they_should_declare_the_causes_which_impel_them_to_the_separation._We_hold_these_truths_to_be_self-evident,_that_all_men_are_created_equal,_that_they_are_endowed_by_their_Creator_with_certain_unalienable_Rights,_that_among_these_are_Life,_Liberty_and_the_pursuit_of_Happiness.--That_to_secure_these_rights,_Governments_are_instituted_among_Men,_deriving_their_just_powers_from_the_consent_of_the_governed,_--That_whenever_any_Form_of_Government_becomes_destructive_of_these_ends,_it_is_the_Right_of_the_People_to_alter_or_to_abolish_it,_and_to_institute_new_Government,_laying_its_foundation_on_such_principles_and_organizing_its_powers_in_such_form,_as_to_them_shall_seem_most_likely_to_effect_their_Safety_and_Happiness._Prudence,_indeed,_will_dictate_that_Governments_long_established_should_not_be_changed_for_light_and_transient_causes;_and_accordingly_all_experience_hath_shewn,_that_mankind_are_more_disposed_to_suffer,_while_evils_are_sufferable,_than_to_right_themselves_by_abolishing_the_forms_to_which_they_are_accustomed._But_when_a_long_train_of_abuses_and_usurpations,_pursuing_invariably_the_same_Object_evinces_a_design_to_reduce_them_under_absolute_Despotism,_it_is_their_right,_it_is_their_duty,_to_throw_off_such_Government,_and_to_provide_new_Guards_for_their_future_security.--Such_has_been_the_patient_sufferance_of_these_Colonies;_and_such_is_now_the_necessity_which_constrains_them_to_alter_their_former_Systems_of_Government._The_history_of_the_present_King_of_Great_Britain_is_a_history_of_repeated_injuries_and_usurpations,_all_having_in_di
rect_object_the_establishment_of_an_absolute_Tyranny_over_these_States._To_prove_this,_let_Facts_be_submitted_to_a_candid_world._He_has_refused_his_Assent_to_Laws,_the_most_wholesome_and_necessary_for_the_public_good._He_has_forbidden_his_Governors_to_pass_Laws_of_immediate_and_pressing_importance,_unless_suspended_in_their_operation_till_his_Assent_should_be_obtained;_and_when_so_suspended,_he_has_utterly_neglected_to_attend_to_them._He_has_refused_to_pass_other_Laws_for_the_accommodation_of_large_districts_of_people,_unless_those_people_would_relinquish_the_right_of_Representation_in_the_Legislature,_a_right_inestimable_to_them_and_formidable_to_tyrants_only._He_has_called_together_legislative_bodies_at_places_unusual,_uncomfortable,_and_distant_from_the_depository_of_their_public_Records,_for_the_sole_purpose_of_fatiguing_them_into_compliance_with_his_measures._He_has_dissolved_Representative_Houses_repeatedly,_for_opposing_with_manly_firmness_his_invasions_on_the_rights_of_the_people._He_has_refused_for_a_long_time,_after_such_dissolutions,_to_cause_others_to_be_elected;_whereby_the_Legislative_powers,_incapable_of_Annihilation,_have_returned_to_the_People_at_large_for_their_exercise;_the_State_remaining_in_the_mean_time_exposed_to_all_the_dangers_of_invasion_from_without,_and_convulsions_within._He_has_endeavoured_to_prevent_the_population_of_these_States;_for_that_purpose_obstructing_the_Laws_for_Naturalization_of_Foreigners;_refusing_to_pass_others_to_encourage_their_migrations_hither,_and_raising_the_conditions_of_new_Appropriations_of_Lands._He_has_obstructed_the_Administration_of_Justice,_by_refusing_his_Assent_to_Laws_for_establishing_Judiciary_powers._He_has_made_Judges_dependent_on_his_Will_alone,_for_the_tenure_of_their_offices,_and_the_amount_and_payment_of_their_salaries._He_has_erected_a_multitude_of_New_Offices,_and_sent_hither_swarms_of_Officers_to_harrass_our_people,_and_eat_out_their_substance._He_has_kept_among_us,_in_times_of_peace,_Standing_A
rmies_without_the_Consent_of_our_legislatures._He_has_affected_to_render_the_Military_independent_of_and_superior_to_the_Civil_power._He_has_combined_with_others_to_subject_us_to_a_jurisdiction_foreign_to_our_constitution,_and_unacknowledged_by_our_laws;_giving_his_Assent_to_their_Acts_of_pretended_Legislation:_For_Quartering_large_bodies_of_armed_troops_among_us:_For_protecting_them,_by_a_mock_Trial,_from_punishment_for_any_Murders_which_they_should_commit_on_the_Inhabitants_of_these_States:_For_cutting_off_our_Trade_with_all_parts_of_the_world:_For_imposing_Taxes_on_us_without_our_Consent:_For_depriving_us_in_many_cases,_of_the_benefits_of_Trial_by_Jury:_For_transporting_us_beyond_Seas_to_be_tried_for_pretended_offences_For_abolishing_the_free_System_of_English_Laws_in_a_neighbouring_Province,_establishing_therein_an_Arbitrary_government,_and_enlarging_its_Boundaries_so_as_to_render_it_at_once_an_example_and_fit_instrument_for_introducing_the_same_absolute_rule_into_these_Colonies:For_taking_away_our_Charters,_abolishing_our_most_valuable_Laws,_and_altering_fundamentally_the_Forms_of_our_Governments:_For_suspending_our_own_Legislatures,_and_declaring_themselves_invested_with_power_to_legislate_for_us_in_all_cases_whatsoever._He_has_abdicated_Government_here,_by_declaring_us_out_of_his_Protection_and_waging_War_against_us._He_has_plundered_our_seas,_ravaged_our_Coasts,_burnt_our_towns,_and_destroyed_the_lives_of_our_people._He_is_at_this_time_transporting_large_Armies_of_foreign_Mercenaries_to_compleat_the_works_of_death,_desolation_and_tyranny,_already_begun_with_circumstances_of_Cruelty_&_perfidy_scarcely_paralleled_in_the_most_barbarous_ages,_and_totally_unworthy_the_Head_of_a_civilized_nation._He_has_constrained_our_fellow_Citizens_taken_Captive_on_the_high_Seas_to_bear_Arms_against_their_Country,_to_become_the_executioners_of_their_friends_and_Brethren,_or_to_fall_themselves_by_their_Hands._He_has_excited_domestic_insurrections_amongst_us,_and_has_endeavoured_
to_bring_on_the_inhabitants_of_our_frontiers,_the_merciless_Indian_Savages,_whose_known_rule_of_warfare,_is_an_undistinguished_destruction_of_all_ages,_sexes_and_conditions._In_every_stage_of_these_Oppressions_We_have_Petitioned_for_Redress_in_the_most_humble_terms:_Our_repeated_Petitions_have_been_answered_only_by_repeated_injury._A_Prince_whose_character_is_thus_marked_by_every_act_which_may_define_a_Tyrant,_is_unfit_to_be_the_ruler_of_a_free_people._Nor_have_We_been_wanting_in_attentions_to_our_Brittish_brethren._We_have_warned_them_from_time_to_time_of_attempts_by_their_legislature_to_extend_an_unwarrantable_jurisdiction_over_us._We_have_reminded_them_of_the_circumstances_of_our_emigration_and_settlement_here._We_have_appealed_to_their_native_justice_and_magnanimity,_and_we_have_conjured_them_by_the_ties_of_our_common_kindred_to_disavow_these_usurpations,_which,_would_inevitably_interrupt_our_connections_and_correspondence._They_too_have_been_deaf_to_the_voice_of_justice_and_of_consanguinity._We_must,_therefore,_acquiesce_in_the_necessity,_which_denounces_our_Separation,_and_hold_them,_as_we_hold_the_rest_of_mankind,_Enemies_in_War,_in_Peace_Friends._e,_therefore,_the_Representatives_of_the_united_States_of_America,_in_General_Congress,_Assembled,_appealing_to_the_Supreme_Judge_of_the_world_for_the_rectitude_of_our_intentions,_do,_in_the_Name,_and_by_Authority_of_the_good_People_of_these_Colonies,_solemnly_publish_and_declare,_That_these_United_Colonies_are,_and_of_Right_ought_to_be_Free_and_Independent_States;_that_they_are_Absolved_from_all_Allegiance_to_the_British_Crown,_and_that_all_political_connection_between_them_and_the_State_of_Great_Britain,_is_and_ought_to_be_totally_dissolved;_and_that_as_Free_and_Independent_States,_they_have_full_Power_to_levy_War,_conclude_Peace,_contract_Alliances,_establish_Commerce,_and_to_do_all_other_Acts_and_Things_which_Independent_States_may_of_right_do._And_for_the_support_of_this_Declaration,_with_a_firm_reliance_on_the
_protection_of_divine_Providence,_we_mutually_pledge_to_each_other_our_Lives,_our_Fortunes_and_our_sacred_Honor._We,_therefore,_the_Representatives_of_the_united_States_of_America,_in_General_Congress,_Assembled,_appealing_to_the_Supreme_Judge_of_the_world_for_the_rectitude_of_our_intentions,_do,_in_the_Name,_and_by_Authority_of_the_good_People_of_these_Colonies,_solemnly_publish_and_declare,_That_these_United_Colonies_are,_and_of_Right_ought_to_be_Free_and_Independent_States;_that_they_are_Absolved_from_all_Allegiance_to_the_British_Crown,_and_that_all_political_connection_between_them_and_the_State_of_Great_Britain,_is_and_ought_to_be_totally_dissolved;_and_that_as_Free_and_Independent_States,_they_have_full_Power_to_levy_War,_conclude_Peace,_contract_Alliances,_establish_Commerce,_and_to_do_all_other_Acts_and_Things_which_Independent_States_may_of_right_do._And_for_the_support_of_this_Declaration,_with_a_firm_reliance_on_the_protection_of_divine_Providence,_we_mutually_pledge_to_each_other_our_Lives,_our_Fortunes_and_our_sacred_Honor.

这只是一个包含独立宣言的长文件

但是当我运行下面的命令时:$ ./myapp -p 5 -c 30 -s 1 -i 2 < dc.txt(该命令应让程序 fork 4 次,共得到 5 个进程,每个进程把打印语句循环执行 2 次,每次循环读取 30 个字符),我得到了如下输出:

i:5  parent ID:29725  child ID:0
Please enter 30 characters pressing enter after each one: process ID:29726;  'In_Congress,_July_4,_1776_The_'
i:5  parent ID:29725  child ID:0
Please enter 30 characters pressing enter after each one: process ID:29726;  'unanimous_Declaration_of_da_th'
i:4  parent ID:29724  child ID:29726
Please enter 30 characters pressing enter after each one: process ID:29725;  't_of_and_superior_to_the_Civil'
i:4  parent ID:29724  child ID:29726
Please enter 30 characters pressing enter after each one: process ID:29725;  '_power._He_has_combined_with_o'
i:3  parent ID:29723  child ID:29725
Please enter 30 characters pressing enter after each one: process ID:29724;  'Congress,_Assembled,_appealing'
i:3  parent ID:29723  child ID:29725
Please enter 30 characters pressing enter after each one: process ID:29724;  '_to_the_Supreme_Judge_of_the_w'
i:2  parent ID:29722  child ID:29724
Please enter 30 characters pressing enter after each one: process ID:29723;  ''
i:2  parent ID:29722  child ID:29724
Please enter 30 characters pressing enter after each one: process ID:29723;  ''
i:1  parent ID:24839  child ID:29723
Please enter 30 characters pressing enter after each one: process ID:29722;  ''
i:1  parent ID:24839  child ID:29723
Please enter 30 characters pressing enter after each one: process ID:29722;  ''

我确定文件中有足够多的字符。前几个进程工作正常。但是,如果仔细看实际读到的内容,就会发现各进程读到的文本之间存在跳跃;最后两个进程更是一个字符都没有读到。我的猜测是:进程 5 读取了两次,然后进程 4 跳到文本中靠后的位置又读取了两次,进程 3 也是如此,等轮到进程 2 和进程 1 时,已经到达文件末尾了。我不明白为什么会发生这种跳跃。

简短的回答是缓冲I/O

这些进程共享同一个文件流。从文件读取时,第一个读取的进程会一次性取得一整块数据(可能是 512 或 4096 字节)放进自己的缓冲区,其他进程看不到这块数据,但对其他进程而言,文件的读取位置已经随之前移。后面的进程依次重复这一过程。如果使用文件描述符 I/O,就不会有这样的缓冲效应。如果在 fork 之前就用文件流读取了一些数据,又会得到另一种结果(所有进程都显示相同的数据)。如果输入不是文件而是管道或其他东西,结果还会不同。

您可能可以通过将缓冲区大小设置得较小或无缓冲来修复它:

setvbuf(stdin, NULL, _IONBF, 0);

在进行任何输入之前。

下面是根据您的代码改写的版本,增加了选项 -u(使标准输入无缓冲)和选项 -P(不显示提示)。请注意,我必须把函数 isnumber() 重命名为 is_number(),以避免与 Mac 上 <ctype.h> 头文件中的名称冲突——isnumber() 这个名称是保留给实现使用的。

#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Check whether a string consists solely of decimal digits.
 *
 * input: NUL-terminated string to examine.
 *
 * Returns 0 as soon as a character that is not a decimal digit is found,
 * 1 if every character is a digit.  An empty string contains no offending
 * character and therefore also yields 1.
 */
static int is_number(char *input)
{
    /* Walk up to the terminator instead of narrowing strlen()'s size_t to int. */
    for (const char *p = input; *p != '\0'; p++)
    {
        /*
         * The <ctype.h> classifiers are only defined for values representable
         * as unsigned char (or EOF); plain char may be negative.
         */
        if (!isdigit((unsigned char)*p))
        {
            return 0;
        }
    }

    return 1;
}

/*
 * Build a chain of 'nprocs' processes (every process forks at most one
 * child), then let each process read 'nchars' characters from standard input
 * and print them on standard error, 'niters' times.
 *
 * Options:
 *   -h      exit immediately with status 0 (no help text is printed)
 *   -p N    number of processes in the chain     (default 4)
 *   -c N    characters to read per iteration     (default 80)
 *   -s N    seconds to sleep per iteration       (default 3)
 *   -i N    iterations of the read/print cycle   (default 1)
 *   -u      make standard input unbuffered (default: fully buffered)
 *   -P      suppress the prompt
 * An option argument that is not all digits ends the program with exit(-1);
 * any other option reported by getopt() ends it with EXIT_FAILURE.
 *
 * Returns 0 when all iterations have been completed.
 */
int main(int argc, char *argv[])
{
    pid_t childpid = 0;
    int p;
    int nprocs = 4;
    int nchars = 80;
    int sleep_time = 3;
    int niters = 1;
    int buffered = 1;
    int prompt = 1;
    int opt;

    while ((opt = getopt(argc, argv, "Puhp:c:s:i:")) != -1)
    {
        switch (opt)
        {
        case 'h':
            exit(0);
            break;
        case 'p':
            if (!is_number(optarg))
            {
                exit(-1);
            }
            nprocs = atoi(optarg);
            break;
        case 'c':
            if (!is_number(optarg))
            {
                exit(-1);
            }
            nchars = atoi(optarg);
            break;
        case 's':
            if (!is_number(optarg))
            {
                exit(-1);
            }
            sleep_time = atoi(optarg);
            break;
        case 'i':
            if (!is_number(optarg))
            {
                exit(-1);
            }
            niters = atoi(optarg);
            break;
        case 'u':
            buffered = 0;
            break;
        case 'P':
            prompt = 0;
            break;
        default:
            fprintf(stderr, "Unexpected option '%c'\n", opt);
            exit(EXIT_FAILURE);
        }
    }

    /* Select the stdin buffering mode before any input is read. */
    if (buffered)
    {
        fprintf(stderr, "Buffered input\n");
        setvbuf(stdin, NULL, _IOFBF, 0);
    }
    else
    {
        fprintf(stderr, "Unbuffered input\n");
        setvbuf(stdin, NULL, _IONBF, 0);
    }

    /*
     * The process that receives a non-zero value from fork() leaves the loop,
     * so only the newest child goes on to fork again.
     */
    for (p = 1; p < nprocs; p++)
    {
        if ((childpid = fork()))
            break;
    }

    sleep(2);

    for (int i = 0; i < niters; i++)
    {
        int status;
        int corpse;

        /* Reap every child before reading, so the chain reports in reverse order. */
        while ((corpse = wait(&status)) > 0)
        {
            /* pid_t has no printf conversion of its own, and %X expects unsigned. */
            fprintf(stderr, "%d: child %d exited with status 0x%.4X\n",
                    (int)getpid(), corpse, (unsigned)status);
        }

        fprintf(stderr, "i:%d ", p);
        fprintf(stderr, " parent ID:%d ", (int)getppid());
        fprintf(stderr, " child ID:%d\n", (int)childpid);

        if (prompt)
        {
            fprintf(stderr, "Please enter %d characters pressing enter after each one: ",
                    nchars);
        }

        char myBuf[nchars + 1];
        int c;

        for (c = 0; c < nchars; c++)
        {
            /*
             * Stop at end of input or on a read error; the remaining elements
             * would otherwise stay uninitialised.
             */
            if (scanf(" %c", &myBuf[c]) != 1)
            {
                break;
            }
        }
        myBuf[c] = '\0';    /* terminate after the characters actually read */

        sleep(sleep_time);

        fprintf(stderr, "process ID: %d;  '%s'\n", (int)getpid(), myBuf);
    }

    return 0;
}

在 Mac 上运行时,无论标准输入是否带缓冲,我都能得到预期的行为。而在 RHEL 7.4 Linux 上运行时,输入是否带缓冲就有区别了。源代码保存在 rd31.c 中,编译后生成可执行文件 rd31。

$ make rd31
gcc -std=c11 -O3 -g -Wall -Wextra -Werror -Wstrict-prototypes -Wmissing-prototypes -Wshadow -pedantic-errors rd31.c -o rd31  
$ rd31 -P -u -p 5 -c 30 -s 1 -i 2 < dec-independence
Unbuffered input
i:5  parent ID:4462  child ID:0
process ID: 4463;  'In_Congress,_July_4,_1776_The_'
i:5  parent ID:4462  child ID:0
process ID: 4463;  'unanimous_Declaration_of_da_th'
4462: child 4463 exited with status 0x0000
i:4  parent ID:4460  child ID:4463
process ID: 4462;  'irteen_united_States_of_Americ'
i:4  parent ID:4460  child ID:4463
process ID: 4462;  'a,_When_in_the_Course_of_human'
4460: child 4462 exited with status 0x0000
i:3  parent ID:4459  child ID:4462
process ID: 4460;  '_events,_it_becomes_necessary_'
i:3  parent ID:4459  child ID:4462
process ID: 4460;  'for_one_people_to_dissolve_the'
4459: child 4460 exited with status 0x0000
i:2  parent ID:4457  child ID:4460
process ID: 4459;  '_political_bands_which_have_co'
i:2  parent ID:4457  child ID:4460
process ID: 4459;  'nnected_them_with_another,_and'
4457: child 4459 exited with status 0x0000
i:1  parent ID:9082  child ID:4459
process ID: 4457;  '_to_assume_among_the_powers_of'
i:1  parent ID:9082  child ID:4459
process ID: 4457;  '_the_earth,_the_separate_and_e'
$ rd31 -P -p 5 -c 30 -s 1 -i 2 < dec-independence
Buffered input
i:5  parent ID:4491  child ID:0
process ID: 4492;  'In_Congress,_July_4,_1776_The_'
i:5  parent ID:4491  child ID:0
process ID: 4492;  'unanimous_Declaration_of_da_th'
4491: child 4492 exited with status 0x0000
i:4  parent ID:4490  child ID:4492
process ID: 4491;  't_of_and_superior_to_the_Civil'
i:4  parent ID:4490  child ID:4492
process ID: 4491;  '_power._He_has_combined_with_o'
4490: child 4491 exited with status 0x0000
i:3  parent ID:4489  child ID:4491
process ID: 4490;  'Congress,_Assembled,_appealing'
i:3  parent ID:4489  child ID:4491
process ID: 4490;  '_to_the_Supreme_Judge_of_the_w'
4489: child 4490 exited with status 0x0000
i:2  parent ID:4487  child ID:4490
process ID: 4489;  ''
i:2  parent ID:4487  child ID:4490
process ID: 4489;  ''
4487: child 4489 exited with status 0x0000
i:1  parent ID:9082  child ID:4489
process ID: 4487;  ''
i:1  parent ID:9082  child ID:4489
$