我在这里做错了什么(试图用 C 编写 *.srt 解析器)?
What am I doing wrong here (trying to program a *.srt parser in C)?
我刚发现一个 *.srt
文件 mpv
加载失败。所以,我想让我们制作自己的字幕解析器,将字幕的路径作为命令行参数。这是我尝试过的:
/* Intended to be a program for parsing *.srt subtitles as an alternative to video players' */
#include <ncurses.h>
#include <unistd.h>
#define SEC_IN_MIN 60
#define MIN_IN_HR 60
long get_duration(FILE *fp); // to get the duration of a dialogue in seconds
long turn_to_sec(int hours, int minutes, int seconds); // returns sum of hours and minutes, all in seconds
int main(int argc, char **argv)
{
FILE *fp;
long sec;
char ch;
if(argv[1] == NULL)
{
printf("Please enter a filename!\n");
return 1;
}
printf("Trying to open specified file %s\n",argv[1]);
fp = fopen(argv[1],"r");
if(fp == NULL)
{
printf("Error while opening file %s\n",argv[1]);
return 1;
}
initscr(); // initialise nCurses window
ch = getc(fp);
while(ch != EOF)
{
clear();
sec = get_duration(fp);
while(1)
{
if((ch = getc(fp)) == '\n')
{
if((ch = getc(fp)) == '\n' || ch == EOF)
break;
else
addch(ch);
}
addch(ch);
}
refresh();
sleep(sec);
}
endwin(); // close nCurses
fclose(fp); // close the file
return 0;
}
long get_duration(FILE *fp)
{
long duration = 0;
char ch;
short hour_start = 0, hour_end = 0, minute_start = 0, minute_end = 0, second_start = 0, second_end = 0;
short count=0;
/* just to get to the point where time-specs of the dialogue start */
while((ch = getc(fp)) != '\n');
/* extract characters until ':' to get hour_start */
while((ch = getc(fp)) != 58)
{
hour_start += ch;
count++;
}
hour_start -= (hour_start/(49*count));
/* extract characters until ':' to get minute_start */
count = 0;
while((ch = getc(fp)) != 58)
{
minute_start += ch;
count++;
}
minute_start -= (minute_start/(49*count));
/* extract characters until ',' to get second_start */
count = 0;
while((ch = getc(fp)) != 44)
{
second_start += ch;
count++;
}
second_start -= (second_start/(49*count));
/* now, see if you can find a '>' */
while((ch = getc(fp)) != 62);
ch = getc(fp); // to get rid of that space after "-->"
/* extract characters until ':' to get hour_end */
while((ch = getc(fp)) != 58)
{
hour_end += ch;
count++;
}
hour_end -= (hour_end/(49*count));
/* extract characters until ':' to get minute_end */
count = 0;
while((ch = getc(fp)) != 58)
{
minute_end += ch;
count++;
}
minute_end -= (minute_end/(49*count));
/* extract characters until ',' to get second_end */;
count = 0;
while((ch = getc(fp)) != 44)
{
second_end += ch;
count++;
}
second_end -= (second_end/(49*count));
/* finally, gonna get those values */
second_end -= second_start;
minute_end -= minute_start;
hour_end -= hour_start;
duration += (turn_to_sec(hour_end, minute_end, second_end));
/* positioning the fp to the right position just to keep the 'main()' :) */
while((ch = getc(fp)) != '\n' || ch != EOF);
return duration;
}
long turn_to_sec(int hours, int minutes, int seconds)
{
long temp;
/* manipulating hours */
temp = hours;
temp *= MIN_IN_HR;
temp *= SEC_IN_MIN;
seconds += temp;
/* manipulating minutes */
temp = minutes;
temp *= SEC_IN_MIN;
seconds += temp;
return seconds;
}
第一次尝试时,我只使用对话的开始时间作为对话的持续时间,即end_time - start_time 这就是为什么缺少这一部分的原因:
/* extract characters until ':' to get hour_end */
while((ch = getc(fp)) != 58)
{
hour_end += ch;
count++;
}
hour_end = (hour_end/(49*count));
/* extract characters until ':' to get minute_end */
count = 0;
while((ch = getc(fp)) != 58)
{
minute_end += ch;
count++;
}
minute_end = (minute_end/(49*count));
/* extract characters until ',' to get second_end */
count = 0;
while((ch = getc(fp)) != 44)
{
second_end += ch;
count++;
}
second_end = (second_end/(49*count));
变量的名称有点不同,然后我意识到我错了,但这都是无关紧要的。我之所以这么说是因为直到那时,代码工作得很好(结果出乎意料,尽管有一些垃圾)但现在它只是卡住并且什么都不做。这是为什么?非常感谢您的宝贵时间!
这是我正在尝试的文件:https://gist.github.com/gaurav712/6646ad7dfd3c487536dce9b0712471e7
您的问题之一,可能是您现在遇到的问题,是 getc()
没有 return 读取字符的 ASCII 值。如果没有更多内容可读,它也可能 return EOF。
并且由于当找到某个字符(例如“:”)时您的循环结束,并且 EOF 绝对不是那个字符,在这些情况下,您的程序将, 永远循环.
我建议将该逻辑封装到数字读取函数中:
/**
* Reads a positive number (hopefully less than INT_MAX) from a stream
* Returns -1 if the stream is at end-of-file
*
* @param FILE *fp
* @return int the number read, or -1 if EOF
*/
int readDigits(FILE *fp) {
int value = 0, c;
if (feof(fp)) {
return -1;
}
for (;;) {
c = fgetc(fp);
// EOF is not a digit so we catch it in the digit check
// if (EOF == c) {
// break;
// }
if ((c < '0') || (c > '9')) {
break;
}
value *= 10;
value += (c - '0');
}
return value;
}
int seconds(int h, int m, int s) {
return (h*60 + m) * 60 + s;
}
现在您可以:
hour_start = readDigits(fp);
if (hour_start < 0) {
// Error, do something
}
min_start = readDigits(fp);
// check if -1
sec_start = readDigits(fp);
// check if -1
sec_start = seconds(hour_start, min_start, sec_start);
...
我解决了:
我用一堆评论从头开始重写了它
/* Third attempt to create a subtitle parser
* 29 March, 2019
* 12:55
*/
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ncurses.h>
/* to convert strings of format hh:mm:ss,m_s to seconds (ignoring milliseconds for now) */
int strToTime(char start[13], char end[13]);
int main(int argc, char **args){
short count, shouldExit = 0;
int duration, timeBwDialogues;
char ch, startTimeStr[13], endTimeStr[13], prevTimeStr[13];
FILE *fp;
endTimeStr[0] = 0;
/* Check if argument is present */
if(args[1] == NULL){
printf("No file specified!\n");
exit(1);
}
/* Opening file for reading */
printf("Trying to open file \"%s\"\n", args[1]);
fp = fopen(args[1], "r");
/* Checking if file was opened properly */
if(fp == NULL){
printf("Failed to open file \"%s\"!\n", args[1]);
exit(2);
}
/* Checking if file has contents or not */
if((ch = getc(fp)) == EOF){
printf("File has no contents!\n");
exit(3);
}
ungetc(ch, fp); /* putting ch back as file isn't empty */
/* Section for setting up dialogue-number details
* If I ever want to do that
*/
/* initialising screen for nCurses */
initscr();
/* Here comes the actual stuff */
while(1){
/* As we aren't considering dialogue number, let's just skip that*/
while((getc(fp)) != '\n');
/* Reading dialogue's starting time :
* It says keep reading until you find a character other that 0-9, ','
* or ':' and store it to startTimeStr[]. Finally pass startTimeStr[]
* to strToTime() to convert it to seconds(for now)
*/
count = 0; /* Setting-up counter for the loop */
while(1){
ch = getc(fp);
if(ch == ' ' || ch == '-')
break;
startTimeStr[count] = ch;
count++;
}
startTimeStr[count] = '[=10=]'; /* to terminate the string */
/* Making a copy of endTimeStr[] in prevTimeStr[] to get timeBwDialogues */
if(endTimeStr[0]){
strcpy(prevTimeStr, endTimeStr);
/* Calculating timeBwDialogues */
timeBwDialogues = strToTime(prevTimeStr, startTimeStr);
} else
timeBwDialogues = strToTime("00:00:00,000", startTimeStr);
/* For better readability */
clear();
refresh();
/* Sleeping when there's no voice for synchronisation */
sleep(timeBwDialogues);
/* Placing the pointer to right position for reading ending time.
* Using do-while to make sure at least one character is read before checking the condition
*/
while((getc(fp)) != '>');
if((ch = getc(fp)) == ' ');
else
ungetc(ch, fp);
/* Just similar to what was done above, reading ending time */
count = 0; /* Setting-up counter for the loop */
while(1){
ch = getc(fp);
if(ch == '\n' || ch == ' ')
break;
endTimeStr[count] = ch;
count++;
}
endTimeStr[count] = '[=10=]'; /* to terminate the string */
/* Calculating duration for individual dialogues */
duration = strToTime(startTimeStr, endTimeStr); /* passing startTimeStr[] to strToTime */
/* displaying the dialogue */
while(1){
ch = getc(fp);
/* If ch is newline, the next character maybe EOF. So let's check */
if(ch == EOF){
shouldExit = 1;
break;
} else if(ch == '<'){
while((ch = getc(fp)) != '>');
continue;
} else if(ch == '\n'){
if((ch = getc(fp)) == EOF){
shouldExit = 1;
break;
} else if(ch == '\n') /* if the next character is newline, it's the end of the dialogue */
break;
else{
ungetc(ch, fp);
addch('\n');
continue;
}
}
/* As the next character to ch is not EOF, dialogue still exists(a dialogue might take multiple lines)
* and it should be put on the screen
*/
addch(ch);
}
refresh();
sleep(duration);
if(shouldExit)
break;
}
/* Closing nCurses' window */
endwin();
/* Closing the file */
fclose(fp);
return 0;
}
/* Defining the function */
int strToTime(char start[13], char end[13]){
int hour_start, hour_end, minute_start, minute_end, second_start, second_end;
/* Separating hh, mm and ss for starting time. As I said above, I'll ignore milliseconds */
/* getting hour_start */
hour_start = ((start[0] - '0')*10)+(start[1] - '0');
/* getting minute_start */
minute_start = ((start[3] - '0')*10)+(start[4] - '0');
/* getting second_start */
second_start = ((start[6] - '0')*10)+(start[7] - '0');
/* Separating hh, mm and ss for ending time. As I said above, I'll ignore milliseconds */
/* getting hour_end */
hour_end = ((end[0] - '0')*10)+(end[1] - '0');
/* getting minute_end */
minute_end = ((end[3] - '0')*10)+(end[4] - '0');
/* getting second_end */
second_end = ((end[6] - '0')*10)+(end[7] - '0');
return ( ( ( ( (hour_end - hour_start) * 60) + (minute_end - minute_start) ) * 60) + (second_end - second_start) );
}
我刚发现一个 *.srt
文件 mpv
加载失败。所以,我想让我们制作自己的字幕解析器,将字幕的路径作为命令行参数。这是我尝试过的:
/* Intended to be a program for parsing *.srt subtitles as an alternative to video players' */
#include <ncurses.h>
#include <unistd.h>
#define SEC_IN_MIN 60
#define MIN_IN_HR 60
long get_duration(FILE *fp); // to get the duration of a dialogue in seconds
long turn_to_sec(int hours, int minutes, int seconds); // returns sum of hours and minutes, all in seconds
int main(int argc, char **argv)
{
FILE *fp;
long sec;
char ch;
if(argv[1] == NULL)
{
printf("Please enter a filename!\n");
return 1;
}
printf("Trying to open specified file %s\n",argv[1]);
fp = fopen(argv[1],"r");
if(fp == NULL)
{
printf("Error while opening file %s\n",argv[1]);
return 1;
}
initscr(); // initialise nCurses window
ch = getc(fp);
while(ch != EOF)
{
clear();
sec = get_duration(fp);
while(1)
{
if((ch = getc(fp)) == '\n')
{
if((ch = getc(fp)) == '\n' || ch == EOF)
break;
else
addch(ch);
}
addch(ch);
}
refresh();
sleep(sec);
}
endwin(); // close nCurses
fclose(fp); // close the file
return 0;
}
long get_duration(FILE *fp)
{
long duration = 0;
char ch;
short hour_start = 0, hour_end = 0, minute_start = 0, minute_end = 0, second_start = 0, second_end = 0;
short count=0;
/* just to get to the point where time-specs of the dialogue start */
while((ch = getc(fp)) != '\n');
/* extract characters until ':' to get hour_start */
while((ch = getc(fp)) != 58)
{
hour_start += ch;
count++;
}
hour_start -= (hour_start/(49*count));
/* extract characters until ':' to get minute_start */
count = 0;
while((ch = getc(fp)) != 58)
{
minute_start += ch;
count++;
}
minute_start -= (minute_start/(49*count));
/* extract characters until ',' to get second_start */
count = 0;
while((ch = getc(fp)) != 44)
{
second_start += ch;
count++;
}
second_start -= (second_start/(49*count));
/* now, see if you can find a '>' */
while((ch = getc(fp)) != 62);
ch = getc(fp); // to get rid of that space after "-->"
/* extract characters until ':' to get hour_end */
while((ch = getc(fp)) != 58)
{
hour_end += ch;
count++;
}
hour_end -= (hour_end/(49*count));
/* extract characters until ':' to get minute_end */
count = 0;
while((ch = getc(fp)) != 58)
{
minute_end += ch;
count++;
}
minute_end -= (minute_end/(49*count));
/* extract characters until ',' to get second_end */;
count = 0;
while((ch = getc(fp)) != 44)
{
second_end += ch;
count++;
}
second_end -= (second_end/(49*count));
/* finally, gonna get those values */
second_end -= second_start;
minute_end -= minute_start;
hour_end -= hour_start;
duration += (turn_to_sec(hour_end, minute_end, second_end));
/* positioning the fp to the right position just to keep the 'main()' :) */
while((ch = getc(fp)) != '\n' || ch != EOF);
return duration;
}
long turn_to_sec(int hours, int minutes, int seconds)
{
long temp;
/* manipulating hours */
temp = hours;
temp *= MIN_IN_HR;
temp *= SEC_IN_MIN;
seconds += temp;
/* manipulating minutes */
temp = minutes;
temp *= SEC_IN_MIN;
seconds += temp;
return seconds;
}
第一次尝试时,我只使用对话的开始时间作为对话的持续时间,即end_time - start_time 这就是为什么缺少这一部分的原因:
/* extract characters until ':' to get hour_end */
while((ch = getc(fp)) != 58)
{
hour_end += ch;
count++;
}
hour_end = (hour_end/(49*count));
/* extract characters until ':' to get minute_end */
count = 0;
while((ch = getc(fp)) != 58)
{
minute_end += ch;
count++;
}
minute_end = (minute_end/(49*count));
/* extract characters until ',' to get second_end */
count = 0;
while((ch = getc(fp)) != 44)
{
second_end += ch;
count++;
}
second_end = (second_end/(49*count));
变量的名称有点不同,然后我意识到我错了,但这都是无关紧要的。我之所以这么说是因为直到那时,代码工作得很好(结果出乎意料,尽管有一些垃圾)但现在它只是卡住并且什么都不做。这是为什么?非常感谢您的宝贵时间!
这是我正在尝试的文件:https://gist.github.com/gaurav712/6646ad7dfd3c487536dce9b0712471e7
您的问题之一,可能是您现在遇到的问题,是 getc()
没有 return 读取字符的 ASCII 值。如果没有更多内容可读,它也可能 return EOF。
并且由于当找到某个字符(例如“:”)时您的循环结束,并且 EOF 绝对不是那个字符,在这些情况下,您的程序将, 永远循环.
我建议将该逻辑封装到数字读取函数中:
/**
* Reads a positive number (hopefully less than INT_MAX) from a stream
* Returns -1 if the stream is at end-of-file
*
* @param FILE *fp
* @return int the number read, or -1 if EOF
*/
int readDigits(FILE *fp) {
int value = 0, c;
if (feof(fp)) {
return -1;
}
for (;;) {
c = fgetc(fp);
// EOF is not a digit so we catch it in the digit check
// if (EOF == c) {
// break;
// }
if ((c < '0') || (c > '9')) {
break;
}
value *= 10;
value += (c - '0');
}
return value;
}
int seconds(int h, int m, int s) {
return (h*60 + m) * 60 + s;
}
现在您可以:
hour_start = readDigits(fp);
if (hour_start < 0) {
// Error, do something
}
min_start = readDigits(fp);
// check if -1
sec_start = readDigits(fp);
// check if -1
sec_start = seconds(hour_start, min_start, sec_start);
...
我解决了:
我用一堆评论从头开始重写了它
/* Third attempt to create a subtitle parser
* 29 March, 2019
* 12:55
*/
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ncurses.h>
/* to convert strings of format hh:mm:ss,m_s to seconds (ignoring milliseconds for now) */
int strToTime(char start[13], char end[13]);
int main(int argc, char **args){
short count, shouldExit = 0;
int duration, timeBwDialogues;
char ch, startTimeStr[13], endTimeStr[13], prevTimeStr[13];
FILE *fp;
endTimeStr[0] = 0;
/* Check if argument is present */
if(args[1] == NULL){
printf("No file specified!\n");
exit(1);
}
/* Opening file for reading */
printf("Trying to open file \"%s\"\n", args[1]);
fp = fopen(args[1], "r");
/* Checking if file was opened properly */
if(fp == NULL){
printf("Failed to open file \"%s\"!\n", args[1]);
exit(2);
}
/* Checking if file has contents or not */
if((ch = getc(fp)) == EOF){
printf("File has no contents!\n");
exit(3);
}
ungetc(ch, fp); /* putting ch back as file isn't empty */
/* Section for setting up dialogue-number details
* If I ever want to do that
*/
/* initialising screen for nCurses */
initscr();
/* Here comes the actual stuff */
while(1){
/* As we aren't considering dialogue number, let's just skip that*/
while((getc(fp)) != '\n');
/* Reading dialogue's starting time :
* It says keep reading until you find a character other that 0-9, ','
* or ':' and store it to startTimeStr[]. Finally pass startTimeStr[]
* to strToTime() to convert it to seconds(for now)
*/
count = 0; /* Setting-up counter for the loop */
while(1){
ch = getc(fp);
if(ch == ' ' || ch == '-')
break;
startTimeStr[count] = ch;
count++;
}
startTimeStr[count] = '[=10=]'; /* to terminate the string */
/* Making a copy of endTimeStr[] in prevTimeStr[] to get timeBwDialogues */
if(endTimeStr[0]){
strcpy(prevTimeStr, endTimeStr);
/* Calculating timeBwDialogues */
timeBwDialogues = strToTime(prevTimeStr, startTimeStr);
} else
timeBwDialogues = strToTime("00:00:00,000", startTimeStr);
/* For better readability */
clear();
refresh();
/* Sleeping when there's no voice for synchronisation */
sleep(timeBwDialogues);
/* Placing the pointer to right position for reading ending time.
* Using do-while to make sure at least one character is read before checking the condition
*/
while((getc(fp)) != '>');
if((ch = getc(fp)) == ' ');
else
ungetc(ch, fp);
/* Just similar to what was done above, reading ending time */
count = 0; /* Setting-up counter for the loop */
while(1){
ch = getc(fp);
if(ch == '\n' || ch == ' ')
break;
endTimeStr[count] = ch;
count++;
}
endTimeStr[count] = '[=10=]'; /* to terminate the string */
/* Calculating duration for individual dialogues */
duration = strToTime(startTimeStr, endTimeStr); /* passing startTimeStr[] to strToTime */
/* displaying the dialogue */
while(1){
ch = getc(fp);
/* If ch is newline, the next character maybe EOF. So let's check */
if(ch == EOF){
shouldExit = 1;
break;
} else if(ch == '<'){
while((ch = getc(fp)) != '>');
continue;
} else if(ch == '\n'){
if((ch = getc(fp)) == EOF){
shouldExit = 1;
break;
} else if(ch == '\n') /* if the next character is newline, it's the end of the dialogue */
break;
else{
ungetc(ch, fp);
addch('\n');
continue;
}
}
/* As the next character to ch is not EOF, dialogue still exists(a dialogue might take multiple lines)
* and it should be put on the screen
*/
addch(ch);
}
refresh();
sleep(duration);
if(shouldExit)
break;
}
/* Closing nCurses' window */
endwin();
/* Closing the file */
fclose(fp);
return 0;
}
/* Defining the function */
int strToTime(char start[13], char end[13]){
int hour_start, hour_end, minute_start, minute_end, second_start, second_end;
/* Separating hh, mm and ss for starting time. As I said above, I'll ignore milliseconds */
/* getting hour_start */
hour_start = ((start[0] - '0')*10)+(start[1] - '0');
/* getting minute_start */
minute_start = ((start[3] - '0')*10)+(start[4] - '0');
/* getting second_start */
second_start = ((start[6] - '0')*10)+(start[7] - '0');
/* Separating hh, mm and ss for ending time. As I said above, I'll ignore milliseconds */
/* getting hour_end */
hour_end = ((end[0] - '0')*10)+(end[1] - '0');
/* getting minute_end */
minute_end = ((end[3] - '0')*10)+(end[4] - '0');
/* getting second_end */
second_end = ((end[6] - '0')*10)+(end[7] - '0');
return ( ( ( ( (hour_end - hour_start) * 60) + (minute_end - minute_start) ) * 60) + (second_end - second_start) );
}