在 C 中正确操作 wide char/string
Proper manipulation of wide char/string in C
OS X Yosemite 10.10.5
XCode7.2
我花了一整天阅读和试验 C 语言中的宽字符/宽字符串(wide char/string),但仍然无法让它正常工作。
我正在尝试读取仅由宽字符组成的文件,如下所示:
んわらやま (Japanese)
我只想一次读取一个字符,然后立即写入另一个文件。
/*
 * Question listing (the version that produces the garbled output).
 *
 * Opens argv[1] for reading and argv[2] for writing, marks both streams as
 * wide-oriented with fwide(), hands them to fileManipulator(), then closes
 * them. Exits with status 1 if either file cannot be opened.
 *
 * NOTE(review): argc is never checked before argv[1] and argv[2] are used.
 * NOTE(review): there is no setlocale() call anywhere in this listing, so the
 * wide-character conversions presumably run in the default "C" locale.
 */
int main(int argc, const char * argv[])
{
FILE *source, *dest;
source = fopen( argv[1], "r");
if (source == NULL) {
printf ("could not open source file \n");
exit (1);
}
// if [dest] does not exist it is created
dest = fopen( argv[2], "w+");
if (dest == NULL) {
// release the already-opened input before bailing out
fclose(source);
printf ("could not open dest file \n");
exit (1);
}
// request wide orientation on both streams before any I/O is performed
fwide(source, 1);
fwide(dest, 1);
fileManipulator(source, dest);
fclose(source);
fclose(dest);
return 0;
}
/*
 * Reads wide characters from source one at a time with getwc() and passes
 * each one, together with dest, to manipulateToken(). The loop stops at the
 * first WEOF returned by getwc().
 *
 * NOTE(review): the loop does not check afterwards whether WEOF meant
 * end-of-file or an error.
 */
void fileManipulator(FILE* source, FILE* dest)
{
wint_t token;
while ( WEOF != (token = getwc(source))) {
manipulateToken(token, dest);
}
}
/*
 * Writes one output record to dest: the narrow string "- ", then the wide
 * character token, then the narrow string " -\n".
 *
 * NOTE(review): fputs() is a byte-oriented function, while main() in this
 * listing calls fwide(dest, 1) to make dest wide-oriented; only fputwc()
 * matches that orientation.
 * NOTE(review): all three error branches are empty, so write failures are
 * silently ignored.
 */
void manipulateToken(wint_t token, FILE* dest)
{
char* pre = "- ";
char* post= " -\n";
if ( EOF == fputs(pre, dest))
{
// error handling
}
if ( WEOF == fputwc(token, dest))
{
// error handling
}
if ( EOF == fputs(post, dest))
{
// error handling
}
}
这是输出:
- „ -
- Ç -
- ì -
- „ -
- Ç -
- è -
- „ -
- Ç -
- â -
- „ -
- Ç -
- Ñ -
- „ -
- Å -
- æ -
我能理解我的问题可能与我如何读取数据有关,但如果我考虑替代方案,我就会完全陷入困境。
- 我试过使用 fgetws 但我无法将字符与另一个字符分开;
- 我已经尝试将 fwscanf 与 %ls 一起使用,但最后得到的是一个空文件;
- 我注意到 Mac OS 似乎没有提供 fgetwc 的实现,尽管相关手册页提到了它;据我所知,getwc 应该是 fgetwc 的宏实现;
- 不确定这是否重要,但我使用 touch 命令创建了源文件;
你能帮帮我吗?
PS:如果能提供有关该主题的延伸阅读链接,我也将非常感谢。关于这方面的文档非常稀少。
XCode 问题
这个问题最初让我以为 Jonathan Leffler 的解决方案不起作用。事实上,通过 XCode(CMD+R)运行和通过 Terminal 运行,得到的输出是不同的。
据我所知,问题一定出在 XCode 运行程序时使用的某种 attribute/property/setting 上,因为即使把 source 和 dest 参数硬编码,仍然会产生错误的输出。
为了清楚起见,我为我的代码提供了导出的方案:
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "0720"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "DA36663A1CCF4F8200615958"
BuildableName = "FileManipulator"
BlueprintName = "FileManipulator"
ReferencedContainer = "container:FileManipulator.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "DA36663A1CCF4F8200615958"
BuildableName = "FileManipulator"
BlueprintName = "FileManipulator"
ReferencedContainer = "container:FileManipulator.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
enableAddressSanitizer = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "DA36663A1CCF4F8200615958"
BuildableName = "FileManipulator"
BlueprintName = "FileManipulator"
ReferencedContainer = "container:FileManipulator.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<CommandLineArguments>
<CommandLineArgument
argument = "/Users/Paul/TestDirectory/Source.txt"
isEnabled = "YES">
</CommandLineArgument>
<CommandLineArgument
argument = "/Users/Paul/TestDirectory/Destination.txt"
isEnabled = "YES">
</CommandLineArgument>
</CommandLineArguments>
<AdditionalOptions>
<AdditionalOption
key = "NSZombieEnabled"
value = "YES"
isEnabled = "YES">
</AdditionalOption>
<AdditionalOption
key = "NSDOLoggingEnabled"
value = "YES"
isEnabled = "YES">
</AdditionalOption>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "DA36663A1CCF4F8200615958"
BuildableName = "FileManipulator"
BlueprintName = "FileManipulator"
ReferencedContainer = "container:FileManipulator.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
此代码似乎有效。您可能不应该使用 fputs() 和窄字符串;您应该使用 fputws() 和宽字符串:L"- "。注意 setlocale() 的使用;这是至关重要的(试着省略它,看看会得到什么)。
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
/* Forward declarations: both helpers are file-local and are defined after main(). */
static void fileManipulator(FILE *source, FILE *dest);
static void manipulateToken(wint_t token, FILE *dest);
/*
 * Copy the file named by argv[1] into the file named by argv[2], one wide
 * character at a time, with each character wrapped by manipulateToken().
 *
 * Usage: program input output
 *
 * Returns 0 on success. Exits with status 1 when the argument count is
 * wrong, when either file cannot be opened, or when the output file cannot
 * be closed cleanly.
 */
int main(int argc, const char *argv[])
{
    FILE *source, *dest;

    if (argc != 3)
    {
        fprintf(stderr, "Usage: %s input output\n", argv[0]);
        exit(1);
    }

    /*
     * Adopt the locale from the environment so that the wide-character I/O
     * functions decode and encode multibyte text with the user's encoding
     * instead of the default "C" locale. A failure is reported but is not
     * fatal: the program then simply continues in the current locale.
     */
    if (setlocale(LC_ALL, "") == NULL)
    {
        fprintf(stderr, "warning: could not set locale from environment\n");
    }

    source = fopen(argv[1], "r");
    if (source == NULL)
    {
        fprintf(stderr, "could not open source file %s\n", argv[1]);
        exit(1);
    }

    /* "w+" creates the destination if it does not exist and truncates it otherwise. */
    dest = fopen(argv[2], "w+");
    if (dest == NULL)
    {
        fclose(source);
        fprintf(stderr, "could not open dest file %s\n", argv[2]);
        exit(1);
    }

    /* Fix both streams to wide orientation before the first I/O operation. */
    fwide(source, 1);
    fwide(dest, 1);

    fileManipulator(source, dest);

    fclose(source);

    /*
     * Output is buffered, so the final flush happens inside fclose(); a
     * failure here means the destination file is incomplete.
     */
    if (fclose(dest) != 0)
    {
        fprintf(stderr, "could not close dest file %s\n", argv[2]);
        exit(1);
    }
    return 0;
}
/*
 * Read wide characters from source until getwc() returns WEOF and forward
 * each one, together with dest, to manipulateToken().
 *
 * source: stream to read wide characters from.
 * dest:   stream handed through unchanged to manipulateToken().
 *
 * Exits with status 1 if reading stopped because of an error rather than
 * end-of-file.
 */
static void fileManipulator(FILE *source, FILE *dest)
{
    wint_t token;

    while ((token = getwc(source)) != WEOF)
    {
        manipulateToken(token, dest);
    }

    /*
     * WEOF is returned for end-of-file and for read/encoding errors alike.
     * Without this check an invalid byte sequence in the input would
     * silently truncate the output and still report success.
     */
    if (ferror(source))
    {
        fprintf(stderr, "Failed to read wide character from source\n");
        exit(1);
    }
}
/*
 * Emit one output record to dest: the wide string "- ", then the wide
 * character token, then the wide string " -\n".
 *
 * token: wide character to write between the prefix and the suffix.
 * dest:  output stream; all three writes use wide-character functions.
 *
 * Any failed write prints a diagnostic to stderr and exits with status 1.
 */
static void manipulateToken(wint_t token, FILE *dest)
{
    static const wchar_t prefix[] = L"- ";
    static const wchar_t suffix[] = L" -\n";

    if (fputws(prefix, dest) == EOF)
    {
        fprintf(stderr, "Failed to write prefix string\n");
        exit(1);
    }

    if (fputwc(token, dest) == WEOF)
    {
        fprintf(stderr, "Failed to write wide character %d\n", (int)token);
        exit(1);
    }

    if (fputws(suffix, dest) == EOF)
    {
        fprintf(stderr, "Failed to write suffix string\n");
        exit(1);
    }
}
给定一个名为 data 的文件,其内容为:
$ cat data
んわらやま
$ odx data
0x0000: E3 82 93 E3 82 8F E3 82 89 E3 82 84 E3 81 BE 0A ................
0x0010:
$
(您不会有 odx,因为那是我自己写的工具,但 xxd -g 1 data 会产生大致等效的输出。)我像这样运行该程序(名为 x37):
$ x37 data output
$ cat output
- ん -
- わ -
- ら -
- や -
- ま -
-
-
$ odx output
0x0000: 2D 20 E3 82 93 20 2D 0A 2D 20 E3 82 8F 20 2D 0A - ... -.- ... -.
0x0010: 2D 20 E3 82 89 20 2D 0A 2D 20 E3 82 84 20 2D 0A - ... -.- ... -.
0x0020: 2D 20 E3 81 BE 20 2D 0A 2D 20 0A 20 2D 0A - ... -.- . -.
0x002E:
$
使用 GCC(5.3.0,自制)和 Clang(Apple LLVM 版本 7.3.0 (clang-703.0.29))在 Mac OS X 10.11.4 上进行测试。
有了可以工作的代码,您就可以通过实验找出哪些改动是至关重要的。我还会创建一些函数,用单行调用来报告错误,而不必为每个错误编写 3 或 4 行代码。(其实说“使用”比“创建”更合适——我很早以前就创建了这样一套函数,并一直在使用它们。)
OS X Yosemite 10.10.5 XCode7.2
我花了一整天阅读和试验 C 语言中的宽字符/宽字符串(wide char/string),但仍然无法让它正常工作。
我正在尝试读取仅由宽字符组成的文件,如下所示:
んわらやま (Japanese)
我只想一次读取一个字符,然后立即写入另一个文件。
/*
 * Question listing (the version that produces the garbled output).
 *
 * Opens argv[1] for reading and argv[2] for writing, marks both streams as
 * wide-oriented with fwide(), hands them to fileManipulator(), then closes
 * them. Exits with status 1 if either file cannot be opened.
 *
 * NOTE(review): argc is never checked before argv[1] and argv[2] are used.
 * NOTE(review): there is no setlocale() call anywhere in this listing, so the
 * wide-character conversions presumably run in the default "C" locale.
 */
int main(int argc, const char * argv[])
{
FILE *source, *dest;
source = fopen( argv[1], "r");
if (source == NULL) {
printf ("could not open source file \n");
exit (1);
}
// if [dest] does not exist it is created
dest = fopen( argv[2], "w+");
if (dest == NULL) {
// release the already-opened input before bailing out
fclose(source);
printf ("could not open dest file \n");
exit (1);
}
// request wide orientation on both streams before any I/O is performed
fwide(source, 1);
fwide(dest, 1);
fileManipulator(source, dest);
fclose(source);
fclose(dest);
return 0;
}
/*
 * Reads wide characters from source one at a time with getwc() and passes
 * each one, together with dest, to manipulateToken(). The loop stops at the
 * first WEOF returned by getwc().
 *
 * NOTE(review): the loop does not check afterwards whether WEOF meant
 * end-of-file or an error.
 */
void fileManipulator(FILE* source, FILE* dest)
{
wint_t token;
while ( WEOF != (token = getwc(source))) {
manipulateToken(token, dest);
}
}
/*
 * Writes one output record to dest: the narrow string "- ", then the wide
 * character token, then the narrow string " -\n".
 *
 * NOTE(review): fputs() is a byte-oriented function, while main() in this
 * listing calls fwide(dest, 1) to make dest wide-oriented; only fputwc()
 * matches that orientation.
 * NOTE(review): all three error branches are empty, so write failures are
 * silently ignored.
 */
void manipulateToken(wint_t token, FILE* dest)
{
char* pre = "- ";
char* post= " -\n";
if ( EOF == fputs(pre, dest))
{
// error handling
}
if ( WEOF == fputwc(token, dest))
{
// error handling
}
if ( EOF == fputs(post, dest))
{
// error handling
}
}
这是输出:
- „ -
- Ç -
- ì -
- „ -
- Ç -
- è -
- „ -
- Ç -
- â -
- „ -
- Ç -
- Ñ -
- „ -
- Å -
- æ -
我能理解我的问题可能与我如何读取数据有关,但如果我考虑替代方案,我就会完全陷入困境。
- 我试过使用 fgetws 但我无法将字符与另一个字符分开;
- 我已经尝试将 fwscanf 与 %ls 一起使用,但最后得到的是一个空文件;
- 我注意到 Mac OS 似乎没有提供 fgetwc 的实现,尽管相关手册页提到了它;据我所知,getwc 应该是 fgetwc 的宏实现;
- 不确定这是否重要,但我使用 touch 命令创建了源文件;
你能帮帮我吗?
PS:如果能提供有关该主题的延伸阅读链接,我也将非常感谢。关于这方面的文档非常稀少。
XCode 问题
这个问题最初让我以为 Jonathan Leffler 的解决方案不起作用。事实上,通过 XCode(CMD+R)运行和通过 Terminal 运行,得到的输出是不同的。
据我所知,问题一定出在 XCode 运行程序时使用的某种 attribute/property/setting 上,因为即使把 source 和 dest 参数硬编码,仍然会产生错误的输出。
为了清楚起见,我为我的代码提供了导出的方案:
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "0720"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "DA36663A1CCF4F8200615958"
BuildableName = "FileManipulator"
BlueprintName = "FileManipulator"
ReferencedContainer = "container:FileManipulator.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "DA36663A1CCF4F8200615958"
BuildableName = "FileManipulator"
BlueprintName = "FileManipulator"
ReferencedContainer = "container:FileManipulator.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
enableAddressSanitizer = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "DA36663A1CCF4F8200615958"
BuildableName = "FileManipulator"
BlueprintName = "FileManipulator"
ReferencedContainer = "container:FileManipulator.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<CommandLineArguments>
<CommandLineArgument
argument = "/Users/Paul/TestDirectory/Source.txt"
isEnabled = "YES">
</CommandLineArgument>
<CommandLineArgument
argument = "/Users/Paul/TestDirectory/Destination.txt"
isEnabled = "YES">
</CommandLineArgument>
</CommandLineArguments>
<AdditionalOptions>
<AdditionalOption
key = "NSZombieEnabled"
value = "YES"
isEnabled = "YES">
</AdditionalOption>
<AdditionalOption
key = "NSDOLoggingEnabled"
value = "YES"
isEnabled = "YES">
</AdditionalOption>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "DA36663A1CCF4F8200615958"
BuildableName = "FileManipulator"
BlueprintName = "FileManipulator"
ReferencedContainer = "container:FileManipulator.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
此代码似乎有效。您可能不应该使用 fputs() 和窄字符串;您应该使用 fputws() 和宽字符串:L"- "。注意 setlocale() 的使用;这是至关重要的(试着省略它,看看会得到什么)。
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
/* Forward declarations: both helpers are file-local and are defined after main(). */
static void fileManipulator(FILE *source, FILE *dest);
static void manipulateToken(wint_t token, FILE *dest);
/*
 * Copy the file named by argv[1] into the file named by argv[2], one wide
 * character at a time, with each character wrapped by manipulateToken().
 *
 * Usage: program input output
 *
 * Returns 0 on success. Exits with status 1 when the argument count is
 * wrong, when either file cannot be opened, or when the output file cannot
 * be closed cleanly.
 */
int main(int argc, const char *argv[])
{
    FILE *source, *dest;

    if (argc != 3)
    {
        fprintf(stderr, "Usage: %s input output\n", argv[0]);
        exit(1);
    }

    /*
     * Adopt the locale from the environment so that the wide-character I/O
     * functions decode and encode multibyte text with the user's encoding
     * instead of the default "C" locale. A failure is reported but is not
     * fatal: the program then simply continues in the current locale.
     */
    if (setlocale(LC_ALL, "") == NULL)
    {
        fprintf(stderr, "warning: could not set locale from environment\n");
    }

    source = fopen(argv[1], "r");
    if (source == NULL)
    {
        fprintf(stderr, "could not open source file %s\n", argv[1]);
        exit(1);
    }

    /* "w+" creates the destination if it does not exist and truncates it otherwise. */
    dest = fopen(argv[2], "w+");
    if (dest == NULL)
    {
        fclose(source);
        fprintf(stderr, "could not open dest file %s\n", argv[2]);
        exit(1);
    }

    /* Fix both streams to wide orientation before the first I/O operation. */
    fwide(source, 1);
    fwide(dest, 1);

    fileManipulator(source, dest);

    fclose(source);

    /*
     * Output is buffered, so the final flush happens inside fclose(); a
     * failure here means the destination file is incomplete.
     */
    if (fclose(dest) != 0)
    {
        fprintf(stderr, "could not close dest file %s\n", argv[2]);
        exit(1);
    }
    return 0;
}
/*
 * Read wide characters from source until getwc() returns WEOF and forward
 * each one, together with dest, to manipulateToken().
 *
 * source: stream to read wide characters from.
 * dest:   stream handed through unchanged to manipulateToken().
 *
 * Exits with status 1 if reading stopped because of an error rather than
 * end-of-file.
 */
static void fileManipulator(FILE *source, FILE *dest)
{
    wint_t token;

    while ((token = getwc(source)) != WEOF)
    {
        manipulateToken(token, dest);
    }

    /*
     * WEOF is returned for end-of-file and for read/encoding errors alike.
     * Without this check an invalid byte sequence in the input would
     * silently truncate the output and still report success.
     */
    if (ferror(source))
    {
        fprintf(stderr, "Failed to read wide character from source\n");
        exit(1);
    }
}
/*
 * Emit one output record to dest: the wide string "- ", then the wide
 * character token, then the wide string " -\n".
 *
 * token: wide character to write between the prefix and the suffix.
 * dest:  output stream; all three writes use wide-character functions.
 *
 * Any failed write prints a diagnostic to stderr and exits with status 1.
 */
static void manipulateToken(wint_t token, FILE *dest)
{
    static const wchar_t prefix[] = L"- ";
    static const wchar_t suffix[] = L" -\n";

    if (fputws(prefix, dest) == EOF)
    {
        fprintf(stderr, "Failed to write prefix string\n");
        exit(1);
    }

    if (fputwc(token, dest) == WEOF)
    {
        fprintf(stderr, "Failed to write wide character %d\n", (int)token);
        exit(1);
    }

    if (fputws(suffix, dest) == EOF)
    {
        fprintf(stderr, "Failed to write suffix string\n");
        exit(1);
    }
}
给定一个名为 data 的文件,其内容为:
$ cat data
んわらやま
$ odx data
0x0000: E3 82 93 E3 82 8F E3 82 89 E3 82 84 E3 81 BE 0A ................
0x0010:
$
(您不会有 odx,因为那是我自己写的工具,但 xxd -g 1 data 会产生大致等效的输出。)我像这样运行该程序(名为 x37):
$ x37 data output
$ cat output
- ん -
- わ -
- ら -
- や -
- ま -
-
-
$ odx output
0x0000: 2D 20 E3 82 93 20 2D 0A 2D 20 E3 82 8F 20 2D 0A - ... -.- ... -.
0x0010: 2D 20 E3 82 89 20 2D 0A 2D 20 E3 82 84 20 2D 0A - ... -.- ... -.
0x0020: 2D 20 E3 81 BE 20 2D 0A 2D 20 0A 20 2D 0A - ... -.- . -.
0x002E:
$
使用 GCC(5.3.0,自制)和 Clang(Apple LLVM 版本 7.3.0 (clang-703.0.29))在 Mac OS X 10.11.4 上进行测试。
有了可以工作的代码,您就可以通过实验找出哪些改动是至关重要的。我还会创建一些函数,用单行调用来报告错误,而不必为每个错误编写 3 或 4 行代码。(其实说“使用”比“创建”更合适——我很早以前就创建了这样一套函数,并一直在使用它们。)