EBCDIC 到 ASCII 转换背后的基本原理是什么?

What's the rationale behind this EBCDIC to ASCII conversion?

我想了解这种转换究竟是如何发生的。

http://www8.cs.umu.se/~isak/Snippets/a2e.c

/*
**  ASCII <=> EBCDIC conversion functions
*/

/*
 * ASCII -> EBCDIC lookup table.
 *
 * The index is the ASCII code of a character and the stored value is the
 * code of the same character in EBCDIC, e.g. a2e[48] == 240 ('0') and
 * a2e[64] == 124 ('@').  Each row holds 16 consecutive indices.
 *
 * The table is pure lookup data, so it is declared const.
 * NOTE(review): the exact EBCDIC code page is not stated here, and indices
 * 128..255 lie outside 7-bit ASCII -- confirm the intended code page before
 * relying on those entries.
 */
static const unsigned char a2e[256] = {
          0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15,
         16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31,
         64, 79,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97,
        240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111,
        124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214,
        215,216,217,226,227,228,229,230,231,232,233, 74,224, 90, 95,109,
        121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150,
        151,152,153,162,163,164,165,166,167,168,169,192,106,208,161,  7,
         32, 33, 34, 35, 36, 21,  6, 23, 40, 41, 42, 43, 44,  9, 10, 27,
         48, 49, 26, 51, 52, 53, 54,  8, 56, 57, 58, 59,  4, 20, 62,225,
         65, 66, 67, 68, 69, 70, 71, 72, 73, 81, 82, 83, 84, 85, 86, 87,
         88, 89, 98, 99,100,101,102,103,104,105,112,113,114,115,116,117,
        118,119,120,128,138,139,140,141,142,143,144,154,155,156,157,158,
        159,160,170,171,172,173,174,175,176,177,178,179,180,181,182,183,
        184,185,186,187,188,189,190,191,202,203,204,205,206,207,218,219,
        220,221,222,223,234,235,236,237,238,239,250,251,252,253,254,255
};

/*
 * EBCDIC -> ASCII lookup table.
 *
 * The index is the EBCDIC code of a character and the stored value is the
 * code of the same character in ASCII, e.g. e2a[240] == 48 ('0') and
 * e2a[124] == 64 ('@').  Each row holds 16 consecutive indices.
 *
 * The table is pure lookup data, so it is declared const.
 * NOTE(review): values 128..255 are outside 7-bit ASCII; presumably they
 * exist so that every EBCDIC code has some mapping -- confirm the intended
 * code page before relying on them.
 */
static const unsigned char e2a[256] = {
          0,  1,  2,  3,156,  9,134,127,151,141,142, 11, 12, 13, 14, 15,
         16, 17, 18, 19,157,133,  8,135, 24, 25,146,143, 28, 29, 30, 31,
        128,129,130,131,132, 10, 23, 27,136,137,138,139,140,  5,  6,  7,
        144,145, 22,147,148,149,150,  4,152,153,154,155, 20, 21,158, 26,
         32,160,161,162,163,164,165,166,167,168, 91, 46, 60, 40, 43, 33,
         38,169,170,171,172,173,174,175,176,177, 93, 36, 42, 41, 59, 94,
         45, 47,178,179,180,181,182,183,184,185,124, 44, 37, 95, 62, 63,
        186,187,188,189,190,191,192,193,194, 96, 58, 35, 64, 39, 61, 34,
        195, 97, 98, 99,100,101,102,103,104,105,196,197,198,199,200,201,
        202,106,107,108,109,110,111,112,113,114,203,204,205,206,207,208,
        209,126,115,116,117,118,119,120,121,122,210,211,212,213,214,215,
        216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,
        123, 65, 66, 67, 68, 69, 70, 71, 72, 73,232,233,234,235,236,237,
        125, 74, 75, 76, 77, 78, 79, 80, 81, 82,238,239,240,241,242,243,
         92,159, 83, 84, 85, 86, 87, 88, 89, 90,244,245,246,247,248,249,
         48, 49, 50, 51, 52, 53, 54, 55, 56, 57,250,251,252,253,254,255
};

/*
 * Translate a single ASCII character code to EBCDIC via the a2e table.
 *
 * c: ASCII code, used directly as the table index (0..255).
 * Returns the table entry converted to char.  Where plain char is signed,
 * entries above 127 do not fit and the result of the conversion is
 * implementation-defined (typically a negative value).
 */
char ASCIItoEBCDIC(const unsigned char c)
{
        const unsigned char ebcdic_code = a2e[c];

        return (char)ebcdic_code;
}

/*
 * Translate a single EBCDIC character code to ASCII via the e2a table.
 *
 * c: EBCDIC code, used directly as the table index (0..255).
 * Returns the table entry converted to char.  Where plain char is signed,
 * entries above 127 do not fit and the result of the conversion is
 * implementation-defined (typically a negative value).
 */
char EBCDICtoASCII(const unsigned char c)
{
        const unsigned char ascii_code = e2a[c];

        return (char)ascii_code;
}

以示例文本 char sample[6] = "hello"; 为例,h 如何从 ascii 转换为 ebcdic,反之亦然?

编辑:我真的不需要转换字符串。我只想知道这些表是如何构建的。

I just want to know how these tables were constructed.

'0' in ASCII 的代码值为 48。
EBCDIC 中的 '0' 的代码值为 240。

所以我们需要将 48 映射到 240 以及将 240 映射到 48 的表。

要把字符 '0' 的 ASCII 代码值 48 转换为 EBCDIC,只需查表:a2e[48] 返回 240,即字符 '0' 的 EBCDIC 代码。

static unsigned char a2e[256] = {
    ...
    ...
    ...
    240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111,
    ...

反过来,用字符 '0' 的 EBCDIC 代码 240 查表:e2a[240] 返回 48,即字符 '0' 的 ASCII 代码。
static unsigned char e2a[256] = {
     ...
     13 more rows
     ...
     48, 49, 50, 51, 52, 53, 54, 55, 56, 57,250,251,252,253,254,255
};

对所有其他常见字符执行此操作。为其余部分定义映射。回想一下,ASCII 只定义了 128 个字符,而 EBCDIC 定义了 256 个字符。


杂项

EBCDIC 的布局与某些打孔卡(punch cards)的编码方式有一定关系。

在两个字符集中,字符 '0'、'1'、…、'9' 的代码值都是连续的——这是 C 语言对字符编码的要求。

在两个字符集中,对应的大小写字母的代码值都只相差 1 个二进制位,所以可以像下面这样转换(不过最好还是使用 C 标准库的 tolower()):

/*
 * Convert the code of an upper-case letter to the code of the matching
 * lower-case letter in the execution character set.
 *
 * This works because the distance between 'A' and 'a' is the same for
 * every letter: +32 in ASCII, and -64 in the EBCDIC layout used by the
 * a2e table in this file ('A' = 193, 'a' = 129; 'Z' = 233, 'z' = 169).
 *
 * code: character code of an upper-case letter.  No validation is done;
 *       any other input is simply shifted by the same offset.
 * Returns the shifted code.  For general input prefer tolower() from
 * <ctype.h>.
 */
int from_upper_to_lower(int code) {
  /* The original closed this literal with a backtick, which does not compile. */
  return code - 'A' + 'a';
}

可以使用 tables 在汇编程序中编写从一种字符集转换为另一种字符集的函数。

在 C 中,可以按以下方式把字符串

char sample[6] = "hello";

从 ASCII 转换为 EBCDIC:

/*
 * Convert a NUL-terminated ASCII string to EBCDIC in place.
 *
 * Every character up to (not including) the terminating NUL is replaced
 * by its entry in the a2e table, which must be visible at this point.
 *
 * s: string to convert; it is modified in place.
 * Returns s, so the call can be used directly inside an expression.
 */
char *AsciiToEbcdic(char *s)
{
    char *cursor = s;

    while (*cursor != '\0') {
        /* Index as unsigned char so bytes above 127 never become a negative index. */
        *cursor = (char)a2e[(unsigned char)*cursor];
        ++cursor;
    }

    return s;
}

前提是该函数能够访问转换表 a2e。

这是一个演示程序。

#include <stdio.h>

/*
 * Convert a NUL-terminated ASCII string to EBCDIC in place.
 *
 * Every character up to (not including) the terminating NUL is replaced
 * by its entry in the local a2e table.  Only index 0 of the table maps to
 * 0, so no character is ever turned into a terminator and the string
 * length is preserved.
 *
 * s: string to convert; it is modified in place.  May be NULL.
 * Returns s (NULL when s is NULL), so the call can be used directly
 * inside an expression such as a printf argument.
 */
char *AsciiToEbcdic(char *s)
{
    /* ASCII code -> EBCDIC code; read-only lookup data, hence const. */
    static const unsigned char a2e[256] =
    {
          0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15,
         16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31,
         64, 79,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97,
        240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111,
        124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214,
        215,216,217,226,227,228,229,230,231,232,233, 74,224, 90, 95,109,
        121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150,
        151,152,153,162,163,164,165,166,167,168,169,192,106,208,161,  7,
         32, 33, 34, 35, 36, 21,  6, 23, 40, 41, 42, 43, 44,  9, 10, 27,
         48, 49, 26, 51, 52, 53, 54,  8, 56, 57, 58, 59,  4, 20, 62,225,
         65, 66, 67, 68, 69, 70, 71, 72, 73, 81, 82, 83, 84, 85, 86, 87,
         88, 89, 98, 99,100,101,102,103,104,105,112,113,114,115,116,117,
        118,119,120,128,138,139,140,141,142,143,144,154,155,156,157,158,
        159,160,170,171,172,173,174,175,176,177,178,179,180,181,182,183,
        184,185,186,187,188,189,190,191,202,203,204,205,206,207,218,219,
        220,221,222,223,234,235,236,237,238,239,250,251,252,253,254,255
    };

    /* Nothing to convert; avoids dereferencing a null pointer. */
    if (s == NULL) {
        return NULL;
    }

    for (char *p = s; *p != '\0'; ++p) {
        /* Index as unsigned char so bytes above 127 never become a negative index. */
        *p = (char)a2e[(unsigned char)*p];
    }

    return s;
}

/*
 * Demo: print a sample string, convert it to EBCDIC in place with
 * AsciiToEbcdic, and print the converted bytes.
 */
int main(void)
{
    char text[] = "@@@@@";

    /* The string as written, before conversion. */
    printf("\"%s\"\n", text);

    /* AsciiToEbcdic rewrites text in place and hands the same buffer back. */
    char *converted = AsciiToEbcdic(text);
    printf("\"%s\"\n", converted);

    return 0;
}

它的输出是

"@@@@@"
"|||||"