计算字符数组中的字符频率 - x86 Assembly
Counting character frequencies in an array of characters - x86 Assembly
我正在尝试计算字符串中字符的出现次数。
我的代码如下:
; Question listing: tries to count how often each character of `text` occurs
; by bumping one dword counter per character code in `freqTable`.
; NOTE: kept exactly as posted. The two indexed operands inside the L1 loop
; are the ones the assembler rejects; see the answers that follow.
data segment 'DSEG'
text db "This is a sentence.",0 ; NUL-terminated input string
textSize dw $ - text - 1 ; string length in bytes, - 1 to exclude the terminating 0
freqTable dd 256 DUP(0) ; 256 zero-initialized dword counters, one per byte value
ends 'DSEG'
code segment 'CSEG'
start:
mov ax, data ; point DS and ES at the data segment
mov ds, ax
mov es, ax
;---------------------------------------
sub cx, cx ; clears CX; the next instruction overwrites it anyway
mov cx, textSize ; CX = number of loop iterations
L1:
; Intent: walk the string from its last character to its first, with CX as
; a 1-based index. Both operands below use AX/CX as address registers, which
; the answers point out is not permitted.
mov ax, [OFFSET text + cx - 1] ; intended: fetch the character; AX is 16 bits wide while text holds bytes
inc [OFFSET freqTable + 4*ax] ; intended: increment the dword counter selected by the character code
LOOP L1 ; decrement CX and repeat while it is non-zero
;---------------------------------------
mov ax, 4c00h ; AH=4Ch, AL=0: return to OS
int 21h
ends 'CSEG'
end start ; set entry point
我创建了一个 DWORD 数组,其中每个索引对应一个字符。然后我遍历字符串,并尝试将数组中以该字符 ASCII 值为下标的元素加一。
但是,当我在循环中执行递增操作时,汇编器报出 wrong parameters 错误。我不确定是什么导致了这个错误,我猜是我使用的递增写法不被允许。应该如何正确地建立频率表?我是不是漏掉了什么细节?
为了帮助您理解如何统计字符,我用 EMU8086(与您的汇编代码兼容)编写了下面这个小程序:它要求用户输入文件名,打开该文件,逐个读取并统计其中的所有字符,最后关闭文件。
下图展示了它的工作原理:有一个包含 256 个位置的频率数组("freq_array"),每个位置是对应字符的计数器,例如位置 65 是 'A'(chr(65))的计数器。
每次从文件中读取一个字符时,该字符本身被用作定位其计数器的下标。例如,如果从文件中读到字符 48('0'),就把 48 按每个计数器的字节数换算成偏移量(本例中每个计数器占 2 字节,即 offset + 48*2),然后将该位置的计数器加一。读到文件末尾时,所有字符都已统计完毕。
现在代码:
.model small
.stack 100h
;-----------------------------------------
.data
freq_array dw 256 dup(0) ;ONE 16-BIT COUNTER PER BYTE VALUE (0..255), ALL STARTING AT ZERO.
msj db 13,10,'Enter name of file: $' ;PROMPT: CR, LF, TEXT. '$' ENDS THE STRING FOR INT 21H/AH=9.
;INPUT BUFFER HANDED TO INT 21H/AH=0AH. IT IS MADE OF THE THREE PARTS BELOW,
;WHICH MUST STAY ADJACENT AND IN THIS ORDER.
filename db 99 ;BUFFER CAPACITY: UP TO 98 CHARACTERS PLUS THE FINAL CHR(13).
db ? ;LENGTH (NUMBER OF CHARACTERS ENTERED BY USER), READ BACK AFTER THE INPUT CALL.
db 99 dup(0) ;CHARACTERS ENTERED BY USER. END WITH CHR(13).
filehandler dw ? ;FILE HANDLE STORED AFTER THE FILE IS OPENED.
the_char db ? ;ONE-BYTE BUFFER FOR THE CHAR READ FROM FILE.
;-----------------------------------------
.code
;PROGRAM ENTRY POINT: SET UP DS, ASK FOR A FILE NAME, COUNT THE
;CHARACTERS OF THAT FILE, WAIT FOR A KEY AND RETURN TO DOS.
start:
        mov     ax, @data               ;DS CANNOT BE LOADED WITH AN IMMEDIATE,
        mov     ds, ax                  ;SO GO THROUGH AX.

        call    get_source_file         ;FILENAME+2 NOW HOLDS AN ASCIIZ NAME.
        call    count_chars             ;FREQ_ARRAY NOW HOLDS THE COUNTS.

        mov     ah, 07h                 ;DOS: READ ONE KEY WITHOUT ECHO, SO
        int     21h                     ;THE WINDOW STAYS OPEN UNTIL A KEYPRESS.

        mov     ah, 4Ch                 ;DOS: TERMINATE PROCESS...
        mov     al, 00h                 ;...WITH EXIT CODE 0.
        int     21h
;-----------------------------------------
get_source_file proc
;PROMPT FOR A FILE NAME, READ IT FROM THE KEYBOARD INTO THE
;FILENAME BUFFER AND TURN IT INTO A ZERO-TERMINATED STRING.
;OUT: FILENAME+2 = NAME FOLLOWED BY CHR(0).
;     SI -> THE TERMINATOR, CX = LENGTH + 1, AL = 0.

;SHOW THE PROMPT.
        lea     dx, msj
        mov     ah, 09h
        int     21h

;BUFFERED KEYBOARD INPUT INTO FILENAME.
        lea     dx, filename
        mov     ah, 0Ah
        int     21h

;DOS ENDS THE CAPTURED TEXT WITH CHR(13), BUT THE FILE SERVICES
;EXPECT CHR(0). THE CHR(13) SITS AT FILENAME + 2 + LENGTH.
        mov     si, offset filename + 1 ;SI -> LENGTH BYTE.
        xor     ch, ch                  ;CX = LENGTH, ZERO-EXTENDED.
        mov     cl, [ si ]
        mov     al, 0                   ;THE NEW TERMINATOR.
        inc     cx                      ;SKIP FROM THE LENGTH BYTE TO THE TEXT...
        add     si, cx                  ;...AND PAST IT: SI -> CHR(13).
        mov     [ si ], al              ;OVERWRITE CHR(13) WITH CHR(0).
        ret
get_source_file endp
;-----------------------------------------
;READ ALL CHARACTERS FROM FILE INCREASING THE COUNTER OF
;EACH CHARACTER IN THE ARRAY OF FREQUENCIES. EACH CHARACTER
;IS USED AS THE INDEX OF ITS OWN COUNTER, EXAMPLE: THE
;COUNTER FOR 'A' IS ELEMENT 65 OF FREQ_ARRAY.
;IN:  FILENAME+2 = ZERO-TERMINATED FILE NAME.
;OUT: FREQ_ARRAY UPDATED. COUNTERS ARE 16 BITS WIDE AND WRAP
;     AROUND AFTER 65535 OCCURRENCES.
;     IF THE FILE CANNOT BE OPENED NOTHING IS COUNTED; IF A READ
;     FAILS COUNTING STOPS AND THE FILE IS CLOSED.
;CHANGES: AX, BX, CX, DX, SI, FLAGS, FILEHANDLER, THE_CHAR.
count_chars proc
;OPEN FILE.
        mov     ah, 3dh                 ;SERVICE TO OPEN FILE.
        mov     al, 0                   ;OPEN AS READ ONLY.
        mov     dx, offset filename + 2
        int     21h
        jc      open_failed             ;CF=1: AX IS AN ERROR CODE, NOT A HANDLE,
                                        ;SO IT MUST NOT BE READ FROM OR CLOSED.
        mov     filehandler, ax         ;NECESSARY FOR OPERATIONS ON FILE.

;COUNT CHARACTERS.
reading:
;READ ONE CHAR FROM FILE.
        mov     ah, 3fh                 ;SERVICE TO READ FROM FILE.
        mov     bx, filehandler
        mov     cx, 1                   ;HOW MANY BYTES TO READ.
        mov     dx, offset the_char     ;WHERE TO STORE THE READ BYTES.
        int     21h
        jc      end_reading             ;CF=1: READ ERROR, THE_CHAR IS STALE.
;CHECK END OF FILE.
        test    ax, ax
        jz      end_reading             ;IF READ ZERO BYTES, FINISH.

;INCREASE COUNTER. THE CHAR ITSELF IS USED AS INDEX: THE COUNTER
;FOR CHAR 65 ('A') IS ELEMENT 65 OF THE ARRAY.
        mov     si, offset freq_array
        mov     al, the_char            ;USE CHAR AS INDEX OF ITS OWN COUNTER.
        mov     ah, 0                   ;CLEAR AH TO USE AX.
        shl     ax, 1                   ;AX * 2, BECAUSE EVERY COUNTER IS 2 BYTES.
        add     si, ax                  ;SI POINTS TO COUNTER POSITION.
        inc     word ptr [si]           ;INCREMENT COUNTER FOR CURRENT CHAR.
        jmp     reading                 ;REPEAT PROCESS.

end_reading:
;CLOSE FILE.
        mov     ah, 3eh                 ;SERVICE TO CLOSE FILE.
        mov     bx, filehandler
        int     21h
open_failed:
        ret
count_chars endp
;-----------------------------------------
end start
希望对您有所帮助。
这是 16 位版本,因为数组元素是 DW。若要改用 32 位计数器(数组元素为 DD),请修改以下几行:
freq_array dd 256 dup(0) ;ONE 32-BIT COUNTER PER BYTE VALUE.
shl ax, 1 ;AX * 4 IN TWO STEPS, BECAUSE EVERY COUNTER IS 4 BYTES
shl ax, 1 ;(THE 8086 ONLY SHIFTS BY 1 OR BY CL).
add word ptr [si], 1 ;INCREMENT THE LOW WORD (ADD, NOT INC: INC LEAVES CF UNCHANGED).
adc word ptr [si+2], 0 ;CARRY INTO THE HIGH WORD; NO 32-BIT OPERANDS NEEDED.
x86 的 16 位寻址方式不允许像您那样用 AX 和 CX 作为地址寄存器(方括号中只能使用 BX、BP、SI、DI)。不过,在 80386 及以上的处理器上,您可以改用 32 位的 EAX 和 ECX 来寻址。
在下面的整个代码片段中,EAX 的第 8–31 位和 ECX 的第 16–31 位始终保持为零。
;---------------------------------------
; Count every character of `text` into the dword table `freqTable`.
; Uses 32-bit registers for addressing, so it needs a 386 or later target
; (assemblers limited to the 8086 instruction set cannot build it).
; Invariant: EAX[8,31] and ECX[16,31] stay zero, so AL / CX can be loaded
; on their own and EAX / ECX used directly as indexes.
        xor     eax, eax
        xor     ecx, ecx
        mov     cx, textSize            ; number of times to loop
        jcxz    L1_done                 ; empty string: LOOP with CX = 0 would run 65536 times
L1:
        mov     al, byte ptr [OFFSET text + ecx - 1]    ; walk from the last character to the first
        inc     dword ptr [OFFSET freqTable + eax*4]    ; explicit size: counters are dwords
        loop    L1
L1_done:
;---------------------------------------
我正在尝试计算字符串中字符的出现次数。 我的代码如下:
; Question listing: tries to count how often each character of `text` occurs
; by bumping one dword counter per character code in `freqTable`.
; NOTE: kept exactly as posted. The two indexed operands inside the L1 loop
; are the ones the assembler rejects; see the answers that follow.
data segment 'DSEG'
text db "This is a sentence.",0 ; NUL-terminated input string
textSize dw $ - text - 1 ; string length in bytes, - 1 to exclude the terminating 0
freqTable dd 256 DUP(0) ; 256 zero-initialized dword counters, one per byte value
ends 'DSEG'
code segment 'CSEG'
start:
mov ax, data ; point DS and ES at the data segment
mov ds, ax
mov es, ax
;---------------------------------------
sub cx, cx ; clears CX; the next instruction overwrites it anyway
mov cx, textSize ; CX = number of loop iterations
L1:
; Intent: walk the string from its last character to its first, with CX as
; a 1-based index. Both operands below use AX/CX as address registers, which
; the answers point out is not permitted.
mov ax, [OFFSET text + cx - 1] ; intended: fetch the character; AX is 16 bits wide while text holds bytes
inc [OFFSET freqTable + 4*ax] ; intended: increment the dword counter selected by the character code
LOOP L1 ; decrement CX and repeat while it is non-zero
;---------------------------------------
mov ax, 4c00h ; AH=4Ch, AL=0: return to OS
int 21h
ends 'CSEG'
end start ; set entry point
我创建了一个 DWORD 数组,其中每个索引对应一个字符。然后我遍历字符串,并尝试将数组中以该字符 ASCII 值为下标的元素加一。
但是,当我在循环中执行递增操作时,汇编器报出 wrong parameters 错误。我不确定是什么导致了这个错误,我猜是我使用的递增写法不被允许。应该如何正确地建立频率表?我是不是漏掉了什么细节?
为了帮助您理解如何统计字符,我用 EMU8086(与您的汇编代码兼容)编写了下面这个小程序:它要求用户输入文件名,打开该文件,逐个读取并统计其中的所有字符,最后关闭文件。
下图展示了它的工作原理:有一个包含 256 个位置的频率数组("freq_array"),每个位置是对应字符的计数器,例如位置 65 是 'A'(chr(65))的计数器。
每次从文件中读取一个字符时,该字符本身被用作定位其计数器的下标。例如,如果从文件中读到字符 48('0'),就把 48 按每个计数器的字节数换算成偏移量(本例中每个计数器占 2 字节,即 offset + 48*2),然后将该位置的计数器加一。读到文件末尾时,所有字符都已统计完毕。
现在代码:
.model small
.stack 100h
;-----------------------------------------
.data
freq_array dw 256 dup(0) ;ONE 16-BIT COUNTER PER BYTE VALUE (0..255), ALL STARTING AT ZERO.
msj db 13,10,'Enter name of file: $' ;PROMPT: CR, LF, TEXT. '$' ENDS THE STRING FOR INT 21H/AH=9.
;INPUT BUFFER HANDED TO INT 21H/AH=0AH. IT IS MADE OF THE THREE PARTS BELOW,
;WHICH MUST STAY ADJACENT AND IN THIS ORDER.
filename db 99 ;BUFFER CAPACITY: UP TO 98 CHARACTERS PLUS THE FINAL CHR(13).
db ? ;LENGTH (NUMBER OF CHARACTERS ENTERED BY USER), READ BACK AFTER THE INPUT CALL.
db 99 dup(0) ;CHARACTERS ENTERED BY USER. END WITH CHR(13).
filehandler dw ? ;FILE HANDLE STORED AFTER THE FILE IS OPENED.
the_char db ? ;ONE-BYTE BUFFER FOR THE CHAR READ FROM FILE.
;-----------------------------------------
.code
;PROGRAM ENTRY POINT: SET UP DS, ASK FOR A FILE NAME, COUNT THE
;CHARACTERS OF THAT FILE, WAIT FOR A KEY AND RETURN TO DOS.
start:
        mov     ax, @data               ;DS CANNOT BE LOADED WITH AN IMMEDIATE,
        mov     ds, ax                  ;SO GO THROUGH AX.

        call    get_source_file         ;FILENAME+2 NOW HOLDS AN ASCIIZ NAME.
        call    count_chars             ;FREQ_ARRAY NOW HOLDS THE COUNTS.

        mov     ah, 07h                 ;DOS: READ ONE KEY WITHOUT ECHO, SO
        int     21h                     ;THE WINDOW STAYS OPEN UNTIL A KEYPRESS.

        mov     ah, 4Ch                 ;DOS: TERMINATE PROCESS...
        mov     al, 00h                 ;...WITH EXIT CODE 0.
        int     21h
;-----------------------------------------
get_source_file proc
;PROMPT FOR A FILE NAME, READ IT FROM THE KEYBOARD INTO THE
;FILENAME BUFFER AND TURN IT INTO A ZERO-TERMINATED STRING.
;OUT: FILENAME+2 = NAME FOLLOWED BY CHR(0).
;     SI -> THE TERMINATOR, CX = LENGTH + 1, AL = 0.

;SHOW THE PROMPT.
        lea     dx, msj
        mov     ah, 09h
        int     21h

;BUFFERED KEYBOARD INPUT INTO FILENAME.
        lea     dx, filename
        mov     ah, 0Ah
        int     21h

;DOS ENDS THE CAPTURED TEXT WITH CHR(13), BUT THE FILE SERVICES
;EXPECT CHR(0). THE CHR(13) SITS AT FILENAME + 2 + LENGTH.
        mov     si, offset filename + 1 ;SI -> LENGTH BYTE.
        xor     ch, ch                  ;CX = LENGTH, ZERO-EXTENDED.
        mov     cl, [ si ]
        mov     al, 0                   ;THE NEW TERMINATOR.
        inc     cx                      ;SKIP FROM THE LENGTH BYTE TO THE TEXT...
        add     si, cx                  ;...AND PAST IT: SI -> CHR(13).
        mov     [ si ], al              ;OVERWRITE CHR(13) WITH CHR(0).
        ret
get_source_file endp
;-----------------------------------------
;READ ALL CHARACTERS FROM FILE INCREASING THE COUNTER OF
;EACH CHARACTER IN THE ARRAY OF FREQUENCIES. EACH CHARACTER
;IS USED AS THE INDEX OF ITS OWN COUNTER, EXAMPLE: THE
;COUNTER FOR 'A' IS ELEMENT 65 OF FREQ_ARRAY.
;IN:  FILENAME+2 = ZERO-TERMINATED FILE NAME.
;OUT: FREQ_ARRAY UPDATED. COUNTERS ARE 16 BITS WIDE AND WRAP
;     AROUND AFTER 65535 OCCURRENCES.
;     IF THE FILE CANNOT BE OPENED NOTHING IS COUNTED; IF A READ
;     FAILS COUNTING STOPS AND THE FILE IS CLOSED.
;CHANGES: AX, BX, CX, DX, SI, FLAGS, FILEHANDLER, THE_CHAR.
count_chars proc
;OPEN FILE.
        mov     ah, 3dh                 ;SERVICE TO OPEN FILE.
        mov     al, 0                   ;OPEN AS READ ONLY.
        mov     dx, offset filename + 2
        int     21h
        jc      open_failed             ;CF=1: AX IS AN ERROR CODE, NOT A HANDLE,
                                        ;SO IT MUST NOT BE READ FROM OR CLOSED.
        mov     filehandler, ax         ;NECESSARY FOR OPERATIONS ON FILE.

;COUNT CHARACTERS.
reading:
;READ ONE CHAR FROM FILE.
        mov     ah, 3fh                 ;SERVICE TO READ FROM FILE.
        mov     bx, filehandler
        mov     cx, 1                   ;HOW MANY BYTES TO READ.
        mov     dx, offset the_char     ;WHERE TO STORE THE READ BYTES.
        int     21h
        jc      end_reading             ;CF=1: READ ERROR, THE_CHAR IS STALE.
;CHECK END OF FILE.
        test    ax, ax
        jz      end_reading             ;IF READ ZERO BYTES, FINISH.

;INCREASE COUNTER. THE CHAR ITSELF IS USED AS INDEX: THE COUNTER
;FOR CHAR 65 ('A') IS ELEMENT 65 OF THE ARRAY.
        mov     si, offset freq_array
        mov     al, the_char            ;USE CHAR AS INDEX OF ITS OWN COUNTER.
        mov     ah, 0                   ;CLEAR AH TO USE AX.
        shl     ax, 1                   ;AX * 2, BECAUSE EVERY COUNTER IS 2 BYTES.
        add     si, ax                  ;SI POINTS TO COUNTER POSITION.
        inc     word ptr [si]           ;INCREMENT COUNTER FOR CURRENT CHAR.
        jmp     reading                 ;REPEAT PROCESS.

end_reading:
;CLOSE FILE.
        mov     ah, 3eh                 ;SERVICE TO CLOSE FILE.
        mov     bx, filehandler
        int     21h
open_failed:
        ret
count_chars endp
;-----------------------------------------
end start
希望对您有所帮助。
这是 16 位版本,因为数组元素是 DW。若要改用 32 位计数器(数组元素为 DD),请修改以下几行:
freq_array dd 256 dup(0) ;ONE 32-BIT COUNTER PER BYTE VALUE.
shl ax, 1 ;AX * 4 IN TWO STEPS, BECAUSE EVERY COUNTER IS 4 BYTES
shl ax, 1 ;(THE 8086 ONLY SHIFTS BY 1 OR BY CL).
add word ptr [si], 1 ;INCREMENT THE LOW WORD (ADD, NOT INC: INC LEAVES CF UNCHANGED).
adc word ptr [si+2], 0 ;CARRY INTO THE HIGH WORD; NO 32-BIT OPERANDS NEEDED.
x86 的 16 位寻址方式不允许像您那样用 AX 和 CX 作为地址寄存器(方括号中只能使用 BX、BP、SI、DI)。不过,在 80386 及以上的处理器上,您可以改用 32 位的 EAX 和 ECX 来寻址。
在下面的整个代码片段中,EAX 的第 8–31 位和 ECX 的第 16–31 位始终保持为零。
;---------------------------------------
; Count every character of `text` into the dword table `freqTable`.
; Uses 32-bit registers for addressing, so it needs a 386 or later target
; (assemblers limited to the 8086 instruction set cannot build it).
; Invariant: EAX[8,31] and ECX[16,31] stay zero, so AL / CX can be loaded
; on their own and EAX / ECX used directly as indexes.
        xor     eax, eax
        xor     ecx, ecx
        mov     cx, textSize            ; number of times to loop
        jcxz    L1_done                 ; empty string: LOOP with CX = 0 would run 65536 times
L1:
        mov     al, byte ptr [OFFSET text + ecx - 1]    ; walk from the last character to the first
        inc     dword ptr [OFFSET freqTable + eax*4]    ; explicit size: counters are dwords
        loop    L1
L1_done:
;---------------------------------------