计算字符数组中的字符频率 - x86 Assembly

Counting character frequencies in an array of characters - x86 Assembly

我正在尝试计算字符串中字符的出现次数。 我的代码如下:

; Data segment of the question's listing: the input string, its length,
; and a table of 256 DWORD counters (one per possible character code).
data segment 'DSEG'
    text        db  "This is a sentence.",0     ; string
    textSize    dw  $ - text - 1                ; size of string, - 1 to account for null-termination character
    freqTable   dd  256 DUP(0)                  ; 256 four-byte counters, all initialised to zero
ends 'DSEG'

; Code segment of the question's listing: walks the string from its last
; character to its first and tries to bump the matching freqTable counter.
; The listing is kept exactly as asked; the accompanying text reports that
; the assembler rejects the increment inside the loop.
code segment 'CSEG'
start:                          ; entry point, named by "end start" at the bottom
mov ax, data        ; set segment registers
mov ds, ax
mov es, ax
;---------------------------------------

sub cx, cx              ; clears CX, but the next instruction overwrites it anyway
mov cx, textSize        ; number of times to loop
L1:
    mov ax, [OFFSET text + cx - 1]  ; loop from back using cx, put character in ax
                                    ; NOTE(review): CX is used as an address register and the
                                    ; destination is the 16-bit AX, not an 8-bit register - confirm intent
    inc [OFFSET freqTable + 4*ax]   ; increment table's index at the ascii value of character
                                    ; NOTE(review): AX is used as a scaled index and the memory
                                    ; operand carries no size - this is the line the question is about
    LOOP L1                         ; decrement CX, repeat while it is non-zero

;---------------------------------------
mov ax, 4c00h       ; return to OS
int 21h

ends 'CSEG'
end start           ; set entry point

我制作了一个 DWORDS 数组,其中每个索引代表一个字符。然后我遍历字符串并尝试在每个字符的 ascii 值处递增数组。

但是,当我尝试在循环中执行递增操作时,汇编器报出 "wrong parameters" 错误。我不确定是什么导致了这个错误,我猜是不能用我这种写法来做递增。应该如何正确地创建频率表?我是不是漏掉了什么细节?

为了帮助您理解如何统计字符,我用 EMU8086(与您的汇编代码兼容)编写了下面这个小程序:该程序要求用户输入文件名,打开文件,逐个读取所有字符并计数,然后关闭文件。

下图显示了它的工作原理:有一个具有 256 个位置的频率数组 ("freq_array")。每个位置是对应字符的计数器,例如位置 65 是 'A'(chr(65))的计数器。

每次从文件中读取一个字符时,字符本身用作到达其计数器的偏移量。例如,如果从文件中读取 char 48 ('0'),则数字 48 将添加到数组偏移量 (offset + 48),并且该位置会递增。当文件结束时,它的所有字符都已计算在内。

现在代码:

.model small
.stack 100h

;-----------------------------------------
;PROGRAM DATA: THE FREQUENCY TABLE, THE PROMPT TEXT, THE KEYBOARD
;INPUT BUFFER THAT RECEIVES THE FILE NAME, AND THE FILE I/O VARIABLES.

.data

freq_array   dw 256 dup(0) ;ONE 2-BYTE COUNTER PER CHARACTER CODE (0..255), ALL STARTING AT ZERO.

msj          db 13,10,'Enter name of file: $' ;PROMPT SHOWN BY GET_SOURCE_FILE: CR, LF, THEN THE TEXT.

;BUFFER PASSED TO INT 21H/AH=0AH BY GET_SOURCE_FILE. THE THREE FIELDS
;BELOW FORM ONE BUFFER AND MUST STAY CONTIGUOUS AND IN THIS ORDER.
filename     db 99        ;MAX NUMBER OF CHARACTERS ALLOWED (98).
             db ?         ;LENGTH (NUMBER OF CHARACTERS ENTERED BY USER).
             db 99 dup(0) ;CHARACTERS ENTERED BY USER. END WITH CHR(13).

filehandler  dw ?         ;FILE HANDLE STORED BY COUNT_CHARS AFTER OPENING THE FILE.

the_char     db ?         ;CHAR READ FROM FILE.

;-----------------------------------------

.code
start:

;POINT DS AT THE PROGRAM'S DATA SO THE VARIABLES CAN BE ADDRESSED.
  mov  ax, @data
  mov  ds, ax

;ASK THE USER FOR A FILE, THEN TALLY EVERY CHARACTER IT CONTAINS.
  call get_source_file        ;FILLS THE FILENAME BUFFER.
  call count_chars            ;FILLS FREQ_ARRAY.

;HOLD THE SCREEN UNTIL A KEY IS PRESSED.
  mov  ah, 7
  int  21h

;TERMINATE AND RETURN TO DOS WITH EXIT CODE 0.
  mov  ah, 4ch
  mov  al, 0
  int  21h

;-----------------------------------------

get_source_file proc
;PROMPT THE USER FOR THE FILE NAME.
  lea  dx, msj
  mov  ah, 9
  int  21h

;READ A LINE FROM THE KEYBOARD INTO THE FILENAME BUFFER.
  lea  dx, filename
  mov  ah, 0Ah
  int  21h

;THE TYPED TEXT IS FOLLOWED BY CHR(13), BUT THE FILE SERVICES NEED A
;NAME THAT ENDS WITH CHR(0): OVERWRITE THE CHR(13) WITH ZERO.
  mov  si, offset filename + 1 ;SI -> LENGTH BYTE OF THE BUFFER.
  mov  ch, 0
  mov  cl, [ si ]       ;CX = NUMBER OF CHARACTERS TYPED.
  inc  cx               ;+1 TO STEP OVER THE LENGTH BYTE ITSELF.
  add  si, cx           ;SI -> CHR(13) RIGHT AFTER THE NAME.
  mov  al, 0
  mov  [ si ], al       ;NAME NOW ENDS WITH CHR(0).

  ret
get_source_file endp

;-----------------------------------------
;OPEN THE FILE NAMED IN THE FILENAME BUFFER, READ IT ONE BYTE AT A
;TIME AND INCREMENT THE COUNTER OF EACH BYTE IN FREQ_ARRAY. EACH
;CHARACTER IS USED AS THE INDEX OF ITS OWN COUNTER, EXAMPLE: THE
;COUNTER FOR 'A' IS ENTRY 65 OF FREQ_ARRAY.
;IN:   FILENAME+2 = ZERO-TERMINATED FILE NAME.
;OUT:  FREQ_ARRAY UPDATED. IF THE FILE CANNOT BE OPENED NOTHING IS
;      COUNTED; IF A READ FAILS, COUNTING STOPS AND THE FILE IS CLOSED.
;USES: AX, BX, CX, DX, SI, FLAGS, FILEHANDLER, THE_CHAR.
;NOTE: COUNTERS ARE 16 BITS WIDE, SO A COUNT WRAPS TO 0 AFTER 65535.

count_chars proc
;OPEN FILE.
  mov  ah, 3dh          ;SERVICE TO OPEN FILE.
  mov  al, 0            ;OPEN AS READ ONLY.
  mov  dx, offset filename + 2
  int  21h
  jc   count_done       ;CARRY SET = OPEN FAILED: AX IS AN ERROR CODE,
                        ;NOT A HANDLE, SO THERE IS NOTHING TO READ OR CLOSE.
  mov  filehandler, ax  ;NECESSARY FOR OPERATIONS ON FILE.

;COUNT CHARACTERS.
reading:
;READ ONE CHAR FROM FILE.
  mov  ah, 3fh          ;SERVICE TO READ FROM FILE.
  mov  bx, filehandler
  mov  cx, 1            ;HOW MANY BYTES TO READ.
  mov  dx, offset the_char ;WHERE TO STORE THE READ BYTES.
  int  21h
  jc   end_reading      ;CARRY SET = READ ERROR: AX IS AN ERROR CODE, NOT
                        ;A BYTE COUNT, SO STOP INSTEAD OF LOOPING ON IT.

;CHECK END OF FILE.
  cmp  ax, 0
  je   end_reading      ;IF READ ZERO BYTES, FINISH.

;INCREASE COUNTER. THE CHAR ITSELF IS USED AS INDEX: THE COUNTER
;FOR CHAR 65 ('A') IS ENTRY 65 OF THE ARRAY.
  mov  si, offset freq_array
  mov  al, the_char     ;USE CHAR AS INDEX OF ITS OWN COUNTER.
  mov  ah, 0            ;CLEAR AH TO USE AX.
  shl  ax, 1            ;AX * 2, BECAUSE EVERY COUNTER IS 2 BYTES.
  add  si, ax           ;SI POINTS TO COUNTER POSITION.
  inc  [ word ptr si ]  ;INCREMENT COUNTER FOR CURRENT CHAR.
  jmp  reading          ;REPEAT PROCESS.

end_reading:
;CLOSE FILE.
  mov  ah, 3eh          ;SERVICE TO CLOSE FILE.
  mov  bx, filehandler
  int  21h

count_done:
  ret
count_chars endp

;-----------------------------------------

end start

希望对您有所帮助。

上面的程序是 16 位的,因为数组的元素类型是 DW。如果要改用 32 位计数器(数组元素为 DD),请修改以下几行:

freq_array   dd 256 dup(0) ;ONE 4-BYTE COUNTER PER CHARACTER CODE INSTEAD OF A 2-BYTE ONE.

shl  ax, 2             ;AX * 4, BECAUSE EVERY COUNTER IS 4 BYTES.
                       ;NOTE(REVIEW): A SHIFT BY AN IMMEDIATE OTHER THAN 1 IS NOT A
                       ;PLAIN 8086 ENCODING - CONFIRM THE ASSEMBLER/TARGET ACCEPTS IT.

inc  [ dword ptr si ]  ;INCREMENT COUNTER FOR CURRENT CHAR.
                       ;NOTE(REVIEW): A DWORD MEMORY OPERAND NEEDS A 386 OR LATER
                       ;TARGET - CONFIRM THE ASSEMBLER/EMULATOR SUPPORTS IT.

x86 的 16 位寻址方式不允许像您那样把 AX 和 CX 用作内存寻址寄存器。不过,您可以改用 32 位寄存器 EAX 和 ECX 来寻址。
在下面的整个代码片段中,EAX 的第 8–31 位和 ECX 的第 16–31 位始终保持为零。

;---------------------------------------
; Build the histogram: walk text[] from its last character down to its
; first and increment the matching DWORD counter in freqTable.
; Invariant: EAX bits 8..31 and ECX bits 16..31 stay zero throughout, so
; EAX is always the zero-extended character and ECX the 16-bit count.
 xor  eax, eax
 xor  ecx, ecx
 mov  cx, textSize        ; number of times to loop
 jcxz L1_done             ; empty string: LOOP would wrap CX and read before text[]
L1:
 mov  al, byte ptr [OFFSET text + ecx - 1]       ; AL = character at index ECX-1
 inc  dword ptr [OFFSET freqTable + eax*4]       ; explicit size: OFFSET yields a typeless address
 loop L1
L1_done:
 ;---------------------------------------