更改 TASM 中的字符串

change string in TASM

我正在尝试按照下面的算法编写一个程序,在输入的字符串中查找最短的单词。

我的算法:

Read character from input, but not echo
If character is space:
    current_string_length = 0;
    current_string = "";
    echo character
Else If character belong to English alphabet:
    current_string_length++;
    current_string += character;
    if current_string_length < max_string_length:
       max_string = current_string;
       max_string_length = current_string_length;
    echo character
Else If character is "\n":
    print max_string

但我是汇编新手,找不到向字符串添加字符和清理字符串的方法。我该怎么做,或者我可能需要为此任务选择不同的算法?

我的代码:

.model small
.stack 100h                             ; 100h (256) bytes of stack

ASCII_LF        equ 10                  ; line feed
ASCII_CR        equ 13                  ; carriage return
WORD_BUF_SIZE   equ 128                 ; bytes reserved for each word buffer

.data
;----------------------------------------------------------------------------------
; Word buffers and their lengths.
; Both buffers start out filled with '$', the terminator used by the
; DOS print-string service (int 21h, AH=09h).
maxString           db WORD_BUF_SIZE dup('$')   ; word selected as the result
currentString       db WORD_BUF_SIZE dup('$')   ; word currently being typed
maxLength           dw 0                        ; length of maxString
currentLength       dw 0                        ; length of currentString
;----------------------------------------------------------------------------------
; '$'-terminated texts shown to the user.
helloMessage        db ASCII_LF, ASCII_CR
                    db 'Assembly Shortest Word Finder Version 1.0 Copyright (c) 2016 RodionSoft'
                    db ASCII_LF, ASCII_CR, ASCII_LF, ASCII_CR
                    db 'Usage: enter string with length of words not more then 128 characters'
                    db ASCII_LF, ASCII_CR, ASCII_LF, ASCII_CR
                    db ASCII_LF, ASCII_CR, ASCII_LF, ASCII_CR
                    db 'Enter string: $'
resultMessage       db ASCII_LF, ASCII_CR, "Shortest word: $"
;----------------------------------------------------------------------------------
; Program
;
; Reads characters from standard input (DOS does not echo them), echoes
; letters and spaces itself, and prints the shortest word once Enter is
; pressed.
;
;   - A "word" is a run of English letters (A-Z, a-z).
;   - A space ends the current word.
;   - Any other character is ignored and not echoed.
;   - Letters beyond MAX_WORD_LEN in a single word are ignored, so one byte
;     of each 128-byte buffer always stays free for the '$' terminator.
;   - When several words share the shortest length, the first one wins.
;
; A word is compared against the best one only when it is complete (on
; space or Enter); comparing after every letter would record prefixes of
; words instead of whole words.
KEY_ENTER       equ 0Dh                 ; carriage return sent by the Enter key
MAX_WORD_LEN    equ 127                 ; 128-byte buffer minus the '$' terminator

.code
start:
    mov     ax, @data
    mov     ds, ax                      ; DS = data segment

;----------------------------------------------------------------------------------
; Print helloMessage
    lea     dx, helloMessage
    mov     ah, 9                       ; DOS: print '$'-terminated string at DS:DX
    int     21h

;----------------------------------------------------------------------------------
; Main loop: one iteration per key press
mainLoop:
    mov     ah, 08h                     ; DOS: read character without echo -> AL
    int     21h

    cmp     al, KEY_ENTER               ; Enter ends the input
    je      inputDone
    cmp     al, ' '                     ; space ends the current word
    je      wordBreak

    ; Accept only 'A'..'Z' and 'a'..'z'. Unsigned jumps are used because
    ; character codes are unsigned values.
    cmp     al, 'A'
    jb      mainLoop                    ; below 'A'          -> ignore
    cmp     al, 'z'
    ja      mainLoop                    ; above 'z'          -> ignore
    cmp     al, 'Z'
    jbe     letterInput                 ; 'A'..'Z'           -> letter
    cmp     al, 'a'
    jb      mainLoop                    ; '[' .. '`'         -> ignore

letterInput:
    mov     bx, currentLength
    cmp     bx, MAX_WORD_LEN
    jae     mainLoop                    ; buffer full: drop the extra letter
    mov     currentString[bx], al       ; currentString += al
    inc     bx
    mov     currentLength, bx
    jmp     echoChar

wordBreak:
    call    StringComparison            ; keep the finished word if it is the shortest
    mov     currentLength, 0            ; start a new, empty current word

echoChar:
    mov     dl, al                      ; AH=02h prints the character in DL, not AL
    mov     ah, 2                       ; DOS: write character
    int     21h
    jmp     mainLoop

inputDone:
    call    StringComparison            ; the last word is not followed by a space

;----------------------------------------------------------------------------------
; Print the result. maxString is always '$'-terminated: it is pre-filled with
; '$' and StringComparison re-terminates it after every copy.
printResult:
    lea     dx, resultMessage
    mov     ah, 9
    int     21h
    lea     dx, maxString
    mov     ah, 9
    int     21h
;----------------------------------------------------------------------------------
exit:
    mov     ax, 4C00h                   ; DOS: terminate process, return code 0
    int     21h

;----------------------------------------------------------------------------------
; StringComparison
;   Makes currentString the new maxString when it is a non-empty word that is
;   strictly shorter than the stored one, or when no word has been stored yet
;   (maxLength = 0).
;
;   In:    currentString / currentLength = the word that was just completed
;   Out:   maxString / maxLength updated when the word is the new shortest;
;          maxString is terminated with '$'
;   Keeps: all general-purpose registers (flags are modified)
StringComparison proc
    push    ax
    push    cx
    push    si
    push    di

    mov     cx, currentLength
    jcxz    scDone                      ; empty word (e.g. two spaces in a row)
    mov     ax, maxLength
    test    ax, ax
    jz      scStore                     ; nothing stored yet: take the first word
    cmp     cx, ax
    jae     scDone                      ; not strictly shorter (lengths are unsigned)

scStore:
    mov     maxLength, cx
    lea     si, currentString
    lea     di, maxString
scCopyByte:                             ; copy CX bytes, CX > 0 here
    mov     al, [si]
    mov     [di], al
    inc     si
    inc     di
    dec     cx
    jnz     scCopyByte
    mov     byte ptr [di], '$'          ; DOS terminator right after the word

scDone:
    pop     di
    pop     si
    pop     cx
    pop     ax
    ret
StringComparison endp
end start

can't find way to add character to string and clean string.

嗯,首先,这取决于您如何定义"字符串"(这是汇编中的常见主题:由您自己决定如何存储数据,即哪些位/字节用于什么目的,以及您赋予它们什么含义)。

例如 resultMessage。它由具有 ASCII 编码值的连续字节组成,以值 '$' 结尾,用作 DOS 服务的终止符。

在 C/C++ 中,经典的字符串文字是相似的,但对于终止符,使用值 0

在(旧的 16b)Pascal 中,第一个字节包含长度为 0-255 的字符串,后面 "length" 个字节包含 ASCII 字母,末尾没有终止符。

在 Linux 中,向控制台输出字符串的系统调用接收一个指向字符序列的指针(字符的存放方式与 DOS/C 的定义相同),但不使用任何终止符;字符串的长度必须作为第二个参数提供,至于如何获得这个长度则由程序员自己决定。

所以,像字符串这样简单的东西,您已经有 4 种不同的方法将它存储在内存中。

但在您的情况下,您要处理的不只是一个最终的字符串,还需要逐步构建并修改它,因此最简单的方法可能是分配一个字节数组作为缓冲区:currentString db 128 dup('$')

同时在某个寄存器(比如 si)中保存 end() 指针,即指向最后一个字符之后位置的指针。

那么常见的任务可以这样实现:

; all callable subroutines below expect the register "si"
; to point beyond last character of currentString
; (except the clearString of course, which works always)

; appendLetterInAL
;   Appends the character in AL to currentString unless the buffer is full.
;   In:  al = character to append
;        si = end() pointer (one past the last stored character)
;   Out: si advanced by one when the character was stored, unchanged otherwise
;   At most 127 characters are stored, which leaves the last byte of the
;   128-byte buffer for the '$' terminator.
appendLetterInAL:
    cmp     si,OFFSET currentString+127
    jb      appendLetterInAL_store      ; still room for one more letter
    ret                                 ; buffer full: silently ignore the letter
appendLetterInAL_store:
    mov     [si],al                     ; write the letter at end()
    inc     si                          ; end() now points past the new letter
    ret

; clearString
;   Empties currentString by moving the end() pointer in SI back to the start
;   of the buffer. It does not depend on the previous value of SI, so it also
;   serves as the initialisation call at the start of the program.
;   Out: si = address of currentString
clearString:
    mov     si,OFFSET currentString
    ret

; prepareStringForDOSOutput
;   Makes currentString printable as a '$'-terminated DOS string.
;   In:  si = end() pointer (one past the last stored character)
;   Out: dx = address of currentString, a '$' is stored at [si]
prepareStringForDOSOutput:
    mov     dx,OFFSET currentString     ; dx = start of the string
    mov     BYTE PTR [si],'$'           ; terminator goes right after the last letter
    ret

; getLengthOfString
;   Computes the number of characters currently stored in currentString.
;   In:  si = end() pointer (one past the last stored character)
;   Out: cx = si - OFFSET currentString
;   The SUB instruction modifies the flags.
getLengthOfString:  ; sets cx to the length of the current string
    ; lea     cx,[si - currentString] ; one-instruction alternative, probably not allowed in 16-bit code?
    ; variant used instead: copy end() and subtract the start address
    mov     cx,si
    sub     cx,OFFSET currentString
    ret

; copyCurrentStringToDI
;   Copies currentString into the buffer at DI and appends the DOS '$'
;   terminator to the copy.
;   In:  si = end() pointer of currentString (one past the last character)
;        di = destination buffer; needs room for the string plus one byte
;   Out: di and bx hold their original values on return
;   Clobbers: al whenever at least one byte is copied, flags
copyCurrentStringToDI:
    push    bx
    push    di
    mov     bx,OFFSET currentString     ; bx = read pointer
    jmp     copyCurrentStringToDI_check ; test first: the string may be empty
copyCurrentStringToDI_next:
    mov     al,[bx]                     ; move one byte from source to destination
    mov     [di],al
    inc     bx
    inc     di
copyCurrentStringToDI_check:
    cmp     bx,si
    jb      copyCurrentStringToDI_next  ; bytes remain while read pointer < end()
    mov     BYTE PTR [di],'$'           ; DOS terminator after the copied bytes
    pop     di                          ; back to the start of the destination
    pop     bx
    ret

所以基本上两个指针(si 中的当前 end(),以及在编译时固定为 currentString 的字符串的开始)足以对其进行许多操作。

我希望算法和使用的数据结构很容易从代码和注释中理解。