如何删除字符串中的所有标点符号和空格?
How to remove all punctuation and spaces in a string?
我有这样的输入:
This is, ,,, *&% a ::; demo + String. +Need to**@!/// format:::::!!! this.
需要输出:
ThisisademoStringNeedtoformatthis
我不能使用 str_trim。
编辑:我正在编写一个加密程序。在加密之前,我必须从字符串中删除所有标点符号并将所有小写字母转换为大写字母。
我添加了代码。在将其变为大写之前,我需要删除空格或任何标点符号。到目前为止,除了我们不允许使用的 str_trim 之外,我还没有在我的书中找到任何可以帮助解决这个问题的东西。
; Upper-cases the letters of `source` in place, then prints the string.
; Assembler: MASM with the Irvine32 library (32-bit).
INCLUDE Irvine32.inc

LOWER_TO_UPPER_MASK = 11011111b         ; clears bit 5: 'a'..'z' -> 'A'..'Z'

.data
source  byte "This is the source string",0

.code
main proc
        mov     esi, OFFSET source      ; esi -> character being examined

next_char:
        mov     al, [esi]
        test    al, al                  ; NUL terminator ends the scan
        jz      print_result

        ; Only touch 'a'..'z'. Masking every byte would also turn a
        ; space (20h) into NUL and cut the printed string short.
        cmp     al, 'a'
        jb      skip_char
        cmp     al, 'z'
        ja      skip_char
        and     byte ptr [esi], LOWER_TO_UPPER_MASK

skip_char:
        inc     esi
        jmp     next_char

print_result:
        mov     edx, OFFSET source      ; WriteString takes the string address in edx
        call    WriteString
        exit
main endp
end main
您已经在尝试从小写更改为大写,所以,我会帮助您删除标点符号。接下来的代码使用我的建议:将大写字母移动到辅助字符串中,忽略标点符号。我用的是 EMU8086 编译器 :
; Copies only the uppercase letters 'A'..'Z' of `source` into `aux`
; (dropping spaces, punctuation and everything else), then prints `aux`
; with DOS function 09h, which prints up to a '$' terminator.
; Assembler: emu8086 (16-bit, Intel syntax).
.model small
.stack 100h

AUX_SIZE equ 64                         ; must be >= length of source, '$' included

.data
source  db "STRING, WITH. PUNCTUATION : AND * SPACES!$"
aux     db AUX_SIZE dup('$')            ; output buffer, large enough for every
                                        ; kept letter plus the final '$'

.code
        mov     ax, @data
        mov     ds, ax

        mov     si, offset source       ; si -> next character to read
        mov     di, offset aux          ; di -> next free slot in aux

copy_loop:
        mov     al, [si]
        cmp     al, '$'                 ; end of the input string?
        je      finale

        cmp     al, 'A'
        jb      is_not_a_letter         ; below 'A': not an uppercase letter
        cmp     al, 'Z'
        ja      is_not_a_letter         ; above 'Z': not an uppercase letter

        mov     [di], al                ; keep the letter
        inc     di

is_not_a_letter:
        inc     si
        jmp     copy_loop

finale:
        mov     [di], al                ; al = '$' here: terminate aux for printing

        mov     dx, offset aux          ; print the filtered string
        mov     ah, 9
        int     21h

        mov     ax, 4c00h               ; terminate the program, exit code 0
        int     21h
我以 '$' 结束字符串,因为我用 int 21h 打印字符串。
如您所见,我既没有使用 CX 也没有使用 LOOP 指令。我所做的是重复直到找到“$”。您可以这样做,直到找到 0。
这是我删除所有标点符号并将其转为大写后的代码。
; Prints three lines:
;   1. `source` unchanged,
;   2. `source` with every character that is not A-Z / a-z removed,
;   3. the filtered text converted to upper case.
; Assembler: MASM with the Irvine32 library (32-bit).
INCLUDE Irvine32.inc

LOWER_TO_UPPER_MASK = 11011111b         ; clears bit 5: 'a'..'z' -> 'A'..'Z'

.data
source  byte "STriNG, @# WITH.[][][ lalalala PUncTuATION : AND * SpaceS!", 0
target  byte SIZEOF source DUP(0), 0    ; never longer than source

.code
main PROC
        pushad

        mov     edx, OFFSET source      ; line 1: the untouched input
        call    WriteString
        call    Crlf

        ; ---- copy the letters of source into target -------------------
        mov     esi, OFFSET source      ; esi -> next character to read
        mov     edi, OFFSET target      ; edi -> next free slot in target

filter_loop:
        mov     al, [esi]
        test    al, al                  ; NUL terminator ends the scan
        jz      filter_done

        ; Character codes are unsigned, so use jb/ja/jbe throughout.
        cmp     al, 'A'
        jb      not_letter              ; below 'A'
        cmp     al, 'z'
        ja      not_letter              ; above 'z'
        cmp     al, 'Z'
        jbe     keep_char               ; 'A'..'Z'
        cmp     al, 'a'
        jb      not_letter              ; between 'Z' and 'a': [ \ ] ^ _ `

keep_char:
        mov     [edi], al
        inc     edi                     ; full 32-bit pointer increment

not_letter:
        inc     esi
        jmp     filter_loop

filter_done:
        mov     [edi], al               ; al = 0 here: terminate target

        mov     edx, OFFSET target      ; line 2: letters only, mixed case
        call    WriteString
        call    Crlf

        ; ---- upper-case target in place --------------------------------
        ; target holds only letters at this point, so clearing bit 5 of
        ; each byte is safe: uppercase letters are left as they are.
        mov     esi, OFFSET target

upper_loop:
        cmp     byte ptr [esi], 0
        je      upper_done
        and     byte ptr [esi], LOWER_TO_UPPER_MASK
        inc     esi
        jmp     upper_loop

upper_done:
        mov     edx, OFFSET target      ; line 3: letters only, upper case
        call    WriteString
        call    Crlf

        popad
        exit
main endp
end main
我有这样的输入:
This is, ,,, *&% a ::; demo + String. +Need to**@!/// format:::::!!! this.
需要输出:
ThisisademoStringNeedtoformatthis
我不能使用 str_trim。
编辑:我正在编写一个加密程序。在加密之前,我必须从字符串中删除所有标点符号并将所有小写字母转换为大写字母。
我添加了代码。在将其变为大写之前,我需要删除空格或任何标点符号。到目前为止,除了我们不允许使用的 str_trim 之外,我还没有在我的书中找到任何可以帮助解决这个问题的东西。
; Upper-cases the letters of `source` in place, then prints the string.
; Assembler: MASM with the Irvine32 library (32-bit).
INCLUDE Irvine32.inc

LOWER_TO_UPPER_MASK = 11011111b         ; clears bit 5: 'a'..'z' -> 'A'..'Z'

.data
source  byte "This is the source string",0

.code
main proc
        mov     esi, OFFSET source      ; esi -> character being examined

next_char:
        mov     al, [esi]
        test    al, al                  ; NUL terminator ends the scan
        jz      print_result

        ; Only touch 'a'..'z'. Masking every byte would also turn a
        ; space (20h) into NUL and cut the printed string short.
        cmp     al, 'a'
        jb      skip_char
        cmp     al, 'z'
        ja      skip_char
        and     byte ptr [esi], LOWER_TO_UPPER_MASK

skip_char:
        inc     esi
        jmp     next_char

print_result:
        mov     edx, OFFSET source      ; WriteString takes the string address in edx
        call    WriteString
        exit
main endp
end main
您已经在尝试从小写更改为大写,所以,我会帮助您删除标点符号。接下来的代码使用我的建议:将大写字母移动到辅助字符串中,忽略标点符号。我用的是 EMU8086 编译器 :
; Copies only the uppercase letters 'A'..'Z' of `source` into `aux`
; (dropping spaces, punctuation and everything else), then prints `aux`
; with DOS function 09h, which prints up to a '$' terminator.
; Assembler: emu8086 (16-bit, Intel syntax).
.model small
.stack 100h

AUX_SIZE equ 64                         ; must be >= length of source, '$' included

.data
source  db "STRING, WITH. PUNCTUATION : AND * SPACES!$"
aux     db AUX_SIZE dup('$')            ; output buffer, large enough for every
                                        ; kept letter plus the final '$'

.code
        mov     ax, @data
        mov     ds, ax

        mov     si, offset source       ; si -> next character to read
        mov     di, offset aux          ; di -> next free slot in aux

copy_loop:
        mov     al, [si]
        cmp     al, '$'                 ; end of the input string?
        je      finale

        cmp     al, 'A'
        jb      is_not_a_letter         ; below 'A': not an uppercase letter
        cmp     al, 'Z'
        ja      is_not_a_letter         ; above 'Z': not an uppercase letter

        mov     [di], al                ; keep the letter
        inc     di

is_not_a_letter:
        inc     si
        jmp     copy_loop

finale:
        mov     [di], al                ; al = '$' here: terminate aux for printing

        mov     dx, offset aux          ; print the filtered string
        mov     ah, 9
        int     21h

        mov     ax, 4c00h               ; terminate the program, exit code 0
        int     21h
我以 '$' 结束字符串,因为我用 int 21h 打印字符串。
如您所见,我既没有使用 CX 也没有使用 LOOP 指令。我所做的是重复直到找到“$”。您可以这样做,直到找到 0。
这是我删除所有标点符号并将其转为大写后的代码。
; Prints three lines:
;   1. `source` unchanged,
;   2. `source` with every character that is not A-Z / a-z removed,
;   3. the filtered text converted to upper case.
; Assembler: MASM with the Irvine32 library (32-bit).
INCLUDE Irvine32.inc

LOWER_TO_UPPER_MASK = 11011111b         ; clears bit 5: 'a'..'z' -> 'A'..'Z'

.data
source  byte "STriNG, @# WITH.[][][ lalalala PUncTuATION : AND * SpaceS!", 0
target  byte SIZEOF source DUP(0), 0    ; never longer than source

.code
main PROC
        pushad

        mov     edx, OFFSET source      ; line 1: the untouched input
        call    WriteString
        call    Crlf

        ; ---- copy the letters of source into target -------------------
        mov     esi, OFFSET source      ; esi -> next character to read
        mov     edi, OFFSET target      ; edi -> next free slot in target

filter_loop:
        mov     al, [esi]
        test    al, al                  ; NUL terminator ends the scan
        jz      filter_done

        ; Character codes are unsigned, so use jb/ja/jbe throughout.
        cmp     al, 'A'
        jb      not_letter              ; below 'A'
        cmp     al, 'z'
        ja      not_letter              ; above 'z'
        cmp     al, 'Z'
        jbe     keep_char               ; 'A'..'Z'
        cmp     al, 'a'
        jb      not_letter              ; between 'Z' and 'a': [ \ ] ^ _ `

keep_char:
        mov     [edi], al
        inc     edi                     ; full 32-bit pointer increment

not_letter:
        inc     esi
        jmp     filter_loop

filter_done:
        mov     [edi], al               ; al = 0 here: terminate target

        mov     edx, OFFSET target      ; line 2: letters only, mixed case
        call    WriteString
        call    Crlf

        ; ---- upper-case target in place --------------------------------
        ; target holds only letters at this point, so clearing bit 5 of
        ; each byte is safe: uppercase letters are left as they are.
        mov     esi, OFFSET target

upper_loop:
        cmp     byte ptr [esi], 0
        je      upper_done
        and     byte ptr [esi], LOWER_TO_UPPER_MASK
        inc     esi
        jmp     upper_loop

upper_done:
        mov     edx, OFFSET target      ; line 3: letters only, upper case
        call    WriteString
        call    Crlf

        popad
        exit
main endp
end main