当必须使用循环命令时,是否有办法增加循环命令的范围

Is there anyway to increase the range of the loop command when a loop command must be used

outerLoop:
    skipCarry:
        mov     eax, 2
        mov     inCompare, eax   ;Set our inCompare to 2
        mov     eax, outCompare 
        push    ecx ;Save status of these registers before entering the innerLoop
        push    eax
        mov     ecx, innerLoopCount

    isComposite:
        mov     eax, outCompare ;Value to be checked for composite
        mov     edx, 0  ;Make sure edx does not have a left over value
        div     inCompare   ;Divide outCompare by inCompare to see if we get a remainder of 0 in edx
        cmp     edx, 0
        jne     skipPrint   ;If the remainder is not 0, we have not proven that the number in question is composite, so do not print
        ;Print out a composite value if its been proven
        mov     eax, outCompare
        call    WriteDec
        mov     edx, OFFSET spaces
        call    WriteString     ;Write spaces between the composites
        mov     ebx, writeCounter
        inc     ebx     ;This will help to make sure we start a new line after writing 10 composites
        mov     writeCounter, ebx
        cmp     ebx, 10     ;If counter is at 10, we start a new line
        jne     exitInnerLoop   ;exit the inner loop because we found that the number in question is composite
        call    CrLf
        mov     writeCounter, 0
        jmp     exitInnerLoop       ;exit the inner loop because we found that the number in question is composite

        skipPrint:      ;Jumped to if the number in question was not confirmed to be composite

        ;This is to continue testing for composite of the number in question by incrementing the divisor
        mov     ebx, inCompare
        inc     ebx
        mov     eax, outCompare
        cmp     eax, ebx
        je      exitInnerLoop
        mov     inCompare, ebx

        skipIncrement:  ;Will be jumped to if there are no new divisors to be tested

        loop    isComposite     ;Retest with new divisior if there is a possibility
        exitInnerLoop:

    pop     eax     ;restore outer loop status
    pop     ecx
    inc     eax     ;Next number to check for being composite
    mov     outCompare, eax
    loop    outerLoop

我收到错误代码,指出跳转太长了 5 个字节,指的是 outerLoop。我知道只使用 jmp 命令并自己跟踪计数器会更简单,但我的家庭作业要求改为使用循环。有没有办法扩展循环的范围,或者有什么方法可以将循环中的内容缩小 5 个我没有看到的字节?

这些循环应该检查一个数字是否是合数,这样才能知道其功能。

不,loop/loopne各只有一种编码,使用SHORT(有符号8位rel8)相对位移。

x86 因为 386 有 jcc rel32 aka NEAR conditional branches as well as jcc rel8。如果你需要rel8跳得更远,loop是小代码大小的错误选择。 dec ecx / jnz 基本上是等价的,只是它破坏了一些标志。

代码大小是您应该使用 loop 的唯一原因之一; it's slow on everything except recent AMD)。 (速度 ~= 8086 上的代码大小,所以它在那里很有用。后来的 CPU 故意让它变慢,以便与驱动程序中的(坏?)延迟循环兼容。


如果你真的想用loop跳得更远,你可以这样做:

    loop  .stay_in_loop

    jmp   .leave_loop
.stay_in_loop:
    jmp .far_away
.leave_loop:

.stay_in_loop 块可以在函数结束之后,或者其他从未到达但在 127B 以内的地方,这样您就可以省略 jmp .leave_loop


你也可以"cheat"并把一些循环体放在外线。例如您可以将 Print 块移动到函数结束之后(或开始之前),然后将 jcc 移动到它,将 jmp 移回循环内部。


使用两个嵌套的 loop 指令简直是脑残,但似乎你必须使用它。 push/pop 到 save/restore 外循环的 ecx 不是必需的;您可以使用另一个寄存器和 mov ecx, esi 或其他东西。无论哪种方式都很糟糕,因为推送或 mov esi, ecx 必须位于循环的顶部。除非你通过自己做 dec esi 而不是复制回 loop 产生的值来伪造它。这使代码更简洁,但可能被认为是欺骗/破坏了要求的目的。 :P

也许您作业的真正目标是让您针对代码大小进行优化,以便短跳可以达到。对于您的情况,这绝对是提高整体代码质量的正确方法。

初学者代码效率低下很痛苦是正常的,所以这里有很多简单的局部优化。例如. Use test edx,edx to compare against zero.

此外,将变量保存在寄存器中。 mov writeCounter, 0 之类的指令使用 disp32 寻址模式汇编为 mov r/m32, imm32。所以这是 8 个字节的地址和数据,加上 2 个字节的操作码 + ModR/M。每个绝对地址而不是寄存器操作数都需要 4 个字节。

如果您需要在循环中保存更多的代码大小,您可以将内容复制到堆栈并使用 [ebp+disp8] 寻址模式访问它们(超出 ModR/M 1 个字节而不是 4 个) .

要释放更多寄存器用于循环,push/pop 它们位于函数的 start/end。


问题中代码的清理版本

外层loop的位移是0xc9,即-55。所以我的版本比你的版本小一半。 (在循环外 ResetWriteCounter 中有 12 个字节的代码。)

    push   ebx
    push   ebp       ; we're NOT using it as a frame pointer
    push   edi
    push   esi


;;; Important to keep in registers
; edi = inCompare = trial divisor
; ebx = outCompare = prime/composite candidate

;; nice to keep in registers
; ebp = WriteCounter  (but done as a down-counter with dec instead of inc/cmp)
; esi = outer loop counter

;; these variable names are kinda bad, IMO.
;; candidate / divisor would be better names.

; ecx = inner loop counter
; eax,edx = scratch for DIV


    mov    ebx, outCompare    ; first composite candidate

    mov    ebp, 10            ; down-counter to trigger newlines
    push   '  '          ; two spaces followed by 2 zero bytes (i.e. null-terminated string)
   ; Use mov edx, esp instead of mov edx, OFFSET spaces in the print block

   ;;; note that code before this isn't part of the size that LOOP has to jump over.

outerLoop:
        test    bl, 1
        jz      Print    ;; multiple of 2
        ;; inner loop now only has to check the odd divisors

        mov     edi, 3    ; first trial divisor = first odd prime (other than 1)
        mov     ecx, innerLoopCount     ; ideally set to sqrt(ebx), e.g. cvtsi2ss xmm0, ebx / sqrtss xmm0,xmm0 / cvtss2si xmm0, ecx might be correct :P
                   ; could instead use ecx = ebx directly if you don't care about speed, so try divisors all the way up to the candidate.

    ;; inner loop
    isComposite:
        mov     eax, ebx         ; edx:eax = dividend = candidate
        xor     edx, edx
        div     edi              ; trial division

        add     edi, 2           ; next trial divisor = next odd number
        test    edx, edx         ; equivalent to cmp edx,0 to check remainder
        jz      Print            ; you could maybe fold this into a LOOPNZ and check ZF after the loop
   ;If the remainder is not 0, we have not proven that the number in question is composite, so do not print

        ;;cmp     edi, ebx        ;  divisor, candidate
        ;;jae     exitInnerLoop   ; what's the point of this?  isn't ecx set so that loop falls through after this many iterations?

        loop   isComposite     ;Retest with new divisor if there is a possibility

        jmp     NoPrint        ; else it was prime

;;; I put this outside the inner loop in the first place instead of needing to multiple jumps out of this to exitInnerLoop
    Print:   ;Print out a composite value if its been proven
        mov     eax, ebx
        call    WriteDec

        dec     ebp              ; new line when the down-counter reaches zero
        jz      ResetWriteCounter

        ; print spaces only if not printing a newline
        mov     edx, esp        ; OFFSET spaces;  Use stack memory we pushed earlier
        call    WriteString     ;Write spaces between the composites

    ;; tail of outer loop.  Prime numbers jump here without printing.
    exitInnerLoop:
    NoPrint:
        inc     ebx         ;Next number to check for being composite
                            ; TODO: increment by 2 and don't even check the even numbers, just print them after every odd prime/composite

        mov     ecx, esi    ; ecx = outer loop counter for the LOOP insn
        dec     esi         ; update outer counter instead of having  mov esi, ecx at the top of the loop
        loop    outerLoop

    add    esp, 4    ; clear the '  ' we pushed earlier.  pop esi  is smaller and about as efficiently

    pop    esi
    pop    edi
    pop    ebp
    pop    ebx
    ret

  ; after the end of the function is a good place for infrequently-use blocks that you jump to and then jump back.
 ResetWriteCounter:
        mov     ebp, 10         ; reset counter
        ; start a new line after writing 10 composites
        call    CrLf
        ; could have done this branchlessly by selecting a string of '  ',0 or 13,10,0 based on ebp being zero
        ; but doing the ebp update branchlessly too is worse than this highly predictable branch.  Maybe if the count was a power of 2, DEC/AND

        jmp  NoPrint          ; back where we came from, skipping the write spaces
       ; alternative: duplicate inc / mov / dec / loop block here

您可以在分支结构方面做更多的事情。例如我想在退出循环后消除 jmp NoPrint 。这看起来很笨拙。将 Print 块放在其他地方就可以解决问题,但是 Print 必须跳跃而不是掉下去。

TODO:使用 ecx 作为试验除数。不过,倒数可能比向上倒数慢得多,因为小素数更常见。还需要避免1.


我本来打算只应用一些局部优化,但是当我理解了你的分支结构时,我已经很开心地在代码大小上打高尔夫球了。代码高尔夫中的高尔夫=最少的字节获胜。

查看我的一些 x86 machine code golf answers 代码大小技巧(包括以牺牲速度为代价,有时会牺牲很多):