如何按数量级估计我的示例代码的时间消耗
How to estimate time consumption of my sample code by order of magnitude
我写了一段用 Monte Carlo 方法计算 PI 的代码,运行在 2013 款 MacBook Air 上,CPU 是 1.7 GHz Intel Core i7(似乎是 4650U)。
循环次数为10^8时用时2~3秒,循环次数为10^9时用时约25秒
import Foundation
/// Returns a pseudo-random `Double` scaled into the interval spanned by
/// `lowerBound` and `upperBound`.
///
/// The value is derived from a single call to the C library's `rand()`,
/// normalised by `RAND_MAX` and then stretched over the requested interval.
///
/// - Parameters:
///   - lowerBound: Value returned when `rand()` yields 0.
///   - upperBound: Value returned when `rand()` yields `RAND_MAX`.
/// - Returns: `lowerBound + fraction * (upperBound - lowerBound)`.
func randomNumber(lowerBound:Double, upperBound:Double) -> Double {
    // Fraction of the way through the interval, taken from rand() / RAND_MAX.
    let unitFraction = Double(rand()) / Double(RAND_MAX)
    // Width of the target interval.
    let span = upperBound - lowerBound
    return lowerBound + unitFraction * span
}
// Monte Carlo estimate of pi: draw points in the square [-1, 1] x [-1, 1]
// and count how many fall inside the unit circle. The ratio of the circle's
// area to the square's area is pi / 4.
let pointNumber = 1000000000
var pointInsideCount = 0
// Half-open range so that exactly `pointNumber` samples are drawn, matching
// the denominator used for `result` below. (The closed range
// `0...pointNumber` drew one extra sample, and the loop index was unused.)
for _ in 0..<pointNumber {
    let x = randomNumber(-1.0, upperBound:1.0)
    let y = randomNumber(-1.0, upperBound:1.0)
    // Inside (or on) the unit circle.
    if x*x+y*y <= 1 {
        pointInsideCount += 1
    }
}
// Fraction of hits times 4 approximates pi.
let result = Double(pointInsideCount) / Double(pointNumber) * 4
// NOTE: "%.50f" prints far more digits than a Double carries; only the
// leading digits of the output are meaningful.
let piString = String(format: "%.50f", result)
print("Pi is \(piString)")
我运行 "di -n randomNumber" 获取 randomNumber 函数的汇编代码:
swiftTest`swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double:
0x10023c160 <+0>: pushq %rbp
0x10023c161 <+1>: movq %rsp, %rbp
0x10023c164 <+4>: subq $0x20, %rsp
0x10023c168 <+8>: movsd %xmm0, -0x8(%rbp)
0x10023c16d <+13>: movsd %xmm1, -0x10(%rbp)
0x10023c172 <+18>: movsd %xmm0, -0x18(%rbp)
0x10023c177 <+23>: movsd %xmm1, -0x20(%rbp)
0x10023c17c <+28>: callq 0x10027585e ; symbol stub for: rand
0x10023c181 <+33>: movsd 0x3bc1f(%rip), %xmm0 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 352
0x10023c189 <+41>: cvtsi2sdl %eax, %xmm1
0x10023c18d <+45>: divsd %xmm0, %xmm1
0x10023c191 <+49>: movsd -0x20(%rbp), %xmm0
0x10023c196 <+54>: movsd -0x18(%rbp), %xmm2
0x10023c19b <+59>: subsd %xmm2, %xmm0
0x10023c19f <+63>: mulsd %xmm0, %xmm1
0x10023c1a3 <+67>: addsd %xmm1, %xmm2
0x10023c1a7 <+71>: movaps %xmm2, %xmm0
0x10023c1aa <+74>: addq $0x20, %rsp
0x10023c1ae <+78>: popq %rbp
0x10023c1af <+79>: retq
并运行 “di -f” 得到整个文件的汇编代码:
swiftTest`main:
0x10023bcd0 <+0>: pushq %rbp
0x10023bcd1 <+1>: movq %rsp, %rbp
0x10023bcd4 <+4>: subq $0x120, %rsp
0x10023bcdb <+11>: leaq 0x9340e(%rip), %rax ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token6
0x10023bce2 <+18>: leaq 0x933ff(%rip), %rcx ; static Swift.Process._argc : Swift.Int32
0x10023bce9 <+25>: movl %edi, (%rcx)
0x10023bceb <+27>: cmpq $-0x1, (%rax)
0x10023bcf2 <+34>: movq %rsi, -0x60(%rbp)
0x10023bcf6 <+38>: je 0x10023bd0e ; <+62> at main.swift
0x10023bcf8 <+40>: leaq 0x933f1(%rip), %rdi ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token6
0x10023bcff <+47>: leaq -0x99d56(%rip), %rax ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_func6
0x10023bd06 <+54>: movq %rax, %rsi
0x10023bd09 <+57>: callq 0x100266870 ; swift_once
0x10023bd0e <+62>: leaq 0x933e3(%rip), %rax ; static Swift.Process._unsafeArgv : Swift.UnsafeMutablePointer<Swift.UnsafeMutablePointer<Swift.Int8>>
0x10023bd15 <+69>: movq -0x60(%rbp), %rcx
0x10023bd19 <+73>: movq %rcx, (%rax)
0x10023bd1c <+76>: movq $0x989680, 0x93499(%rip) ; lazy cache variable for type metadata for Swift.VaListBuilder + 4
0x10023bd27 <+87>: movq $0x0, 0x93496(%rip) ; swiftTest.pointNumber : Swift.Int + 4
0x10023bd32 <+98>: movq 0x93487(%rip), %rax ; swiftTest.pointNumber : Swift.Int
0x10023bd39 <+105>: movq %rax, -0x68(%rbp)
0x10023bd3d <+109>: xorl %eax, %eax
0x10023bd3f <+111>: movl %eax, %ecx
0x10023bd41 <+113>: movq -0x68(%rbp), %rdx
0x10023bd45 <+117>: cmpq %rdx, %rcx
0x10023bd48 <+120>: setle %sil
0x10023bd4c <+124>: testb [=12=]x1, %sil
0x10023bd50 <+128>: jne 0x10023bd54 ; <+132> at main.swift:17
0x10023bd52 <+130>: jmp 0x10023bdb3 ; <+227> at main.swift:17
0x10023bd54 <+132>: movq -0x68(%rbp), %rax
0x10023bd58 <+136>: incq %rax
0x10023bd5b <+139>: seto %cl
0x10023bd5e <+142>: movq -0x68(%rbp), %rdx
0x10023bd62 <+146>: cmpq %rdx, %rax
0x10023bd65 <+149>: setg %sil
0x10023bd69 <+153>: testb [=12=]x1, %sil
0x10023bd6d <+157>: movb %cl, -0x69(%rbp)
0x10023bd70 <+160>: jne 0x10023bd74 ; <+164> at main.swift:17
0x10023bd72 <+162>: jmp 0x10023bd87 ; <+183> at main.swift:17
0x10023bd74 <+164>: movq -0x68(%rbp), %rax
0x10023bd78 <+168>: incq %rax
0x10023bd7b <+171>: seto %cl
0x10023bd7e <+174>: movq %rax, -0x78(%rbp)
0x10023bd82 <+178>: movb %cl, -0x79(%rbp)
0x10023bd85 <+181>: jmp 0x10023bddf ; <+271> at main.swift:17
0x10023bd87 <+183>: leaq 0x418a2(%rip), %rdi ; "fatal error"
0x10023bd8e <+190>: movl [=12=]xb, %eax
0x10023bd93 <+195>: movl %eax, %esi
0x10023bd95 <+197>: movl [=12=]x2, %eax
0x10023bd9a <+202>: leaq 0x487af(%rip), %rcx ; "Range end index has no valid successor"
0x10023bda1 <+209>: movl [=12=]x26, %edx
0x10023bda6 <+214>: movl %edx, %r8d
0x10023bda9 <+217>: movl %eax, %edx
0x10023bdab <+219>: movl %eax, %r9d
0x10023bdae <+222>: callq 0x1001a80f0 ; function signature specialization <Arg[0] = Exploded, Arg[1] = Exploded, Arg[2] = Dead, Arg[3] = Dead> of Swift._fatalErrorMessage (Swift.StaticString, Swift.StaticString, Swift.StaticString, Swift.UInt) -> ()
0x10023bdb3 <+227>: leaq 0x41876(%rip), %rdi ; "fatal error"
0x10023bdba <+234>: movl [=12=]xb, %eax
0x10023bdbf <+239>: movl %eax, %esi
0x10023bdc1 <+241>: movl [=12=]x2, %eax
0x10023bdc6 <+246>: leaq 0x48753(%rip), %rcx ; "Can't form Range with end < start"
0x10023bdcd <+253>: movl [=12=]x21, %edx
0x10023bdd2 <+258>: movl %edx, %r8d
0x10023bdd5 <+261>: movl %eax, %edx
0x10023bdd7 <+263>: movl %eax, %r9d
0x10023bdda <+266>: callq 0x1001a80f0 ; function signature specialization <Arg[0] = Exploded, Arg[1] = Exploded, Arg[2] = Dead, Arg[3] = Dead> of Swift._fatalErrorMessage (Swift.StaticString, Swift.StaticString, Swift.StaticString, Swift.UInt) -> ()
0x10023bddf <+271>: leaq -0x30(%rbp), %rdi
0x10023bde3 <+275>: leaq -0x20(%rbp), %rsi
0x10023bde7 <+279>: movq [=12=]x0, -0x20(%rbp)
0x10023bdef <+287>: movq -0x78(%rbp), %rax
0x10023bdf3 <+291>: movq %rax, -0x18(%rbp)
0x10023bdf7 <+295>: callq 0x1000362e0 ; generic specialization <Swift.Int with Swift.Int : Swift.ForwardIndexType in Swift, Swift.Int with Swift.Int : Swift._SignedIntegerType in Swift, Swift.Int with Swift.Int : Swift._BuiltinIntegerLiteralConvertible in Swift, Swift.Int> of Swift.Range.generate <A where A: Swift.ForwardIndexType> (Swift.Range<A>)() -> Swift.RangeGenerator<A>
0x10023bdfc <+300>: movq -0x30(%rbp), %rax
0x10023be00 <+304>: movq -0x28(%rbp), %rsi
0x10023be04 <+308>: movq %rax, -0x10(%rbp)
0x10023be08 <+312>: movq %rsi, -0x8(%rbp)
0x10023be0c <+316>: leaq -0x40(%rbp), %rdi
0x10023be10 <+320>: leaq -0x10(%rbp), %rsi
0x10023be14 <+324>: callq 0x100036960 ; generic specialization <Swift.Int with Swift.Int : Swift.ForwardIndexType in Swift, Swift.Int with Swift.Int : Swift._SignedIntegerType in Swift, Swift.Int with Swift.Int : Swift._BuiltinIntegerLiteralConvertible in Swift, Swift.Int> of Swift.RangeGenerator.next <A where A: Swift.ForwardIndexType> (inout Swift.RangeGenerator<A>)() -> Swift.Optional<A>
0x10023be19 <+329>: movq -0x40(%rbp), %rsi
0x10023be1d <+333>: movb -0x38(%rbp), %al
0x10023be20 <+336>: xorb $0x1, %al
0x10023be22 <+338>: testb $0x1, %al
0x10023be24 <+340>: movq %rsi, -0x88(%rbp)
0x10023be2b <+347>: jne 0x10023be32 ; <+354> at main.swift:17
0x10023be2d <+349>: jmp 0x10023bed4 ; <+516> at main.swift:23
0x10023be32 <+354>: movsd 0x3bf66(%rip), %xmm0 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 344
0x10023be3a <+362>: movsd 0x3bf56(%rip), %xmm1 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
0x10023be42 <+370>: movq -0x88(%rbp), %rax
0x10023be49 <+377>: movq %rax, -0x48(%rbp)
0x10023be4d <+381>: callq 0x10023c160 ; swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double at main.swift:11
0x10023be52 <+386>: movsd 0x3bf46(%rip), %xmm1 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 344
0x10023be5a <+394>: movsd 0x3bf36(%rip), %xmm2 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
0x10023be62 <+402>: movsd %xmm0, -0x50(%rbp)
0x10023be67 <+407>: movsd %xmm0, -0x90(%rbp)
0x10023be6f <+415>: movaps %xmm1, %xmm0
0x10023be72 <+418>: movaps %xmm2, %xmm1
0x10023be75 <+421>: callq 0x10023c160 ; swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double at main.swift:11
0x10023be7a <+426>: movsd 0x3bf16(%rip), %xmm1 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
0x10023be82 <+434>: movsd %xmm0, -0x58(%rbp)
0x10023be87 <+439>: movsd -0x90(%rbp), %xmm2
0x10023be8f <+447>: mulsd %xmm2, %xmm2
0x10023be93 <+451>: mulsd %xmm0, %xmm0
0x10023be97 <+455>: addsd %xmm0, %xmm2
0x10023be9b <+459>: ucomisd %xmm2, %xmm1
0x10023be9f <+463>: jb 0x10023becf ; <+511> at main.swift:23
0x10023bea1 <+465>: movq 0x93320(%rip), %rax ; swiftTest.pointInsideCount : Swift.Int
0x10023bea8 <+472>: incq %rax
0x10023beab <+475>: seto %cl
0x10023beae <+478>: movq %rax, -0x98(%rbp)
0x10023beb5 <+485>: movb %cl, -0x99(%rbp)
0x10023bebb <+491>: jo 0x10023c155 ; <+1157> at main.swift:21
0x10023bec1 <+497>: movq -0x98(%rbp), %rax
0x10023bec8 <+504>: movq %rax, 0x932f9(%rip) ; swiftTest.pointInsideCount : Swift.Int
0x10023becf <+511>: jmp 0x10023be0c ; <+316> at main.swift:17
0x10023bed4 <+516>: movsd 0x3beb4(%rip), %xmm0 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 328
0x10023bedc <+524>: cvtsi2sdq 0x932e3(%rip), %xmm1 ; swiftTest.pointInsideCount : Swift.Int
0x10023bee5 <+533>: cvtsi2sdq 0x932d2(%rip), %xmm2 ; swiftTest.pointNumber : Swift.Int
0x10023beee <+542>: divsd %xmm2, %xmm1
0x10023bef2 <+546>: mulsd %xmm0, %xmm1
0x10023bef6 <+550>: movsd %xmm1, 0x932d2(%rip) ; swiftTest.result : Swift.Double
0x10023befe <+558>: callq 0x10023c1b0 ; type metadata accessor for Swift.CVarArgType
0x10023bf03 <+563>: movl [=12=]x1, %ecx
0x10023bf08 <+568>: movl %ecx, %edi
0x10023bf0a <+570>: movq %rax, %rsi
0x10023bf0d <+573>: callq 0x100045770 ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
0x10023bf12 <+578>: leaq 0x4865e(%rip), %rdi ; "%.50f"
0x10023bf19 <+585>: movl [=12=]x5, %ecx
0x10023bf1e <+590>: movl %ecx, %esi
0x10023bf20 <+592>: movl [=12=]x1, %ecx
0x10023bf25 <+597>: movq %rdx, -0xa8(%rbp)
0x10023bf2c <+604>: movl %ecx, %edx
0x10023bf2e <+606>: movq %rax, -0xb0(%rbp)
0x10023bf35 <+613>: callq 0x100001aa0 ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
0x10023bf3a <+618>: leaq 0x667b7(%rip), %rsi ; protocol witness table for Swift.Double : Swift.CVarArgType in Swift
0x10023bf41 <+625>: leaq 0x6a258(%rip), %rdi ; direct type metadata for Swift.Double
0x10023bf48 <+632>: addq [=12=]x8, %rdi
0x10023bf4f <+639>: movq -0xa8(%rbp), %r8
0x10023bf56 <+646>: movq %rdi, 0x18(%r8)
0x10023bf5a <+650>: movq %rsi, 0x20(%r8)
0x10023bf5e <+654>: movsd 0x9326a(%rip), %xmm0 ; swiftTest.result : Swift.Double
0x10023bf66 <+662>: movsd %xmm0, (%r8)
0x10023bf6b <+667>: movq %rax, %rdi
0x10023bf6e <+670>: movq %rdx, %rsi
0x10023bf71 <+673>: movq %rcx, %rdx
0x10023bf74 <+676>: movq -0xb0(%rbp), %rcx
0x10023bf7b <+683>: callq 0x10002dfa0 ; ext.Foundation.Swift.String.init (Swift.String.Type)(format : Swift.String, Swift.Array<Swift.CVarArgType>...) -> Swift.String
0x10023bf80 <+688>: movq %rax, 0x93251(%rip) ; swiftTest.piString : Swift.String
0x10023bf87 <+695>: movq %rdx, 0x93252(%rip) ; swiftTest.piString : Swift.String + 8
0x10023bf8e <+702>: movq %rcx, 0x93253(%rip) ; swiftTest.piString : Swift.String + 16
-> 0x10023bf95 <+709>: callq 0x10023c200 ; type metadata accessor for protocol<>
0x10023bf9a <+714>: movl [=12=]x1, %r9d
0x10023bfa0 <+720>: movl %r9d, %edi
0x10023bfa3 <+723>: movq %rax, %rsi
0x10023bfa6 <+726>: callq 0x100045770 ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
0x10023bfab <+731>: movl [=12=]x3, %r9d
0x10023bfb1 <+737>: movl %r9d, %edi
0x10023bfb4 <+740>: leaq 0x6fe25(%rip), %rcx ; direct type metadata for Swift.String
0x10023bfbb <+747>: addq [=12=]x8, %rcx
0x10023bfc2 <+754>: movq %rcx, 0x18(%rdx)
0x10023bfc6 <+758>: movq %rcx, %rsi
0x10023bfc9 <+761>: movq %rax, -0xb8(%rbp)
0x10023bfd0 <+768>: movq %rdx, -0xc0(%rbp)
0x10023bfd7 <+775>: callq 0x100045770 ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
0x10023bfdc <+780>: leaq 0x4859a(%rip), %rdi ; "Pi is "
0x10023bfe3 <+787>: movl [=12=]x6, %r9d
0x10023bfe9 <+793>: movl %r9d, %esi
0x10023bfec <+796>: movl [=12=]x1, %r9d
0x10023bff2 <+802>: movq %rdx, -0xc8(%rbp)
0x10023bff9 <+809>: movl %r9d, %edx
0x10023bffc <+812>: movq %rax, -0xd0(%rbp)
0x10023c003 <+819>: callq 0x100001aa0 ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
0x10023c008 <+824>: movq %rax, %rdi
0x10023c00b <+827>: movq %rdx, %rsi
0x10023c00e <+830>: movq %rcx, %rdx
0x10023c011 <+833>: callq 0x1000470d0 ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
0x10023c016 <+838>: movq -0xc8(%rbp), %rsi
0x10023c01d <+845>: movq %rax, (%rsi)
0x10023c020 <+848>: movq %rdx, 0x8(%rsi)
0x10023c024 <+852>: movq %rcx, 0x10(%rsi)
0x10023c028 <+856>: movq 0x931a9(%rip), %rdi ; swiftTest.piString : Swift.String
0x10023c02f <+863>: movq 0x931aa(%rip), %rsi ; swiftTest.piString : Swift.String + 8
0x10023c036 <+870>: movq 0x931ab(%rip), %rax ; swiftTest.piString : Swift.String + 16
0x10023c03d <+877>: movq %rdi, -0xd8(%rbp)
0x10023c044 <+884>: movq %rax, %rdi
0x10023c047 <+887>: movq %rsi, -0xe0(%rbp)
0x10023c04e <+894>: movq %rax, -0xe8(%rbp)
0x10023c055 <+901>: callq 0x100268160 ; swift_unknownRetain
0x10023c05a <+906>: movq -0xd8(%rbp), %rdi
0x10023c061 <+913>: movq -0xe0(%rbp), %rsi
0x10023c068 <+920>: movq -0xe8(%rbp), %rdx
0x10023c06f <+927>: callq 0x1000470d0 ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
0x10023c074 <+932>: leaq 0x40d15(%rip), %rdi ; ""
0x10023c07b <+939>: xorl %r9d, %r9d
0x10023c07e <+942>: movl %r9d, %esi
0x10023c081 <+945>: movl [=12=]x1, %r9d
0x10023c087 <+951>: movq -0xc8(%rbp), %r8
0x10023c08e <+958>: movq %rax, 0x18(%r8)
0x10023c092 <+962>: movq %rdx, 0x20(%r8)
0x10023c096 <+966>: movq %rcx, 0x28(%r8)
0x10023c09a <+970>: movl %r9d, %edx
0x10023c09d <+973>: callq 0x100001aa0 ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
0x10023c0a2 <+978>: movq %rax, %rdi
0x10023c0a5 <+981>: movq %rdx, %rsi
0x10023c0a8 <+984>: movq %rcx, %rdx
0x10023c0ab <+987>: callq 0x1000470d0 ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
0x10023c0b0 <+992>: movq -0xc8(%rbp), %rsi
0x10023c0b7 <+999>: movq %rax, 0x30(%rsi)
0x10023c0bb <+1003>: movq %rdx, 0x38(%rsi)
0x10023c0bf <+1007>: movq %rcx, 0x40(%rsi)
0x10023c0c3 <+1011>: movq -0xd0(%rbp), %rdi
0x10023c0ca <+1018>: callq 0x1000470c0 ; Swift.String.init (Swift.String.Type)(stringInterpolation : Swift.Array<Swift.String>...) -> Swift.String
0x10023c0cf <+1023>: movq -0xc0(%rbp), %rsi
0x10023c0d6 <+1030>: movq %rax, (%rsi)
0x10023c0d9 <+1033>: movq %rdx, 0x8(%rsi)
0x10023c0dd <+1037>: movq %rcx, 0x10(%rsi)
0x10023c0e1 <+1041>: callq 0x10012aa70 ; Swift.(print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()).(default argument 1)
0x10023c0e6 <+1046>: movq %rax, -0xf0(%rbp)
0x10023c0ed <+1053>: movq %rdx, -0xf8(%rbp)
0x10023c0f4 <+1060>: movq %rcx, -0x100(%rbp)
0x10023c0fb <+1067>: callq 0x10012aa90 ; Swift.(print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()).(default argument 2)
0x10023c100 <+1072>: movq -0xb8(%rbp), %rdi
0x10023c107 <+1079>: movq -0xf0(%rbp), %rsi
0x10023c10e <+1086>: movq -0xf8(%rbp), %r8
0x10023c115 <+1093>: movq %rdx, -0x108(%rbp)
0x10023c11c <+1100>: movq %r8, %rdx
0x10023c11f <+1103>: movq -0x100(%rbp), %r10
0x10023c126 <+1110>: movq %rcx, -0x110(%rbp)
0x10023c12d <+1117>: movq %r10, %rcx
0x10023c130 <+1120>: movq %rax, %r8
0x10023c133 <+1123>: movq -0x108(%rbp), %r9
0x10023c13a <+1130>: movq -0x110(%rbp), %rax
0x10023c141 <+1137>: movq %rax, (%rsp)
0x10023c145 <+1141>: callq 0x10012aab0 ; Swift.print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()
0x10023c14a <+1146>: xorl %eax, %eax
0x10023c14c <+1148>: addq $0x120, %rsp
0x10023c153 <+1155>: popq %rbp
0x10023c154 <+1156>: retq
0x10023c155 <+1157>: ud2
0x10023c157 <+1159>: nopw (%rax,%rax)
我可以如下估算时间消耗吗?
randomNumber 函数包含大约 20 条指令,因此 x 和 y 的计算包含大约 40 条指令。pointInsideCount 的累加只需执行几条指令,所以 for 循环中大概有 4~5 打(dozen)指令(假设是 50 条)。for 循环外的时间消耗可以忽略。
如果我假设 4650U 运行这个程序时平均每个周期执行 2 条指令,那么当循环次数为 10^8 时,总时间消耗约为 50 * 10^8 / (1.7 * 10^9 * 2) ≈ 1.5 秒。
您不能假设所有循环都有相同的 IPC。诚然,某个循环可能跑到 2 IPC,但这并不能说明其他循环的任何情况。您必须仔细分析代码,才能找到瓶颈和可用的并行度。
如果您可以安全地假设没有缓存未命中或分支预测错误,您可以使用英特尔的静态代码分析器 IACA,为特定英特尔微体系结构上的小循环获得合理的周期数估计。它远非对真实硬件的完整周期精确模拟,但它确实有自己的模型来把 uops 分配到各个端口。它通常会得到合理的数字。
您也可以使用 Agner Fog 的指令表和微架构指南手动进行同类分析(包括 IACA 不了解的 CPU)。
当循环的瓶颈在于循环携带(loop-carried)依赖链的延迟,或者仅仅是某一个执行端口饱和时,这类估算通常相当准确。
在高吞吐量下,有许多细微的影响可能成为代码的瓶颈,即便您希望这段代码能以每时钟 4 个融合域微指令(fused-domain uops)的速度运行。前端只能在相当小的循环(约 28 或 56 个微指令)上维持这一速度,因为受微指令缓存行边界以及微指令并非每 4 个一组到来的影响,即使是微指令缓存,吞吐量也是有限的。
Significant FMA performance anomaly experienced in the Intel Broadwell processor 是一个很好的例子,说明事情会变得多么难以理解。您可能以为那段代码能让三个向量执行端口全部饱和——在 Haswell 上确实如此,在 Skylake 上也几乎如此,但在 Broadwell 上却相差甚远。这甚至不是前端瓶颈,因为循环足够小,可以放入循环缓冲区。
同样,所有这些都没有考虑分支预测错误或缓存未命中。
如果这一切听起来真的很难、很复杂,那是因为它确实如此。这就是基准测试比静态分析更有用的原因。但是,微基准测试确实很容易出错。您应该查看汇编(asm),以确保自己没有搞砸、没有让编译器把您想要测试的东西优化掉。您还需要了解很多有关 CPU 工作原理的知识,以避免各种陷阱,例如把其他缓慢的东西放进微基准测试,结果让它而不是您想要测试的东西支配了运行时间。
我写了一段用 Monte Carlo 方法计算 PI 的代码,运行在 2013 款 MacBook Air 上,CPU 是 1.7 GHz Intel Core i7(似乎是 4650U)。循环次数为 10^8 时用时 2~3 秒,循环次数为 10^9 时用时约 25 秒。
import Foundation
/// Returns a pseudo-random `Double` scaled into the interval spanned by
/// `lowerBound` and `upperBound`.
///
/// The value is derived from a single call to the C library's `rand()`,
/// normalised by `RAND_MAX` and then stretched over the requested interval.
///
/// - Parameters:
///   - lowerBound: Value returned when `rand()` yields 0.
///   - upperBound: Value returned when `rand()` yields `RAND_MAX`.
/// - Returns: `lowerBound + fraction * (upperBound - lowerBound)`.
func randomNumber(lowerBound:Double, upperBound:Double) -> Double {
    // Fraction of the way through the interval, taken from rand() / RAND_MAX.
    let unitFraction = Double(rand()) / Double(RAND_MAX)
    // Width of the target interval.
    let span = upperBound - lowerBound
    return lowerBound + unitFraction * span
}
// Monte Carlo estimate of pi: draw points in the square [-1, 1] x [-1, 1]
// and count how many fall inside the unit circle. The ratio of the circle's
// area to the square's area is pi / 4.
let pointNumber = 1000000000
var pointInsideCount = 0
// Half-open range so that exactly `pointNumber` samples are drawn, matching
// the denominator used for `result` below. (The closed range
// `0...pointNumber` drew one extra sample, and the loop index was unused.)
for _ in 0..<pointNumber {
    let x = randomNumber(-1.0, upperBound:1.0)
    let y = randomNumber(-1.0, upperBound:1.0)
    // Inside (or on) the unit circle.
    if x*x+y*y <= 1 {
        pointInsideCount += 1
    }
}
// Fraction of hits times 4 approximates pi.
let result = Double(pointInsideCount) / Double(pointNumber) * 4
// NOTE: "%.50f" prints far more digits than a Double carries; only the
// leading digits of the output are meaningful.
let piString = String(format: "%.50f", result)
print("Pi is \(piString)")
我运行 "di -n randomNumber" 获取 randomNumber 函数的汇编代码:
swiftTest`swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double:
0x10023c160 <+0>: pushq %rbp
0x10023c161 <+1>: movq %rsp, %rbp
0x10023c164 <+4>: subq $0x20, %rsp
0x10023c168 <+8>: movsd %xmm0, -0x8(%rbp)
0x10023c16d <+13>: movsd %xmm1, -0x10(%rbp)
0x10023c172 <+18>: movsd %xmm0, -0x18(%rbp)
0x10023c177 <+23>: movsd %xmm1, -0x20(%rbp)
0x10023c17c <+28>: callq 0x10027585e ; symbol stub for: rand
0x10023c181 <+33>: movsd 0x3bc1f(%rip), %xmm0 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 352
0x10023c189 <+41>: cvtsi2sdl %eax, %xmm1
0x10023c18d <+45>: divsd %xmm0, %xmm1
0x10023c191 <+49>: movsd -0x20(%rbp), %xmm0
0x10023c196 <+54>: movsd -0x18(%rbp), %xmm2
0x10023c19b <+59>: subsd %xmm2, %xmm0
0x10023c19f <+63>: mulsd %xmm0, %xmm1
0x10023c1a3 <+67>: addsd %xmm1, %xmm2
0x10023c1a7 <+71>: movaps %xmm2, %xmm0
0x10023c1aa <+74>: addq $0x20, %rsp
0x10023c1ae <+78>: popq %rbp
0x10023c1af <+79>: retq
并运行 “di -f” 得到整个文件的汇编代码:
swiftTest`main:
0x10023bcd0 <+0>: pushq %rbp
0x10023bcd1 <+1>: movq %rsp, %rbp
0x10023bcd4 <+4>: subq $0x120, %rsp
0x10023bcdb <+11>: leaq 0x9340e(%rip), %rax ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token6
0x10023bce2 <+18>: leaq 0x933ff(%rip), %rcx ; static Swift.Process._argc : Swift.Int32
0x10023bce9 <+25>: movl %edi, (%rcx)
0x10023bceb <+27>: cmpq $-0x1, (%rax)
0x10023bcf2 <+34>: movq %rsi, -0x60(%rbp)
0x10023bcf6 <+38>: je 0x10023bd0e ; <+62> at main.swift
0x10023bcf8 <+40>: leaq 0x933f1(%rip), %rdi ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token6
0x10023bcff <+47>: leaq -0x99d56(%rip), %rax ; globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_func6
0x10023bd06 <+54>: movq %rax, %rsi
0x10023bd09 <+57>: callq 0x100266870 ; swift_once
0x10023bd0e <+62>: leaq 0x933e3(%rip), %rax ; static Swift.Process._unsafeArgv : Swift.UnsafeMutablePointer<Swift.UnsafeMutablePointer<Swift.Int8>>
0x10023bd15 <+69>: movq -0x60(%rbp), %rcx
0x10023bd19 <+73>: movq %rcx, (%rax)
0x10023bd1c <+76>: movq $0x989680, 0x93499(%rip) ; lazy cache variable for type metadata for Swift.VaListBuilder + 4
0x10023bd27 <+87>: movq $0x0, 0x93496(%rip) ; swiftTest.pointNumber : Swift.Int + 4
0x10023bd32 <+98>: movq 0x93487(%rip), %rax ; swiftTest.pointNumber : Swift.Int
0x10023bd39 <+105>: movq %rax, -0x68(%rbp)
0x10023bd3d <+109>: xorl %eax, %eax
0x10023bd3f <+111>: movl %eax, %ecx
0x10023bd41 <+113>: movq -0x68(%rbp), %rdx
0x10023bd45 <+117>: cmpq %rdx, %rcx
0x10023bd48 <+120>: setle %sil
0x10023bd4c <+124>: testb [=12=]x1, %sil
0x10023bd50 <+128>: jne 0x10023bd54 ; <+132> at main.swift:17
0x10023bd52 <+130>: jmp 0x10023bdb3 ; <+227> at main.swift:17
0x10023bd54 <+132>: movq -0x68(%rbp), %rax
0x10023bd58 <+136>: incq %rax
0x10023bd5b <+139>: seto %cl
0x10023bd5e <+142>: movq -0x68(%rbp), %rdx
0x10023bd62 <+146>: cmpq %rdx, %rax
0x10023bd65 <+149>: setg %sil
0x10023bd69 <+153>: testb [=12=]x1, %sil
0x10023bd6d <+157>: movb %cl, -0x69(%rbp)
0x10023bd70 <+160>: jne 0x10023bd74 ; <+164> at main.swift:17
0x10023bd72 <+162>: jmp 0x10023bd87 ; <+183> at main.swift:17
0x10023bd74 <+164>: movq -0x68(%rbp), %rax
0x10023bd78 <+168>: incq %rax
0x10023bd7b <+171>: seto %cl
0x10023bd7e <+174>: movq %rax, -0x78(%rbp)
0x10023bd82 <+178>: movb %cl, -0x79(%rbp)
0x10023bd85 <+181>: jmp 0x10023bddf ; <+271> at main.swift:17
0x10023bd87 <+183>: leaq 0x418a2(%rip), %rdi ; "fatal error"
0x10023bd8e <+190>: movl [=12=]xb, %eax
0x10023bd93 <+195>: movl %eax, %esi
0x10023bd95 <+197>: movl [=12=]x2, %eax
0x10023bd9a <+202>: leaq 0x487af(%rip), %rcx ; "Range end index has no valid successor"
0x10023bda1 <+209>: movl [=12=]x26, %edx
0x10023bda6 <+214>: movl %edx, %r8d
0x10023bda9 <+217>: movl %eax, %edx
0x10023bdab <+219>: movl %eax, %r9d
0x10023bdae <+222>: callq 0x1001a80f0 ; function signature specialization <Arg[0] = Exploded, Arg[1] = Exploded, Arg[2] = Dead, Arg[3] = Dead> of Swift._fatalErrorMessage (Swift.StaticString, Swift.StaticString, Swift.StaticString, Swift.UInt) -> ()
0x10023bdb3 <+227>: leaq 0x41876(%rip), %rdi ; "fatal error"
0x10023bdba <+234>: movl [=12=]xb, %eax
0x10023bdbf <+239>: movl %eax, %esi
0x10023bdc1 <+241>: movl [=12=]x2, %eax
0x10023bdc6 <+246>: leaq 0x48753(%rip), %rcx ; "Can't form Range with end < start"
0x10023bdcd <+253>: movl [=12=]x21, %edx
0x10023bdd2 <+258>: movl %edx, %r8d
0x10023bdd5 <+261>: movl %eax, %edx
0x10023bdd7 <+263>: movl %eax, %r9d
0x10023bdda <+266>: callq 0x1001a80f0 ; function signature specialization <Arg[0] = Exploded, Arg[1] = Exploded, Arg[2] = Dead, Arg[3] = Dead> of Swift._fatalErrorMessage (Swift.StaticString, Swift.StaticString, Swift.StaticString, Swift.UInt) -> ()
0x10023bddf <+271>: leaq -0x30(%rbp), %rdi
0x10023bde3 <+275>: leaq -0x20(%rbp), %rsi
0x10023bde7 <+279>: movq [=12=]x0, -0x20(%rbp)
0x10023bdef <+287>: movq -0x78(%rbp), %rax
0x10023bdf3 <+291>: movq %rax, -0x18(%rbp)
0x10023bdf7 <+295>: callq 0x1000362e0 ; generic specialization <Swift.Int with Swift.Int : Swift.ForwardIndexType in Swift, Swift.Int with Swift.Int : Swift._SignedIntegerType in Swift, Swift.Int with Swift.Int : Swift._BuiltinIntegerLiteralConvertible in Swift, Swift.Int> of Swift.Range.generate <A where A: Swift.ForwardIndexType> (Swift.Range<A>)() -> Swift.RangeGenerator<A>
0x10023bdfc <+300>: movq -0x30(%rbp), %rax
0x10023be00 <+304>: movq -0x28(%rbp), %rsi
0x10023be04 <+308>: movq %rax, -0x10(%rbp)
0x10023be08 <+312>: movq %rsi, -0x8(%rbp)
0x10023be0c <+316>: leaq -0x40(%rbp), %rdi
0x10023be10 <+320>: leaq -0x10(%rbp), %rsi
0x10023be14 <+324>: callq 0x100036960 ; generic specialization <Swift.Int with Swift.Int : Swift.ForwardIndexType in Swift, Swift.Int with Swift.Int : Swift._SignedIntegerType in Swift, Swift.Int with Swift.Int : Swift._BuiltinIntegerLiteralConvertible in Swift, Swift.Int> of Swift.RangeGenerator.next <A where A: Swift.ForwardIndexType> (inout Swift.RangeGenerator<A>)() -> Swift.Optional<A>
0x10023be19 <+329>: movq -0x40(%rbp), %rsi
0x10023be1d <+333>: movb -0x38(%rbp), %al
0x10023be20 <+336>: xorb $0x1, %al
0x10023be22 <+338>: testb $0x1, %al
0x10023be24 <+340>: movq %rsi, -0x88(%rbp)
0x10023be2b <+347>: jne 0x10023be32 ; <+354> at main.swift:17
0x10023be2d <+349>: jmp 0x10023bed4 ; <+516> at main.swift:23
0x10023be32 <+354>: movsd 0x3bf66(%rip), %xmm0 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 344
0x10023be3a <+362>: movsd 0x3bf56(%rip), %xmm1 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
0x10023be42 <+370>: movq -0x88(%rbp), %rax
0x10023be49 <+377>: movq %rax, -0x48(%rbp)
0x10023be4d <+381>: callq 0x10023c160 ; swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double at main.swift:11
0x10023be52 <+386>: movsd 0x3bf46(%rip), %xmm1 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 344
0x10023be5a <+394>: movsd 0x3bf36(%rip), %xmm2 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
0x10023be62 <+402>: movsd %xmm0, -0x50(%rbp)
0x10023be67 <+407>: movsd %xmm0, -0x90(%rbp)
0x10023be6f <+415>: movaps %xmm1, %xmm0
0x10023be72 <+418>: movaps %xmm2, %xmm1
0x10023be75 <+421>: callq 0x10023c160 ; swiftTest.randomNumber (Swift.Double, upperBound : Swift.Double) -> Swift.Double at main.swift:11
0x10023be7a <+426>: movsd 0x3bf16(%rip), %xmm1 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 336
0x10023be82 <+434>: movsd %xmm0, -0x58(%rbp)
0x10023be87 <+439>: movsd -0x90(%rbp), %xmm2
0x10023be8f <+447>: mulsd %xmm2, %xmm2
0x10023be93 <+451>: mulsd %xmm0, %xmm0
0x10023be97 <+455>: addsd %xmm0, %xmm2
0x10023be9b <+459>: ucomisd %xmm2, %xmm1
0x10023be9f <+463>: jb 0x10023becf ; <+511> at main.swift:23
0x10023bea1 <+465>: movq 0x93320(%rip), %rax ; swiftTest.pointInsideCount : Swift.Int
0x10023bea8 <+472>: incq %rax
0x10023beab <+475>: seto %cl
0x10023beae <+478>: movq %rax, -0x98(%rbp)
0x10023beb5 <+485>: movb %cl, -0x99(%rbp)
0x10023bebb <+491>: jo 0x10023c155 ; <+1157> at main.swift:21
0x10023bec1 <+497>: movq -0x98(%rbp), %rax
0x10023bec8 <+504>: movq %rax, 0x932f9(%rip) ; swiftTest.pointInsideCount : Swift.Int
0x10023becf <+511>: jmp 0x10023be0c ; <+316> at main.swift:17
0x10023bed4 <+516>: movsd 0x3beb4(%rip), %xmm0 ; witness table offset for Swift.VaListBuilder.__allocating_init (Swift.VaListBuilder.Type)() -> Swift.VaListBuilder + 328
0x10023bedc <+524>: cvtsi2sdq 0x932e3(%rip), %xmm1 ; swiftTest.pointInsideCount : Swift.Int
0x10023bee5 <+533>: cvtsi2sdq 0x932d2(%rip), %xmm2 ; swiftTest.pointNumber : Swift.Int
0x10023beee <+542>: divsd %xmm2, %xmm1
0x10023bef2 <+546>: mulsd %xmm0, %xmm1
0x10023bef6 <+550>: movsd %xmm1, 0x932d2(%rip) ; swiftTest.result : Swift.Double
0x10023befe <+558>: callq 0x10023c1b0 ; type metadata accessor for Swift.CVarArgType
0x10023bf03 <+563>: movl [=12=]x1, %ecx
0x10023bf08 <+568>: movl %ecx, %edi
0x10023bf0a <+570>: movq %rax, %rsi
0x10023bf0d <+573>: callq 0x100045770 ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
0x10023bf12 <+578>: leaq 0x4865e(%rip), %rdi ; "%.50f"
0x10023bf19 <+585>: movl [=12=]x5, %ecx
0x10023bf1e <+590>: movl %ecx, %esi
0x10023bf20 <+592>: movl [=12=]x1, %ecx
0x10023bf25 <+597>: movq %rdx, -0xa8(%rbp)
0x10023bf2c <+604>: movl %ecx, %edx
0x10023bf2e <+606>: movq %rax, -0xb0(%rbp)
0x10023bf35 <+613>: callq 0x100001aa0 ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
0x10023bf3a <+618>: leaq 0x667b7(%rip), %rsi ; protocol witness table for Swift.Double : Swift.CVarArgType in Swift
0x10023bf41 <+625>: leaq 0x6a258(%rip), %rdi ; direct type metadata for Swift.Double
0x10023bf48 <+632>: addq [=12=]x8, %rdi
0x10023bf4f <+639>: movq -0xa8(%rbp), %r8
0x10023bf56 <+646>: movq %rdi, 0x18(%r8)
0x10023bf5a <+650>: movq %rsi, 0x20(%r8)
0x10023bf5e <+654>: movsd 0x9326a(%rip), %xmm0 ; swiftTest.result : Swift.Double
0x10023bf66 <+662>: movsd %xmm0, (%r8)
0x10023bf6b <+667>: movq %rax, %rdi
0x10023bf6e <+670>: movq %rdx, %rsi
0x10023bf71 <+673>: movq %rcx, %rdx
0x10023bf74 <+676>: movq -0xb0(%rbp), %rcx
0x10023bf7b <+683>: callq 0x10002dfa0 ; ext.Foundation.Swift.String.init (Swift.String.Type)(format : Swift.String, Swift.Array<Swift.CVarArgType>...) -> Swift.String
0x10023bf80 <+688>: movq %rax, 0x93251(%rip) ; swiftTest.piString : Swift.String
0x10023bf87 <+695>: movq %rdx, 0x93252(%rip) ; swiftTest.piString : Swift.String + 8
0x10023bf8e <+702>: movq %rcx, 0x93253(%rip) ; swiftTest.piString : Swift.String + 16
-> 0x10023bf95 <+709>: callq 0x10023c200 ; type metadata accessor for protocol<>
0x10023bf9a <+714>: movl [=12=]x1, %r9d
0x10023bfa0 <+720>: movl %r9d, %edi
0x10023bfa3 <+723>: movq %rax, %rsi
0x10023bfa6 <+726>: callq 0x100045770 ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
0x10023bfab <+731>: movl [=12=]x3, %r9d
0x10023bfb1 <+737>: movl %r9d, %edi
0x10023bfb4 <+740>: leaq 0x6fe25(%rip), %rcx ; direct type metadata for Swift.String
0x10023bfbb <+747>: addq [=12=]x8, %rcx
0x10023bfc2 <+754>: movq %rcx, 0x18(%rdx)
0x10023bfc6 <+758>: movq %rcx, %rsi
0x10023bfc9 <+761>: movq %rax, -0xb8(%rbp)
0x10023bfd0 <+768>: movq %rdx, -0xc0(%rbp)
0x10023bfd7 <+775>: callq 0x100045770 ; Swift._allocateUninitializedArray <A> (Builtin.Word) -> (Swift.Array<A>, Builtin.RawPointer)
0x10023bfdc <+780>: leaq 0x4859a(%rip), %rdi ; "Pi is "
0x10023bfe3 <+787>: movl [=12=]x6, %r9d
0x10023bfe9 <+793>: movl %r9d, %esi
0x10023bfec <+796>: movl [=12=]x1, %r9d
0x10023bff2 <+802>: movq %rdx, -0xc8(%rbp)
0x10023bff9 <+809>: movl %r9d, %edx
0x10023bffc <+812>: movq %rax, -0xd0(%rbp)
0x10023c003 <+819>: callq 0x100001aa0 ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
0x10023c008 <+824>: movq %rax, %rdi
0x10023c00b <+827>: movq %rdx, %rsi
0x10023c00e <+830>: movq %rcx, %rdx
0x10023c011 <+833>: callq 0x1000470d0 ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
0x10023c016 <+838>: movq -0xc8(%rbp), %rsi
0x10023c01d <+845>: movq %rax, (%rsi)
0x10023c020 <+848>: movq %rdx, 0x8(%rsi)
0x10023c024 <+852>: movq %rcx, 0x10(%rsi)
0x10023c028 <+856>: movq 0x931a9(%rip), %rdi ; swiftTest.piString : Swift.String
0x10023c02f <+863>: movq 0x931aa(%rip), %rsi ; swiftTest.piString : Swift.String + 8
0x10023c036 <+870>: movq 0x931ab(%rip), %rax ; swiftTest.piString : Swift.String + 16
0x10023c03d <+877>: movq %rdi, -0xd8(%rbp)
0x10023c044 <+884>: movq %rax, %rdi
0x10023c047 <+887>: movq %rsi, -0xe0(%rbp)
0x10023c04e <+894>: movq %rax, -0xe8(%rbp)
0x10023c055 <+901>: callq 0x100268160 ; swift_unknownRetain
0x10023c05a <+906>: movq -0xd8(%rbp), %rdi
0x10023c061 <+913>: movq -0xe0(%rbp), %rsi
0x10023c068 <+920>: movq -0xe8(%rbp), %rdx
0x10023c06f <+927>: callq 0x1000470d0 ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
0x10023c074 <+932>: leaq 0x40d15(%rip), %rdi ; ""
0x10023c07b <+939>: xorl %r9d, %r9d
0x10023c07e <+942>: movl %r9d, %esi
0x10023c081 <+945>: movl [=12=]x1, %r9d
0x10023c087 <+951>: movq -0xc8(%rbp), %r8
0x10023c08e <+958>: movq %rax, 0x18(%r8)
0x10023c092 <+962>: movq %rdx, 0x20(%r8)
0x10023c096 <+966>: movq %rcx, 0x28(%r8)
0x10023c09a <+970>: movl %r9d, %edx
0x10023c09d <+973>: callq 0x100001aa0 ; Swift.String.init (Swift.String.Type)(_builtinStringLiteral : Builtin.RawPointer, byteSize : Builtin.Word, isASCII : Builtin.Int1) -> Swift.String
0x10023c0a2 <+978>: movq %rax, %rdi
0x10023c0a5 <+981>: movq %rdx, %rsi
0x10023c0a8 <+984>: movq %rcx, %rdx
0x10023c0ab <+987>: callq 0x1000470d0 ; Swift.String.init (Swift.String.Type)(stringInterpolationSegment : Swift.String) -> Swift.String
0x10023c0b0 <+992>: movq -0xc8(%rbp), %rsi
0x10023c0b7 <+999>: movq %rax, 0x30(%rsi)
0x10023c0bb <+1003>: movq %rdx, 0x38(%rsi)
0x10023c0bf <+1007>: movq %rcx, 0x40(%rsi)
0x10023c0c3 <+1011>: movq -0xd0(%rbp), %rdi
0x10023c0ca <+1018>: callq 0x1000470c0 ; Swift.String.init (Swift.String.Type)(stringInterpolation : Swift.Array<Swift.String>...) -> Swift.String
0x10023c0cf <+1023>: movq -0xc0(%rbp), %rsi
0x10023c0d6 <+1030>: movq %rax, (%rsi)
0x10023c0d9 <+1033>: movq %rdx, 0x8(%rsi)
0x10023c0dd <+1037>: movq %rcx, 0x10(%rsi)
0x10023c0e1 <+1041>: callq 0x10012aa70 ; Swift.(print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()).(default argument 1)
0x10023c0e6 <+1046>: movq %rax, -0xf0(%rbp)
0x10023c0ed <+1053>: movq %rdx, -0xf8(%rbp)
0x10023c0f4 <+1060>: movq %rcx, -0x100(%rbp)
0x10023c0fb <+1067>: callq 0x10012aa90 ; Swift.(print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()).(default argument 2)
0x10023c100 <+1072>: movq -0xb8(%rbp), %rdi
0x10023c107 <+1079>: movq -0xf0(%rbp), %rsi
0x10023c10e <+1086>: movq -0xf8(%rbp), %r8
0x10023c115 <+1093>: movq %rdx, -0x108(%rbp)
0x10023c11c <+1100>: movq %r8, %rdx
0x10023c11f <+1103>: movq -0x100(%rbp), %r10
0x10023c126 <+1110>: movq %rcx, -0x110(%rbp)
0x10023c12d <+1117>: movq %r10, %rcx
0x10023c130 <+1120>: movq %rax, %r8
0x10023c133 <+1123>: movq -0x108(%rbp), %r9
0x10023c13a <+1130>: movq -0x110(%rbp), %rax
0x10023c141 <+1137>: movq %rax, (%rsp)
0x10023c145 <+1141>: callq 0x10012aab0 ; Swift.print (Swift.Array<protocol<>>, separator : Swift.String, terminator : Swift.String) -> ()
0x10023c14a <+1146>: xorl %eax, %eax
0x10023c14c <+1148>: addq $0x120, %rsp
0x10023c153 <+1155>: popq %rbp
0x10023c154 <+1156>: retq
0x10023c155 <+1157>: ud2
0x10023c157 <+1159>: nopw (%rax,%rax)
我可以如下估算时间消耗吗?
randomNumber 函数包含大约 20 条指令,因此 x 和 y 的计算包含大约 40 条指令。pointInsideCount 的累加只需执行几条指令,所以 for 循环中大概有 4~5 打(dozen)指令(假设是 50 条)。for 循环外的时间消耗可以忽略。
如果我假设 4650U 运行这个程序时平均每个周期执行 2 条指令,那么当循环次数为 10^8 时,总时间消耗约为 50 * 10^8 / (1.7 * 10^9 * 2) ≈ 1.5 秒。
您不能假设所有循环都有相同的 IPC。诚然,某个循环可能跑到 2 IPC,但这并不能说明其他循环的任何情况。您必须仔细分析代码,才能找到瓶颈和可用的并行度。
如果您可以安全地假设没有缓存未命中或分支预测错误,您可以使用英特尔的静态代码分析器 IACA,为特定英特尔微体系结构上的小循环获得合理的周期数估计。它远非对真实硬件的完整周期精确模拟,但它确实有自己的模型来把 uops 分配到各个端口。它通常会得到合理的数字。
您也可以使用 Agner Fog 的指令表和微架构指南手动进行同类分析(包括 IACA 不了解的 CPU)。
当循环的瓶颈在于循环携带(loop-carried)依赖链的延迟,或者仅仅是某一个执行端口饱和时,这类估算通常相当准确。
在高吞吐量下,有许多细微的影响可能成为代码的瓶颈,即便您希望这段代码能以每时钟 4 个融合域微指令(fused-domain uops)的速度运行。前端只能在相当小的循环(约 28 或 56 个微指令)上维持这一速度,因为受微指令缓存行边界以及微指令并非每 4 个一组到来的影响,即使是微指令缓存,吞吐量也是有限的。
Significant FMA performance anomaly experienced in the Intel Broadwell processor 是一个很好的例子,说明事情会变得多么难以理解。您可能以为那段代码能让三个向量执行端口全部饱和——在 Haswell 上确实如此,在 Skylake 上也几乎如此,但在 Broadwell 上却相差甚远。这甚至不是前端瓶颈,因为循环足够小,可以放入循环缓冲区。
同样,所有这些都没有考虑分支预测错误或缓存未命中。
如果这一切听起来真的很难、很复杂,那是因为它确实如此。这就是基准测试比静态分析更有用的原因。但是,微基准测试确实很容易出错。您应该查看汇编(asm),以确保自己没有搞砸、没有让编译器把您想要测试的东西优化掉。您还需要了解很多有关 CPU 工作原理的知识,以避免各种陷阱,例如把其他缓慢的东西放进微基准测试,结果让它而不是您想要测试的东西支配了运行时间。