Java ASM 字节码 - 查找属于特定方法调用的所有指令
Java ASM Bytecode - Find all instructions belonging to a specific method-call
嗨.
我想找到方法调用的 start 和 end 之间的指令范围。
我不想简单地改变方法调用 owner/name/desc.
有了预期的结果,我希望能够做到:
- 完全删除方法调用
- 通过在前面或后面添加新参数来修改方法调用
我一直在尝试不同的技术来实现这一目标:
- ASM 分析器(使用 SourceInterpreter)
- 循环指令集,正向和反向,尝试通过计算指令数或计算堆栈高度来定位
start
和 end
- 在 Stack Overflow 上搜索(未找到任何能实现预期行为的结果)
我会给你一些我想要的例子,以防这里有任何混淆。
首先,看下面我的测试代码,然后再回到这里。
我希望 find/remove 对 anotherMethod4
的整个方法调用并将其替换为简单的 true
,从而得到以下代码:
System.out.println(
anotherMethod1(
anotherMethod2("a", "b") ?
"c" : anotherMethod3("d", "e") ? "f" : "g",
true ? "j" : "k"
) ? "l" : "m"
);
我希望 find/remove 对 anotherMethod1
的整个方法调用并将其替换为简单的 false
,结果代码为:
System.out.println(
false ? "l" : "m"
);
我希望删除对 System.out.println
的整个方法调用,导致此代码:
private Main()
{
}
这一定是可能的吧?
这是我当前的测试代码:
/**
 * Test fixture from the question: a single println statement whose argument is built from
 * nested calls to anotherMethod1..4 combined with conditional (?:) expressions.
 */
private Main()
{
System.out.println(
anotherMethod1(
// first argument of anotherMethod1: a nested conditional using anotherMethod2 and anotherMethod3
anotherMethod2("a", "b") ?
"c" : anotherMethod3("d", "e") ? "f" : "g",
// second argument of anotherMethod1: a conditional using anotherMethod4
anotherMethod4("h", "i") ? "j" : "k"
) ? "l" : "m"
);
}
// Stub used by the test expression: ignores both arguments and always returns true.
boolean anotherMethod1(String str, String oof)
{
return true;
}
// Stub used by the test expression: ignores both arguments and always returns true.
boolean anotherMethod2(String str, String oof)
{
return true;
}
// Stub used by the test expression: ignores both arguments and always returns true.
boolean anotherMethod3(String str, String oof)
{
return true;
}
// Stub used by the test expression: ignores both arguments and always returns true.
boolean anotherMethod4(String str, String oof)
{
return true;
}
方法调用的参数可能带有副作用,例如 method(variable = value);有时调用甚至根本无法删除,例如删除调用之后会导致访问未初始化的变量的情况。在字节码级别,属于参数求值的指令可能与任意不相关的指令交错出现。
但如果我们限制范围,就可以得到一个解决方案。在您的示例中,所有调用都是 invokevirtual 指令,其接收者是隐含的 this 或某个 static 字段的值。对于这类调用,我们确实可以使用 ASM 的 Analyzer 和 SourceInterpreter 来识别起始的 aload 或 getstatic 指令,并假设从该指令到调用指令之间(含两端)的所有指令都属于方法调用表达式。
我们可以使用这样的代码
/**
 * Locates the instruction span of a method invocation under the simplifying assumption that
 * the receiver is pushed by exactly one {@code ALOAD} or {@code GETSTATIC} instruction and that
 * everything between that instruction and the invocation belongs to the call expression.
 */
public class IdentifyCall {
    /**
     * Analyzes the given method with a plain {@link SourceInterpreter} and keeps the frames.
     *
     * @param internalClassName internal name of the class declaring {@code toAnalyze}
     * @param toAnalyze the method whose instructions are analyzed
     * @return a helper that can answer {@link #getSpan} for that method's instructions
     * @throws AnalyzerException if the analysis fails
     */
    static IdentifyCall getInputs(
            String internalClassName, MethodNode toAnalyze) throws AnalyzerException {
        Analyzer<SourceValue> analyzer = new Analyzer<>(new SourceInterpreter());
        return new IdentifyCall(
            toAnalyze.instructions, analyzer.analyze(internalClassName, toAnalyze));
    }

    private final InsnList instructions;
    /** One frame per instruction index, as returned by the analyzer. */
    private final Frame<SourceValue>[] frames;

    private IdentifyCall(InsnList il, Frame<SourceValue>[] analyzed) {
        instructions = il;
        frames = analyzed;
    }

    /**
     * Returns the first and last instruction index of the given invocation.
     *
     * @param i a method invocation instruction with a receiver (must be a {@code MethodInsnNode})
     * @return {@code { indexOfReceiverLoad, indexOfInvocation }}
     * @throws UnsupportedOperationException if the call has no receiver ({@code INVOKESTATIC}),
     *         if no frame was computed for the instruction, or if the receiver does not stem
     *         from a single {@code ALOAD} or {@code GETSTATIC} instruction
     */
    int[] getSpan(AbstractInsnNode i) {
        MethodInsnNode mn = (MethodInsnNode) i;
        // A static call has no receiver slot below its arguments; looking there would pick up
        // an unrelated value or run off the bottom of the stack.
        if (mn.getOpcode() == Opcodes.INVOKESTATIC)
            throw new UnsupportedOperationException("" + mn.getOpcode());
        // can't use getArgumentsAndReturnSizes, as for the frame, double and long do not count as 2
        int nArg = Type.getArgumentTypes(mn.desc).length;
        int end = instructions.indexOf(mn);
        Frame<SourceValue> f = frames[end];
        // No frame means the analyzer never reached this instruction.
        if (f == null) throw new UnsupportedOperationException("no frame for instruction " + end);
        SourceValue receiver = f.getStack(f.getStackSize() - nArg - 1);
        if (receiver.insns.size() != 1) throw new UnsupportedOperationException();
        AbstractInsnNode n = receiver.insns.iterator().next();
        if (n.getOpcode() != Opcodes.ALOAD && n.getOpcode() != Opcodes.GETSTATIC)
            throw new UnsupportedOperationException("" + n.getOpcode());
        return new int[] { instructions.indexOf(n), end };
    }
}
并用下面的例子来演示
/**
 * Demonstrates {@code IdentifyCall}: reads this class's own class file, analyzes the
 * {@code toAnalyze} method and prints its instructions, prefixed with a tree drawing that
 * marks the instruction span of every {@code INVOKEVIRTUAL} call.
 */
public class IdentifyCallExample {
    /** Sample code whose bytecode is analyzed by {@code main}. */
    private void toAnalyze() {
        System.out.println(
            anotherMethod1(
                anotherMethod2("a", "b") ?
                    "c" : anotherMethod3("d", "e") ? "f" : "g",
                anotherMethod4("h", "i") ? "j" : "k"
            ) ? "l" : "m"
        );
    }

    /** Stub: ignores its arguments and returns {@code true}. */
    boolean anotherMethod1(String str, String oof) {
        return true;
    }

    /** Stub: ignores its arguments and returns {@code true}. */
    boolean anotherMethod2(String str, String oof) {
        return true;
    }

    /** Stub: ignores its arguments and returns {@code true}. */
    boolean anotherMethod3(String str, String oof) {
        return true;
    }

    /** Stub: ignores its arguments and returns {@code true}. */
    boolean anotherMethod4(String str, String oof) {
        return true;
    }

    /**
     * Loads this class's bytecode, determines the span of each {@code INVOKEVIRTUAL}
     * instruction in {@code toAnalyze} and prints the annotated instruction list.
     *
     * @throws AnalyzerException if the data-flow analysis fails
     * @throws IOException if the class file cannot be read
     * @throws IllegalStateException if the class file contains no method named {@code toAnalyze}
     */
    public static void main(String[] args) throws AnalyzerException, IOException {
        Class<?> me = MethodHandles.lookup().lookupClass();
        ClassNode cn = new ClassNode();
        // ClassReader only reads from the stream; closing it is the caller's job.
        try (java.io.InputStream classFile
                = me.getResourceAsStream(me.getSimpleName() + ".class")) {
            ClassReader r = new ClassReader(classFile);
            r.accept(cn, ClassReader.SKIP_DEBUG | ClassReader.SKIP_FRAMES);
        }
        MethodNode toAnalyze = null;
        for (MethodNode mn : cn.methods) {
            if (mn.name.equals("toAnalyze")) {
                toAnalyze = mn;
                break;
            }
        }
        if (toAnalyze == null)
            throw new IllegalStateException("method toAnalyze not found in " + me.getName());
        List<int[]> invocations = new ArrayList<>();
        final InsnList instructions = toAnalyze.instructions;
        IdentifyCall identifyCall
            = IdentifyCall.getInputs(me.getName().replace('.', '/'), toAnalyze);
        for (int ix = 0, num = instructions.size(); ix < num; ix++) {
            AbstractInsnNode instr = instructions.get(ix);
            if (instr.getOpcode() != Opcodes.INVOKEVIRTUAL) continue;
            invocations.add(identifyCall.getSpan(instr));
        }
        printIt(invocations, instructions);
    }

    /**
     * Prints every instruction on its own line, preceded by one two-character cell per
     * {@link Level} that shows whether the instruction starts, ends, or lies inside a span.
     *
     * @param invocations {@code {first, last}} instruction index pairs, one per call
     * @param instructions the instruction list the indices refer to
     */
    private static void printIt(List<int[]> invocations, final InsnList instructions) {
        List<Level> levels = toTree(invocations);
        Textifier toText = new Textifier();
        TraceMethodVisitor tmv = new TraceMethodVisitor(toText);
        for (int ix = 0, num = instructions.size(); ix < num; ix++) {
            AbstractInsnNode instr = instructions.get(ix);
            // becomes true once a span start/end was drawn on this row; cells to the right
            // are then connected with horizontal line glyphs
            boolean line = false;
            level: for (Level l : levels) {
                if (ix >= l.lo && ix <= l.hi) {
                    for (int[] b : l.branches) {
                        if (ix < b[0] || ix > b[1]) continue;
                        System.out.print(line ?
                            (b[0] == ix ? b[1] == ix ? "─[" : "┬─" : b[1] == ix ? "┴─" : "┼─") :
                            (b[0] == ix ? b[1] == ix ? " [" : "┌─" : b[1] == ix ? "└─" : "│ "));
                        line |= b[0] == ix || b[1] == ix;
                        continue level;
                    }
                }
                // Every cell is two characters wide; the empty cell must be as well,
                // otherwise the columns of deeper levels shift between rows.
                System.out.print(line ? "──" : "  ");
            }
            instr.accept(tmv);
            System.out.print(toText.text.get(0));
            toText.text.clear();
        }
    }

    /**
     * One column of the printed tree: an ordered sequence of spans plus the overall
     * bounds {@code lo}..{@code hi} covered by the first and last span.
     */
    static class Level {
        int lo, hi;
        ArrayDeque<int[]> branches = new ArrayDeque<>();

        Level(int[] b) {
            lo = b[0];
            hi = b[1];
            branches.add(b);
        }

        /**
         * Tries to place span {@code b} into this level.
         * The span is prepended when it ends at or before {@code lo}, appended when it starts
         * at or after {@code hi}; otherwise a gap between existing spans is searched, starting
         * from the tail or the head depending on where the span's midpoint lies.
         *
         * @return {@code true} if the span was added, {@code false} if it does not fit
         */
        boolean insert(int[] b) {
            if (b[1] <= lo) {
                branches.addFirst(b);
                lo = b[0];
            }
            else if (b[0] >= hi) {
                branches.addLast(b);
                hi = b[1];
            }
            else return b[0] > lo && b[1] < hi
                && (b[0] + b[1]) >> 1 > (lo + hi) >> 1 ? tryTail(b, lo, hi) : tryHead(b, lo, hi);
            return true;
        }

        /**
         * Temporarily removes the first span and tries to place {@code b} right behind it;
         * the removed span is always put back in front afterwards.
         * {@code lo} and {@code hi} are local search bounds and do not modify the fields.
         */
        private boolean tryHead(int[] b, int lo, int hi) {
            int[] head = branches.removeFirst();
            try {
                // head reaches into b: no room behind it
                if (head[1] > b[0]) return false;
                if (branches.isEmpty() || (lo = branches.getFirst()[0]) >= b[1]) {
                    branches.addFirst(b);
                    return true;
                }
                else return b[0] > lo && b[1] < hi
                    && (b[0] + b[1]) >> 1 > (lo + hi) >> 1 ? tryTail(b, lo, hi) : tryHead(b, lo, hi);
            } finally {
                branches.addFirst(head);
            }
        }

        /**
         * Temporarily removes the last span and tries to place {@code b} right before it;
         * the removed span is always put back at the end afterwards.
         * {@code lo} and {@code hi} are local search bounds and do not modify the fields.
         */
        private boolean tryTail(int[] b, int lo, int hi) {
            int[] tail = branches.removeLast();
            try {
                // b reaches into tail: no room before it
                if (tail[0] < b[1]) return false;
                if (branches.isEmpty() || (hi = branches.getLast()[1]) <= b[0]) {
                    branches.addLast(b);
                    return true;
                }
                else return b[0] > lo && b[1] < hi
                    && (b[0] + b[1]) >> 1 > (lo + hi) >> 1 ? tryTail(b, lo, hi) : tryHead(b, lo, hi);
            } finally {
                branches.addLast(tail);
            }
        }
    }

    /**
     * Distributes the spans over levels: spans are processed from shortest to longest, each
     * going into the first level that accepts it; the resulting level list is reversed, so
     * the level created last is printed leftmost.
     *
     * @param list {@code {first, last}} index pairs; the list itself is not modified
     * @return the levels in printing order, empty for an empty input
     */
    static List<Level> toTree(List<int[]> list) {
        if (list.isEmpty()) return Collections.emptyList();
        if (list.size() == 1) return Collections.singletonList(new Level(list.get(0)));
        // sort a copy so the caller's list keeps its order (and may be unmodifiable)
        List<int[]> bySize = new ArrayList<>(list);
        bySize.sort(Comparator.comparingInt(b -> b[1] - b[0]));
        ArrayList<Level> l = new ArrayList<>();
        insert: for (int[] b : bySize) {
            for (Level level : l) if (level.insert(b)) continue insert;
            l.add(new Level(b));
        }
        if (l.size() > 1) Collections.reverse(l);
        return l;
    }
}
这将打印
┌───── GETSTATIC java/lang/System.out : Ljava/io/PrintStream;
│ ┌─── ALOAD 0
│ │ ┌─ ALOAD 0
│ │ │ LDC "a"
│ │ │ LDC "b"
│ │ └─ INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod2 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L0
│ │ LDC "c"
│ │ GOTO L1
│ │ L0
│ │ ┌─ ALOAD 0
│ │ │ LDC "d"
│ │ │ LDC "e"
│ │ └─ INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod3 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L2
│ │ LDC "f"
│ │ GOTO L1
│ │ L2
│ │ LDC "g"
│ │ L1
│ │ ┌─ ALOAD 0
│ │ │ LDC "h"
│ │ │ LDC "i"
│ │ └─ INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod4 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L3
│ │ LDC "j"
│ │ GOTO L4
│ │ L3
│ │ LDC "k"
│ │ L4
│ └─── INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod1 (Ljava/lang/String;Ljava/lang/String;)Z
│ IFEQ L5
│ LDC "l"
│ GOTO L6
│ L5
│ LDC "m"
│ L6
└───── INVOKEVIRTUAL java/io/PrintStream.println (Ljava/lang/String;)V
RETURN
当我们想要支持更复杂的接收者表达式,或者第一个参数可以是任意表达式的 static 方法时,事情会变得更加复杂。Frame<SourceValue> 允许我们识别把当前值压入操作数栈的指令,但对于像 a + b 这样的表达式,得到的只会是 iadd 指令,我们还必须分析 iadd 指令的帧才能获得它的输入。与其为每一种指令分别实现这一点,不如扩展解释器,在其中获取这些信息并存储起来(例如存入一个 Map),因为 Analyzer 已经完成了这项工作。然后,我们就可以递归地收集所有输入。
不过,这只能得到直接和间接的输入源;对于条件表达式,我们还需要条件本身的输入。为此,我们必须识别并存储条件分支。每当报告某个输入可能来自不同的源指令时,我们必须检查相关的分支并把它们的条件也加入进来。
然后我们再次使用简化假设,第一个和最后一个之间的所有指令也属于调用表达式。
更详细的代码看起来像
public class IdentifyCall {
private final InsnList instructions;
private final Map<AbstractInsnNode, Set<SourceValue>> sources;
private final TreeMap<int[],AbstractInsnNode> conditionals;
private IdentifyCall(InsnList il,
Map<AbstractInsnNode, Set<SourceValue>> s, TreeMap<int[], AbstractInsnNode> c) {
instructions = il;
sources = s;
conditionals = c;
}
Set<AbstractInsnNode> getAllInputsOf(AbstractInsnNode instr) {
Set<AbstractInsnNode> source = new HashSet<>();
List<SourceValue> pending = new ArrayList<>(sources.get(instr));
for (int pIx = 0; pIx < pending.size(); pIx++) {
SourceValue sv = pending.get(pIx);
final boolean branch = sv.insns.size() > 1;
for(AbstractInsnNode in: sv.insns) {
if(source.add(in))
pending.addAll(sources.getOrDefault(in, Collections.emptySet()));
if(branch) {
int ix = instructions.indexOf(in);
conditionals.forEach((b,i) -> {
if(b[0] <= ix && b[1] >= ix && source.add(i))
pending.addAll(sources.getOrDefault(i, Collections.emptySet()));
});
}
}
}
return source;
}
static IdentifyCall getInputs(
String internalClassName, MethodNode toAnalyze) throws AnalyzerException {
InsnList instructions = toAnalyze.instructions;
Map<AbstractInsnNode, Set<SourceValue>> sources = new HashMap<>();
SourceInterpreter i = new SourceInterpreter() {
@Override
public SourceValue unaryOperation(AbstractInsnNode insn, SourceValue value) {
sources.computeIfAbsent(insn, x -> new HashSet<>()).add(value);
return super.unaryOperation(insn, value);
}
@Override
public SourceValue binaryOperation(AbstractInsnNode insn, SourceValue v1, SourceValue v2) {
addAll(insn, Arrays.asList(v1, v2));
return super.binaryOperation(insn, v1, v2);
}
@Override
public SourceValue ternaryOperation(AbstractInsnNode insn, SourceValue v1, SourceValue v2, SourceValue v3) {
addAll(insn, Arrays.asList(v1, v2, v3));
return super.ternaryOperation(insn, v1, v2, v3);
}
@Override
public SourceValue naryOperation(AbstractInsnNode insn, List<? extends SourceValue> values) {
addAll(insn, values);
return super.naryOperation(insn, values);
}
private void addAll(AbstractInsnNode insn, List<? extends SourceValue> values) {
sources.computeIfAbsent(insn, x -> new HashSet<>()).addAll(values);
}
};
TreeMap<int[],AbstractInsnNode> conditionals = new TreeMap<>(
Comparator.comparingInt((int[] a) -> a[0]).thenComparingInt(a -> a[1]));
Analyzer<SourceValue> analyzer = new Analyzer<>(i) {
@Override
protected void newControlFlowEdge(int insn, int successor) {
if(insn != successor - 1) {
AbstractInsnNode instruction = instructions.get(insn);
Set<SourceValue> dep = sources.get(instruction);
if(dep != null && !dep.isEmpty())
conditionals.put(new int[]{ insn, successor }, instruction);
}
}
};
analyzer.analyze(internalClassName, toAnalyze);
return new IdentifyCall(instructions, sources, conditionals);
}
}
然后,我们还使用更详细的示例代码:
public class IdentifyCallExample {
/**
 * Sample code whose bytecode is analyzed. Unlike a plain System.out.println call, the
 * receiver of println is itself a conditional expression selecting System.out or System.err
 * depending on Math.random().
 */
private void toAnalyze() {
(Math.random()>0.5? System.out: System.err).println(
anotherMethod1(
// first argument of anotherMethod1: nested conditional using anotherMethod2 and anotherMethod3
anotherMethod2("a", "b") ?
"c" : anotherMethod3("d", "e") ? "f" : "g",
// second argument of anotherMethod1: conditional using anotherMethod4
anotherMethod4("h", "i") ? "j" : "k"
) ? "l" : "m"
);
}
/** Static stub: ignores both arguments and always returns true. */
static boolean anotherMethod1(String str, String oof) {
return true;
}
/** Static stub: ignores both arguments and always returns true. */
static boolean anotherMethod2(String str, String oof) {
return true;
}
/** Static stub: ignores both arguments and always returns true. */
static boolean anotherMethod3(String str, String oof) {
return true;
}
/** Static stub: ignores both arguments and always returns true. */
static boolean anotherMethod4(String str, String oof) {
return true;
}
/**
 * Loads this class's bytecode, computes for every method invocation in {@code toAnalyze}
 * the lowest and highest instruction index among the invocation and all of its inputs, and
 * prints the annotated instruction list.
 *
 * @throws AnalyzerException if the data-flow analysis fails
 * @throws IOException if the class file cannot be read
 * @throws IllegalStateException if the class file contains no method named {@code toAnalyze}
 */
public static void main(String[] args) throws AnalyzerException, IOException {
    Class<?> me = MethodHandles.lookup().lookupClass();
    ClassNode cn = new ClassNode();
    // ClassReader only reads from the stream; closing it is the caller's job.
    try (java.io.InputStream classFile
            = me.getResourceAsStream(me.getSimpleName() + ".class")) {
        ClassReader r = new ClassReader(classFile);
        r.accept(cn, ClassReader.SKIP_DEBUG | ClassReader.SKIP_FRAMES);
    }
    MethodNode toAnalyze = null;
    for (MethodNode mn : cn.methods) {
        if (mn.name.equals("toAnalyze")) {
            toAnalyze = mn;
            break;
        }
    }
    if (toAnalyze == null)
        throw new IllegalStateException("method toAnalyze not found in " + me.getName());
    List<int[]> invocations = new ArrayList<>();
    final InsnList instructions = toAnalyze.instructions;
    IdentifyCall sources = IdentifyCall.getInputs(me.getName().replace('.', '/'), toAnalyze);
    for (int ix = 0, num = instructions.size(); ix < num; ix++) {
        AbstractInsnNode instr = instructions.get(ix);
        if (instr.getType() != AbstractInsnNode.METHOD_INSN) continue;
        IntSummaryStatistics s = sources.getAllInputsOf(instr).stream()
            .mapToInt(instructions::indexOf).summaryStatistics();
        // the invocation itself is part of the span, also when it has no inputs at all
        s.accept(ix);
        invocations.add(new int[]{ s.getMin(), s.getMax() });
    }
    printIt(invocations, instructions);
}
// remainder as in the simple variant
现在将打印
┌────[ INVOKESTATIC java/lang/Math.random ()D
│ LDC 0.5
│ DCMPL
│ IFLE L0
│ GETSTATIC java/lang/System.out : Ljava/io/PrintStream;
│ GOTO L1
│ L0
│ GETSTATIC java/lang/System.err : Ljava/io/PrintStream;
│ L1
│ ┌─┬─ LDC "a"
│ │ │ LDC "b"
│ │ └─ INVOKESTATIC complex/IdentifyCallExample.anotherMethod2 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L2
│ │ LDC "c"
│ │ GOTO L3
│ │ L2
│ │ ┌─ LDC "d"
│ │ │ LDC "e"
│ │ └─ INVOKESTATIC complex/IdentifyCallExample.anotherMethod3 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L4
│ │ LDC "f"
│ │ GOTO L3
│ │ L4
│ │ LDC "g"
│ │ L3
│ │ ┌─ LDC "h"
│ │ │ LDC "i"
│ │ └─ INVOKESTATIC complex/IdentifyCallExample.anotherMethod4 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L5
│ │ LDC "j"
│ │ GOTO L6
│ │ L5
│ │ LDC "k"
│ │ L6
│ └─── INVOKESTATIC complex/IdentifyCallExample.anotherMethod1 (Ljava/lang/String;Ljava/lang/String;)Z
│ IFEQ L7
│ LDC "l"
│ GOTO L8
│ L7
│ LDC "m"
│ L8
└───── INVOKEVIRTUAL java/io/PrintStream.println (Ljava/lang/String;)V
RETURN
这可能仍然无法涵盖所有可能的情况,但对于您的用例来说已经足够了。
嗨.
我想找到方法调用的 start 和 end 之间的指令范围。
我不想简单地改变方法调用 owner/name/desc.
有了预期的结果,我希望能够做到:
- 完全删除方法调用
- 通过在前面或后面添加新参数来修改方法调用
我一直在尝试不同的技术来实现这一目标:
- ASM 分析器(使用 SourceInterpreter)
- 循环指令集,正向和反向,尝试通过计算指令数或计算堆栈高度来定位
start
和end
- 在 Stack Overflow 上搜索(未找到任何能实现预期行为的结果)
我会给你一些我想要的例子,以防这里有任何混淆。
首先,看下面我的测试代码,然后再回到这里。
我希望 find/remove 对 anotherMethod4
的整个方法调用并将其替换为简单的 true
,从而得到以下代码:
System.out.println(
anotherMethod1(
anotherMethod2("a", "b") ?
"c" : anotherMethod3("d", "e") ? "f" : "g",
true ? "j" : "k"
) ? "l" : "m"
);
我希望 find/remove 对 anotherMethod1
的整个方法调用并将其替换为简单的 false
,结果代码为:
System.out.println(
false ? "l" : "m"
);
我希望删除对 System.out.println
的整个方法调用,导致此代码:
private Main()
{
}
这一定是可能的吧?
这是我当前的测试代码:
/**
 * Test fixture from the question: a single println statement whose argument is built from
 * nested calls to anotherMethod1..4 combined with conditional (?:) expressions.
 */
private Main()
{
System.out.println(
anotherMethod1(
// first argument of anotherMethod1: a nested conditional using anotherMethod2 and anotherMethod3
anotherMethod2("a", "b") ?
"c" : anotherMethod3("d", "e") ? "f" : "g",
// second argument of anotherMethod1: a conditional using anotherMethod4
anotherMethod4("h", "i") ? "j" : "k"
) ? "l" : "m"
);
}
// Stub used by the test expression: ignores both arguments and always returns true.
boolean anotherMethod1(String str, String oof)
{
return true;
}
// Stub used by the test expression: ignores both arguments and always returns true.
boolean anotherMethod2(String str, String oof)
{
return true;
}
// Stub used by the test expression: ignores both arguments and always returns true.
boolean anotherMethod3(String str, String oof)
{
return true;
}
// Stub used by the test expression: ignores both arguments and always returns true.
boolean anotherMethod4(String str, String oof)
{
return true;
}
方法调用的参数可能带有副作用,例如 method(variable = value);有时调用甚至根本无法删除,例如删除调用之后会导致访问未初始化的变量的情况。在字节码级别,属于参数求值的指令可能与任意不相关的指令交错出现。
但如果我们限制范围,就可以得到一个解决方案。在您的示例中,所有调用都是 invokevirtual 指令,其接收者是隐含的 this 或某个 static 字段的值。对于这类调用,我们确实可以使用 ASM 的 Analyzer 和 SourceInterpreter 来识别起始的 aload 或 getstatic 指令,并假设从该指令到调用指令之间(含两端)的所有指令都属于方法调用表达式。
我们可以使用这样的代码
/**
 * Locates the instruction span of a method invocation under the simplifying assumption that
 * the receiver is pushed by exactly one {@code ALOAD} or {@code GETSTATIC} instruction and that
 * everything between that instruction and the invocation belongs to the call expression.
 */
public class IdentifyCall {
    /**
     * Analyzes the given method with a plain {@link SourceInterpreter} and keeps the frames.
     *
     * @param internalClassName internal name of the class declaring {@code toAnalyze}
     * @param toAnalyze the method whose instructions are analyzed
     * @return a helper that can answer {@link #getSpan} for that method's instructions
     * @throws AnalyzerException if the analysis fails
     */
    static IdentifyCall getInputs(
            String internalClassName, MethodNode toAnalyze) throws AnalyzerException {
        Analyzer<SourceValue> analyzer = new Analyzer<>(new SourceInterpreter());
        return new IdentifyCall(
            toAnalyze.instructions, analyzer.analyze(internalClassName, toAnalyze));
    }

    private final InsnList instructions;
    /** One frame per instruction index, as returned by the analyzer. */
    private final Frame<SourceValue>[] frames;

    private IdentifyCall(InsnList il, Frame<SourceValue>[] analyzed) {
        instructions = il;
        frames = analyzed;
    }

    /**
     * Returns the first and last instruction index of the given invocation.
     *
     * @param i a method invocation instruction with a receiver (must be a {@code MethodInsnNode})
     * @return {@code { indexOfReceiverLoad, indexOfInvocation }}
     * @throws UnsupportedOperationException if the call has no receiver ({@code INVOKESTATIC}),
     *         if no frame was computed for the instruction, or if the receiver does not stem
     *         from a single {@code ALOAD} or {@code GETSTATIC} instruction
     */
    int[] getSpan(AbstractInsnNode i) {
        MethodInsnNode mn = (MethodInsnNode) i;
        // A static call has no receiver slot below its arguments; looking there would pick up
        // an unrelated value or run off the bottom of the stack.
        if (mn.getOpcode() == Opcodes.INVOKESTATIC)
            throw new UnsupportedOperationException("" + mn.getOpcode());
        // can't use getArgumentsAndReturnSizes, as for the frame, double and long do not count as 2
        int nArg = Type.getArgumentTypes(mn.desc).length;
        int end = instructions.indexOf(mn);
        Frame<SourceValue> f = frames[end];
        // No frame means the analyzer never reached this instruction.
        if (f == null) throw new UnsupportedOperationException("no frame for instruction " + end);
        SourceValue receiver = f.getStack(f.getStackSize() - nArg - 1);
        if (receiver.insns.size() != 1) throw new UnsupportedOperationException();
        AbstractInsnNode n = receiver.insns.iterator().next();
        if (n.getOpcode() != Opcodes.ALOAD && n.getOpcode() != Opcodes.GETSTATIC)
            throw new UnsupportedOperationException("" + n.getOpcode());
        return new int[] { instructions.indexOf(n), end };
    }
}
并用下面的例子来演示
/**
 * Demonstrates {@code IdentifyCall}: reads this class's own class file, analyzes the
 * {@code toAnalyze} method and prints its instructions, prefixed with a tree drawing that
 * marks the instruction span of every {@code INVOKEVIRTUAL} call.
 */
public class IdentifyCallExample {
    /** Sample code whose bytecode is analyzed by {@code main}. */
    private void toAnalyze() {
        System.out.println(
            anotherMethod1(
                anotherMethod2("a", "b") ?
                    "c" : anotherMethod3("d", "e") ? "f" : "g",
                anotherMethod4("h", "i") ? "j" : "k"
            ) ? "l" : "m"
        );
    }

    /** Stub: ignores its arguments and returns {@code true}. */
    boolean anotherMethod1(String str, String oof) {
        return true;
    }

    /** Stub: ignores its arguments and returns {@code true}. */
    boolean anotherMethod2(String str, String oof) {
        return true;
    }

    /** Stub: ignores its arguments and returns {@code true}. */
    boolean anotherMethod3(String str, String oof) {
        return true;
    }

    /** Stub: ignores its arguments and returns {@code true}. */
    boolean anotherMethod4(String str, String oof) {
        return true;
    }

    /**
     * Loads this class's bytecode, determines the span of each {@code INVOKEVIRTUAL}
     * instruction in {@code toAnalyze} and prints the annotated instruction list.
     *
     * @throws AnalyzerException if the data-flow analysis fails
     * @throws IOException if the class file cannot be read
     * @throws IllegalStateException if the class file contains no method named {@code toAnalyze}
     */
    public static void main(String[] args) throws AnalyzerException, IOException {
        Class<?> me = MethodHandles.lookup().lookupClass();
        ClassNode cn = new ClassNode();
        // ClassReader only reads from the stream; closing it is the caller's job.
        try (java.io.InputStream classFile
                = me.getResourceAsStream(me.getSimpleName() + ".class")) {
            ClassReader r = new ClassReader(classFile);
            r.accept(cn, ClassReader.SKIP_DEBUG | ClassReader.SKIP_FRAMES);
        }
        MethodNode toAnalyze = null;
        for (MethodNode mn : cn.methods) {
            if (mn.name.equals("toAnalyze")) {
                toAnalyze = mn;
                break;
            }
        }
        if (toAnalyze == null)
            throw new IllegalStateException("method toAnalyze not found in " + me.getName());
        List<int[]> invocations = new ArrayList<>();
        final InsnList instructions = toAnalyze.instructions;
        IdentifyCall identifyCall
            = IdentifyCall.getInputs(me.getName().replace('.', '/'), toAnalyze);
        for (int ix = 0, num = instructions.size(); ix < num; ix++) {
            AbstractInsnNode instr = instructions.get(ix);
            if (instr.getOpcode() != Opcodes.INVOKEVIRTUAL) continue;
            invocations.add(identifyCall.getSpan(instr));
        }
        printIt(invocations, instructions);
    }

    /**
     * Prints every instruction on its own line, preceded by one two-character cell per
     * {@link Level} that shows whether the instruction starts, ends, or lies inside a span.
     *
     * @param invocations {@code {first, last}} instruction index pairs, one per call
     * @param instructions the instruction list the indices refer to
     */
    private static void printIt(List<int[]> invocations, final InsnList instructions) {
        List<Level> levels = toTree(invocations);
        Textifier toText = new Textifier();
        TraceMethodVisitor tmv = new TraceMethodVisitor(toText);
        for (int ix = 0, num = instructions.size(); ix < num; ix++) {
            AbstractInsnNode instr = instructions.get(ix);
            // becomes true once a span start/end was drawn on this row; cells to the right
            // are then connected with horizontal line glyphs
            boolean line = false;
            level: for (Level l : levels) {
                if (ix >= l.lo && ix <= l.hi) {
                    for (int[] b : l.branches) {
                        if (ix < b[0] || ix > b[1]) continue;
                        System.out.print(line ?
                            (b[0] == ix ? b[1] == ix ? "─[" : "┬─" : b[1] == ix ? "┴─" : "┼─") :
                            (b[0] == ix ? b[1] == ix ? " [" : "┌─" : b[1] == ix ? "└─" : "│ "));
                        line |= b[0] == ix || b[1] == ix;
                        continue level;
                    }
                }
                // Every cell is two characters wide; the empty cell must be as well,
                // otherwise the columns of deeper levels shift between rows.
                System.out.print(line ? "──" : "  ");
            }
            instr.accept(tmv);
            System.out.print(toText.text.get(0));
            toText.text.clear();
        }
    }

    /**
     * One column of the printed tree: an ordered sequence of spans plus the overall
     * bounds {@code lo}..{@code hi} covered by the first and last span.
     */
    static class Level {
        int lo, hi;
        ArrayDeque<int[]> branches = new ArrayDeque<>();

        Level(int[] b) {
            lo = b[0];
            hi = b[1];
            branches.add(b);
        }

        /**
         * Tries to place span {@code b} into this level.
         * The span is prepended when it ends at or before {@code lo}, appended when it starts
         * at or after {@code hi}; otherwise a gap between existing spans is searched, starting
         * from the tail or the head depending on where the span's midpoint lies.
         *
         * @return {@code true} if the span was added, {@code false} if it does not fit
         */
        boolean insert(int[] b) {
            if (b[1] <= lo) {
                branches.addFirst(b);
                lo = b[0];
            }
            else if (b[0] >= hi) {
                branches.addLast(b);
                hi = b[1];
            }
            else return b[0] > lo && b[1] < hi
                && (b[0] + b[1]) >> 1 > (lo + hi) >> 1 ? tryTail(b, lo, hi) : tryHead(b, lo, hi);
            return true;
        }

        /**
         * Temporarily removes the first span and tries to place {@code b} right behind it;
         * the removed span is always put back in front afterwards.
         * {@code lo} and {@code hi} are local search bounds and do not modify the fields.
         */
        private boolean tryHead(int[] b, int lo, int hi) {
            int[] head = branches.removeFirst();
            try {
                // head reaches into b: no room behind it
                if (head[1] > b[0]) return false;
                if (branches.isEmpty() || (lo = branches.getFirst()[0]) >= b[1]) {
                    branches.addFirst(b);
                    return true;
                }
                else return b[0] > lo && b[1] < hi
                    && (b[0] + b[1]) >> 1 > (lo + hi) >> 1 ? tryTail(b, lo, hi) : tryHead(b, lo, hi);
            } finally {
                branches.addFirst(head);
            }
        }

        /**
         * Temporarily removes the last span and tries to place {@code b} right before it;
         * the removed span is always put back at the end afterwards.
         * {@code lo} and {@code hi} are local search bounds and do not modify the fields.
         */
        private boolean tryTail(int[] b, int lo, int hi) {
            int[] tail = branches.removeLast();
            try {
                // b reaches into tail: no room before it
                if (tail[0] < b[1]) return false;
                if (branches.isEmpty() || (hi = branches.getLast()[1]) <= b[0]) {
                    branches.addLast(b);
                    return true;
                }
                else return b[0] > lo && b[1] < hi
                    && (b[0] + b[1]) >> 1 > (lo + hi) >> 1 ? tryTail(b, lo, hi) : tryHead(b, lo, hi);
            } finally {
                branches.addLast(tail);
            }
        }
    }

    /**
     * Distributes the spans over levels: spans are processed from shortest to longest, each
     * going into the first level that accepts it; the resulting level list is reversed, so
     * the level created last is printed leftmost.
     *
     * @param list {@code {first, last}} index pairs; the list itself is not modified
     * @return the levels in printing order, empty for an empty input
     */
    static List<Level> toTree(List<int[]> list) {
        if (list.isEmpty()) return Collections.emptyList();
        if (list.size() == 1) return Collections.singletonList(new Level(list.get(0)));
        // sort a copy so the caller's list keeps its order (and may be unmodifiable)
        List<int[]> bySize = new ArrayList<>(list);
        bySize.sort(Comparator.comparingInt(b -> b[1] - b[0]));
        ArrayList<Level> l = new ArrayList<>();
        insert: for (int[] b : bySize) {
            for (Level level : l) if (level.insert(b)) continue insert;
            l.add(new Level(b));
        }
        if (l.size() > 1) Collections.reverse(l);
        return l;
    }
}
这将打印
┌───── GETSTATIC java/lang/System.out : Ljava/io/PrintStream;
│ ┌─── ALOAD 0
│ │ ┌─ ALOAD 0
│ │ │ LDC "a"
│ │ │ LDC "b"
│ │ └─ INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod2 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L0
│ │ LDC "c"
│ │ GOTO L1
│ │ L0
│ │ ┌─ ALOAD 0
│ │ │ LDC "d"
│ │ │ LDC "e"
│ │ └─ INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod3 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L2
│ │ LDC "f"
│ │ GOTO L1
│ │ L2
│ │ LDC "g"
│ │ L1
│ │ ┌─ ALOAD 0
│ │ │ LDC "h"
│ │ │ LDC "i"
│ │ └─ INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod4 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L3
│ │ LDC "j"
│ │ GOTO L4
│ │ L3
│ │ LDC "k"
│ │ L4
│ └─── INVOKEVIRTUAL simple/IdentifyCallExample.anotherMethod1 (Ljava/lang/String;Ljava/lang/String;)Z
│ IFEQ L5
│ LDC "l"
│ GOTO L6
│ L5
│ LDC "m"
│ L6
└───── INVOKEVIRTUAL java/io/PrintStream.println (Ljava/lang/String;)V
RETURN
当我们想要支持更复杂的接收者表达式,或者第一个参数可以是任意表达式的 static 方法时,事情会变得更加复杂。Frame<SourceValue> 允许我们识别把当前值压入操作数栈的指令,但对于像 a + b 这样的表达式,得到的只会是 iadd 指令,我们还必须分析 iadd 指令的帧才能获得它的输入。与其为每一种指令分别实现这一点,不如扩展解释器,在其中获取这些信息并存储起来(例如存入一个 Map),因为 Analyzer 已经完成了这项工作。然后,我们就可以递归地收集所有输入。
不过,这只能得到直接和间接的输入源;对于条件表达式,我们还需要条件本身的输入。为此,我们必须识别并存储条件分支。每当报告某个输入可能来自不同的源指令时,我们必须检查相关的分支并把它们的条件也加入进来。
然后我们再次使用简化假设,第一个和最后一个之间的所有指令也属于调用表达式。
更详细的代码看起来像
public class IdentifyCall {
private final InsnList instructions;
private final Map<AbstractInsnNode, Set<SourceValue>> sources;
private final TreeMap<int[],AbstractInsnNode> conditionals;
private IdentifyCall(InsnList il,
Map<AbstractInsnNode, Set<SourceValue>> s, TreeMap<int[], AbstractInsnNode> c) {
instructions = il;
sources = s;
conditionals = c;
}
Set<AbstractInsnNode> getAllInputsOf(AbstractInsnNode instr) {
Set<AbstractInsnNode> source = new HashSet<>();
List<SourceValue> pending = new ArrayList<>(sources.get(instr));
for (int pIx = 0; pIx < pending.size(); pIx++) {
SourceValue sv = pending.get(pIx);
final boolean branch = sv.insns.size() > 1;
for(AbstractInsnNode in: sv.insns) {
if(source.add(in))
pending.addAll(sources.getOrDefault(in, Collections.emptySet()));
if(branch) {
int ix = instructions.indexOf(in);
conditionals.forEach((b,i) -> {
if(b[0] <= ix && b[1] >= ix && source.add(i))
pending.addAll(sources.getOrDefault(i, Collections.emptySet()));
});
}
}
}
return source;
}
static IdentifyCall getInputs(
String internalClassName, MethodNode toAnalyze) throws AnalyzerException {
InsnList instructions = toAnalyze.instructions;
Map<AbstractInsnNode, Set<SourceValue>> sources = new HashMap<>();
SourceInterpreter i = new SourceInterpreter() {
@Override
public SourceValue unaryOperation(AbstractInsnNode insn, SourceValue value) {
sources.computeIfAbsent(insn, x -> new HashSet<>()).add(value);
return super.unaryOperation(insn, value);
}
@Override
public SourceValue binaryOperation(AbstractInsnNode insn, SourceValue v1, SourceValue v2) {
addAll(insn, Arrays.asList(v1, v2));
return super.binaryOperation(insn, v1, v2);
}
@Override
public SourceValue ternaryOperation(AbstractInsnNode insn, SourceValue v1, SourceValue v2, SourceValue v3) {
addAll(insn, Arrays.asList(v1, v2, v3));
return super.ternaryOperation(insn, v1, v2, v3);
}
@Override
public SourceValue naryOperation(AbstractInsnNode insn, List<? extends SourceValue> values) {
addAll(insn, values);
return super.naryOperation(insn, values);
}
private void addAll(AbstractInsnNode insn, List<? extends SourceValue> values) {
sources.computeIfAbsent(insn, x -> new HashSet<>()).addAll(values);
}
};
TreeMap<int[],AbstractInsnNode> conditionals = new TreeMap<>(
Comparator.comparingInt((int[] a) -> a[0]).thenComparingInt(a -> a[1]));
Analyzer<SourceValue> analyzer = new Analyzer<>(i) {
@Override
protected void newControlFlowEdge(int insn, int successor) {
if(insn != successor - 1) {
AbstractInsnNode instruction = instructions.get(insn);
Set<SourceValue> dep = sources.get(instruction);
if(dep != null && !dep.isEmpty())
conditionals.put(new int[]{ insn, successor }, instruction);
}
}
};
analyzer.analyze(internalClassName, toAnalyze);
return new IdentifyCall(instructions, sources, conditionals);
}
}
然后,我们还使用更详细的示例代码:
public class IdentifyCallExample {
/**
 * Sample code whose bytecode is analyzed. Unlike a plain System.out.println call, the
 * receiver of println is itself a conditional expression selecting System.out or System.err
 * depending on Math.random().
 */
private void toAnalyze() {
(Math.random()>0.5? System.out: System.err).println(
anotherMethod1(
// first argument of anotherMethod1: nested conditional using anotherMethod2 and anotherMethod3
anotherMethod2("a", "b") ?
"c" : anotherMethod3("d", "e") ? "f" : "g",
// second argument of anotherMethod1: conditional using anotherMethod4
anotherMethod4("h", "i") ? "j" : "k"
) ? "l" : "m"
);
}
/** Static stub: ignores both arguments and always returns true. */
static boolean anotherMethod1(String str, String oof) {
return true;
}
/** Static stub: ignores both arguments and always returns true. */
static boolean anotherMethod2(String str, String oof) {
return true;
}
/** Static stub: ignores both arguments and always returns true. */
static boolean anotherMethod3(String str, String oof) {
return true;
}
/** Static stub: ignores both arguments and always returns true. */
static boolean anotherMethod4(String str, String oof) {
return true;
}
/**
 * Loads this class's bytecode, computes for every method invocation in {@code toAnalyze}
 * the lowest and highest instruction index among the invocation and all of its inputs, and
 * prints the annotated instruction list.
 *
 * @throws AnalyzerException if the data-flow analysis fails
 * @throws IOException if the class file cannot be read
 * @throws IllegalStateException if the class file contains no method named {@code toAnalyze}
 */
public static void main(String[] args) throws AnalyzerException, IOException {
    Class<?> me = MethodHandles.lookup().lookupClass();
    ClassNode cn = new ClassNode();
    // ClassReader only reads from the stream; closing it is the caller's job.
    try (java.io.InputStream classFile
            = me.getResourceAsStream(me.getSimpleName() + ".class")) {
        ClassReader r = new ClassReader(classFile);
        r.accept(cn, ClassReader.SKIP_DEBUG | ClassReader.SKIP_FRAMES);
    }
    MethodNode toAnalyze = null;
    for (MethodNode mn : cn.methods) {
        if (mn.name.equals("toAnalyze")) {
            toAnalyze = mn;
            break;
        }
    }
    if (toAnalyze == null)
        throw new IllegalStateException("method toAnalyze not found in " + me.getName());
    List<int[]> invocations = new ArrayList<>();
    final InsnList instructions = toAnalyze.instructions;
    IdentifyCall sources = IdentifyCall.getInputs(me.getName().replace('.', '/'), toAnalyze);
    for (int ix = 0, num = instructions.size(); ix < num; ix++) {
        AbstractInsnNode instr = instructions.get(ix);
        if (instr.getType() != AbstractInsnNode.METHOD_INSN) continue;
        IntSummaryStatistics s = sources.getAllInputsOf(instr).stream()
            .mapToInt(instructions::indexOf).summaryStatistics();
        // the invocation itself is part of the span, also when it has no inputs at all
        s.accept(ix);
        invocations.add(new int[]{ s.getMin(), s.getMax() });
    }
    printIt(invocations, instructions);
}
// remainder as in the simple variant
现在将打印
┌────[ INVOKESTATIC java/lang/Math.random ()D
│ LDC 0.5
│ DCMPL
│ IFLE L0
│ GETSTATIC java/lang/System.out : Ljava/io/PrintStream;
│ GOTO L1
│ L0
│ GETSTATIC java/lang/System.err : Ljava/io/PrintStream;
│ L1
│ ┌─┬─ LDC "a"
│ │ │ LDC "b"
│ │ └─ INVOKESTATIC complex/IdentifyCallExample.anotherMethod2 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L2
│ │ LDC "c"
│ │ GOTO L3
│ │ L2
│ │ ┌─ LDC "d"
│ │ │ LDC "e"
│ │ └─ INVOKESTATIC complex/IdentifyCallExample.anotherMethod3 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L4
│ │ LDC "f"
│ │ GOTO L3
│ │ L4
│ │ LDC "g"
│ │ L3
│ │ ┌─ LDC "h"
│ │ │ LDC "i"
│ │ └─ INVOKESTATIC complex/IdentifyCallExample.anotherMethod4 (Ljava/lang/String;Ljava/lang/String;)Z
│ │ IFEQ L5
│ │ LDC "j"
│ │ GOTO L6
│ │ L5
│ │ LDC "k"
│ │ L6
│ └─── INVOKESTATIC complex/IdentifyCallExample.anotherMethod1 (Ljava/lang/String;Ljava/lang/String;)Z
│ IFEQ L7
│ LDC "l"
│ GOTO L8
│ L7
│ LDC "m"
│ L8
└───── INVOKEVIRTUAL java/io/PrintStream.println (Ljava/lang/String;)V
RETURN
这可能仍然无法涵盖所有可能的情况,但对于您的用例来说已经足够了。