Skip to content

Commit be6f9a6

Browse files
committed
[IR2Vec][llvm-ir2vec] Supporting flow-aware embeddings
1 parent 91ea617 commit be6f9a6

File tree

4 files changed

+93
-6
lines changed

4 files changed

+93
-6
lines changed

llvm/docs/CommandGuide/llvm-ir2vec.rst

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ DESCRIPTION
1313

1414
:program:`llvm-ir2vec` is a standalone command-line tool for IR2Vec. It
1515
generates IR2Vec embeddings for LLVM IR and supports triplet generation
16-
for vocabulary training. The tool provides three main subcommands:
16+
for vocabulary training.
17+
18+
The tool provides three main subcommands:
1719

1820
1. **triplets**: Generates numeric triplets in train2id format for vocabulary
1921
training from LLVM IR.
@@ -93,7 +95,7 @@ Example Usage:
9395

9496
.. code-block:: bash
9597
96-
llvm-ir2vec embeddings --ir2vec-vocab-path=vocab.json --level=func input.bc -o embeddings.txt
98+
llvm-ir2vec embeddings --ir2vec-vocab-path=vocab.json --ir2vec-kind=symbolic --level=func input.bc -o embeddings.txt
9799
98100
OPTIONS
99101
-------
@@ -129,6 +131,16 @@ Subcommand-specific options:
129131

130132
Process only the specified function instead of all functions in the module.
131133

134+
.. option:: --ir2vec-kind=<kind>
135+
136+
Specify the kind of IR2Vec embeddings to generate. Valid values are:
137+
138+
* ``symbolic`` - Generate symbolic embeddings (default)
139+
* ``flow-aware`` - Generate flow-aware embeddings
140+
141+
Flow-aware embeddings additionally capture flow information, such as use-def relationships between instructions,
142+
while symbolic embeddings focus on the symbolic representation of instructions.
143+
132144
.. option:: --ir2vec-vocab-path=<path>
133145

134146
Specify the path to the vocabulary file (required for embedding generation).
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; RUN: llvm-ir2vec embeddings --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEFAULT
2+
; RUN: llvm-ir2vec embeddings --level=func --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL
3+
; RUN: llvm-ir2vec embeddings --level=func --function=abc --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL-ABC
4+
; RUN: not llvm-ir2vec embeddings --level=func --function=def --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-FUNC-DEF
5+
; RUN: llvm-ir2vec embeddings --level=bb --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL
6+
; RUN: llvm-ir2vec embeddings --level=bb --function=abc_repeat --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL-ABC-REPEAT
7+
; RUN: llvm-ir2vec embeddings --level=inst --function=abc_repeat --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-INST-LEVEL-ABC-REPEAT
8+
9+
define dso_local noundef float @abc(i32 noundef %a, float noundef %b) #0 {
10+
entry:
11+
%a.addr = alloca i32, align 4
12+
%b.addr = alloca float, align 4
13+
store i32 %a, ptr %a.addr, align 4
14+
store float %b, ptr %b.addr, align 4
15+
%0 = load i32, ptr %a.addr, align 4
16+
%1 = load i32, ptr %a.addr, align 4
17+
%mul = mul nsw i32 %0, %1
18+
%conv = sitofp i32 %mul to float
19+
%2 = load float, ptr %b.addr, align 4
20+
%add = fadd float %conv, %2
21+
ret float %add
22+
}
23+
24+
define dso_local noundef float @abc_repeat(i32 noundef %a, float noundef %b) #0 {
25+
entry:
26+
%a.addr = alloca i32, align 4
27+
%b.addr = alloca float, align 4
28+
store i32 %a, ptr %a.addr, align 4
29+
store float %b, ptr %b.addr, align 4
30+
%0 = load i32, ptr %a.addr, align 4
31+
%1 = load i32, ptr %a.addr, align 4
32+
%mul = mul nsw i32 %0, %1
33+
%conv = sitofp i32 %mul to float
34+
%2 = load float, ptr %b.addr, align 4
35+
%add = fadd float %conv, %2
36+
ret float %add
37+
}
38+
39+
; CHECK-DEFAULT: Function: abc
40+
; CHECK-DEFAULT-NEXT: [ 3630.00 3672.00 3714.00 ]
41+
; CHECK-DEFAULT-NEXT: Function: abc_repeat
42+
; CHECK-DEFAULT-NEXT: [ 3630.00 3672.00 3714.00 ]
43+
44+
; CHECK-FUNC-LEVEL: Function: abc
45+
; CHECK-FUNC-LEVEL-NEXT: [ 3630.00 3672.00 3714.00 ]
46+
; CHECK-FUNC-LEVEL-NEXT: Function: abc_repeat
47+
; CHECK-FUNC-LEVEL-NEXT: [ 3630.00 3672.00 3714.00 ]
48+
49+
; CHECK-FUNC-LEVEL-ABC: Function: abc
50+
; CHECK-FUNC-LEVEL-ABC-NEXT: [ 3630.00 3672.00 3714.00 ]
51+
52+
; CHECK-FUNC-DEF: Error: Function 'def' not found
53+
54+
; CHECK-BB-LEVEL: Function: abc
55+
; CHECK-BB-LEVEL-NEXT: entry: [ 3630.00 3672.00 3714.00 ]
56+
; CHECK-BB-LEVEL-NEXT: Function: abc_repeat
57+
; CHECK-BB-LEVEL-NEXT: entry: [ 3630.00 3672.00 3714.00 ]
58+
59+
; CHECK-BB-LEVEL-ABC-REPEAT: Function: abc_repeat
60+
; CHECK-BB-LEVEL-ABC-REPEAT-NEXT: entry: [ 3630.00 3672.00 3714.00 ]
61+
62+
; CHECK-INST-LEVEL-ABC-REPEAT: Function: abc_repeat
63+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %a.addr = alloca i32, align 4 [ 91.00 92.00 93.00 ]
64+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %b.addr = alloca float, align 4 [ 91.00 92.00 93.00 ]
65+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: store i32 %a, ptr %a.addr, align 4 [ 188.00 190.00 192.00 ]
66+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: store float %b, ptr %b.addr, align 4 [ 188.00 190.00 192.00 ]
67+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %0 = load i32, ptr %a.addr, align 4 [ 185.00 187.00 189.00 ]
68+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %1 = load i32, ptr %a.addr, align 4 [ 185.00 187.00 189.00 ]
69+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %mul = mul nsw i32 %0, %1 [ 419.00 424.00 429.00 ]
70+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %conv = sitofp i32 %mul to float [ 549.00 555.00 561.00 ]
71+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %2 = load float, ptr %b.addr, align 4 [ 185.00 187.00 189.00 ]
72+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: %add = fadd float %conv, %2 [ 774.00 783.00 792.00 ]
73+
; CHECK-INST-LEVEL-ABC-REPEAT-NEXT: ret float %add [ 775.00 785.00 795.00 ]

llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,11 @@
2525
/// 3. Embedding Generation (embeddings):
2626
/// Generates IR2Vec embeddings using a trained vocabulary.
2727
/// Usage: llvm-ir2vec embeddings --ir2vec-vocab-path=vocab.json
28-
/// --level=func input.bc -o embeddings.txt Levels: --level=inst
29-
/// (instructions), --level=bb (basic blocks), --level=func (functions)
30-
/// (See IR2Vec.cpp for more embedding generation options)
28+
/// --ir2vec-kind=<kind> --level=<level> input.bc -o embeddings.txt
29+
/// Kind: --ir2vec-kind=symbolic (default), --ir2vec-kind=flow-aware
30+
/// Levels: --level=inst (instructions), --level=bb (basic blocks),
31+
/// --level=func (functions)
32+
/// (See IR2Vec.cpp for more embedding generation options)
3133
///
3234
//===----------------------------------------------------------------------===//
3335

@@ -243,7 +245,7 @@ class IR2VecTool {
243245

244246
// Create embedder for this function
245247
assert(Vocab->isValid() && "Vocabulary is not valid");
246-
auto Emb = Embedder::create(IR2VecKind::Symbolic, F, *Vocab);
248+
auto Emb = Embedder::create(IR2VecEmbeddingKind, F, *Vocab);
247249
if (!Emb) {
248250
OS << "Error: Failed to create embedder for function " << F.getName()
249251
<< "\n";

0 commit comments

Comments
 (0)