Skip to content

Commit 6159242

Browse files
committed
Support ignore_noninstrumented_modules on Linux
Land a proposed upstream change so I can continue to make progress on the Swift side. If the upstream patch changes before it lands, this will create a merge conflict which I will fix by aligning this change with whatever we decided to do upstream. Original upstream patch description/commit message: We implemented the `ignore_noninstrumented_modules` flag for Darwin [1] to be able to use TSan on Darwin, where the "re-compile the world" approach is difficult to follow in practice. Now we are in the same situation when adding TSan support for Swift on Linux. The libdispatch library is a source of false positives and compiling it with TSan (although it is compiled from source on Linux) proves to be tricky and insufficient. The better path seems to be to make `ignore_noninstrumented_modules` work on Linux. The missing piece with this functionality on Linux is the detection whether or not a loaded library is instrumented (which is suprisingly difficult). In this patch we use the ELF segment metadata and symbol table to check whether any sanitizer symbols are present. [2,3] were useful resources in the development of this patch (especially, `getSymbolCountFromGnuHash`). Also enable flag for libdispatch tests on Linux to make them pass. The default value for this flag on Linux remains OFF. [1]: https://reviews.llvm.org/D28264 [2]: https://chromium.googlesource.com/crashpad/crashpad/+/1f1657d573c789aa36b6022440e34d9ec30d894c%5E%21/ [3]: https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ Differential Revision: https://reviews.llvm.org/D61708 apple-llvm-split-commit: eb0abd168a7ae46fa7c622303f3dcc4e11f956d0 apple-llvm-split-dir: compiler-rt/
1 parent 18083d6 commit 6159242

File tree

6 files changed

+203
-10
lines changed

6 files changed

+203
-10
lines changed

compiler-rt/lib/sanitizer_common/sanitizer_common.cc

+4-3
Original file line numberDiff line numberDiff line change
@@ -127,19 +127,20 @@ void RemoveANSIEscapeSequencesFromString(char *str) {
127127
*z = '\0';
128128
}
129129

130-
void LoadedModule::set(const char *module_name, uptr base_address) {
130+
void LoadedModule::set(const char *module_name, uptr base_address,
131+
bool instrumented) {
131132
clear();
132133
full_name_ = internal_strdup(module_name);
133134
base_address_ = base_address;
135+
instrumented_ = instrumented;
134136
}
135137

136138
void LoadedModule::set(const char *module_name, uptr base_address,
137139
ModuleArch arch, u8 uuid[kModuleUUIDSize],
138140
bool instrumented) {
139-
set(module_name, base_address);
141+
set(module_name, base_address, instrumented);
140142
arch_ = arch;
141143
internal_memcpy(uuid_, uuid, sizeof(uuid_));
142-
instrumented_ = instrumented;
143144
}
144145

145146
void LoadedModule::clear() {

compiler-rt/lib/sanitizer_common/sanitizer_common.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,8 @@ class LoadedModule {
711711
internal_memset(uuid_, 0, kModuleUUIDSize);
712712
ranges_.clear();
713713
}
714-
void set(const char *module_name, uptr base_address);
714+
void set(const char *module_name, uptr base_address,
715+
bool instrumented = false);
715716
void set(const char *module_name, uptr base_address, ModuleArch arch,
716717
u8 uuid[kModuleUUIDSize], bool instrumented);
717718
void clear();

compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cc

+122-1
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,126 @@ struct DlIteratePhdrData {
541541
bool first;
542542
};
543543

544+
class DynamicSegment {
545+
ElfW(Addr) base_addr;
546+
size_t symbol_count;
547+
ElfW(Sym *) symbol_table;
548+
const char *string_table;
549+
550+
public:
551+
DynamicSegment(ElfW(Addr) base_addr, ElfW(Addr) segment_addr)
552+
: base_addr(base_addr),
553+
symbol_count(0),
554+
symbol_table(nullptr),
555+
string_table(nullptr) {
556+
initialize(segment_addr);
557+
CHECK(symbol_count > 0 && symbol_table && string_table);
558+
}
559+
560+
size_t symbolCount() const { return symbol_count; }
561+
562+
const char *getSymbolName(size_t index) const {
563+
auto str_index = symbol_table[index].st_name;
564+
return &string_table[str_index];
565+
}
566+
567+
private:
568+
// Elf_Addr -> dereferenceable pointer
569+
template <typename Type>
570+
Type *toPtr(ElfW(Addr) addr_or_offset) const {
571+
bool offset = addr_or_offset < base_addr;
572+
ElfW(Addr) ptr = offset ? (base_addr + addr_or_offset) : addr_or_offset;
573+
return reinterpret_cast<Type *>(ptr);
574+
}
575+
576+
void initialize(ElfW(Addr) segment_addr) {
577+
auto *entry = toPtr<ElfW(Dyn)>(segment_addr);
578+
for (; entry->d_tag != DT_NULL; entry++) {
579+
auto addr = entry->d_un.d_ptr;
580+
switch (entry->d_tag) {
581+
case DT_HASH:
582+
symbol_count = getSymbolCountFromHash(addr);
583+
break;
584+
case DT_GNU_HASH:
585+
// DT_HASH takes precedence over DT_GNU_HASH
586+
if (symbol_count > 0)
587+
break;
588+
symbol_count = getSymbolCountFromGnuHash(addr);
589+
break;
590+
case DT_SYMTAB:
591+
CHECK_EQ(symbol_table, nullptr);
592+
symbol_table = toPtr<ElfW(Sym)>(addr);
593+
break;
594+
case DT_STRTAB:
595+
CHECK_EQ(string_table, nullptr);
596+
string_table = toPtr<const char>(addr);
597+
break;
598+
}
599+
}
600+
}
601+
602+
size_t getSymbolCountFromHash(ElfW(Addr) hashtable_addr) const {
603+
struct ht_header {
604+
uint32_t bucket_count;
605+
uint32_t chain_count;
606+
};
607+
return toPtr<ht_header>(hashtable_addr)->chain_count;
608+
}
609+
610+
size_t getSymbolCountFromGnuHash(ElfW(Addr) hashtable_addr) const {
611+
struct ht_header {
612+
uint32_t bucket_count;
613+
uint32_t symoffset;
614+
uint32_t bloom_size;
615+
uint32_t bloom_shift;
616+
};
617+
auto header = toPtr<ht_header>(hashtable_addr);
618+
auto word_size = FIRST_32_SECOND_64(sizeof(uint32_t), sizeof(uint64_t));
619+
auto buckets_addr =
620+
hashtable_addr + sizeof(ht_header) + (word_size * header->bloom_size);
621+
auto buckets = toPtr<uint32_t>(buckets_addr);
622+
auto chains_addr =
623+
buckets_addr + (header->bucket_count * sizeof(buckets[0]));
624+
auto chains = toPtr<uint32_t>(chains_addr);
625+
626+
// Locate the chain that handles the largest index bucket.
627+
uint32_t last_symbol = 0;
628+
for (uint32_t i = 0; i < header->bucket_count; i++) {
629+
last_symbol = Max(buckets[i], last_symbol);
630+
}
631+
632+
// Walk the bucket's chain to add the chain length to the total.
633+
uint32_t chain_entry;
634+
do {
635+
chain_entry = chains[last_symbol - header->symoffset];
636+
last_symbol++;
637+
} while ((chain_entry & 1) == 0);
638+
639+
return last_symbol;
640+
}
641+
};
642+
643+
static bool IsModuleInstrumented(dl_phdr_info *info) {
644+
// Iterate all headers of the library.
645+
for (size_t header = 0; header < info->dlpi_phnum; header++) {
646+
// We are only interested in dynamic segments.
647+
if (info->dlpi_phdr[header].p_type != PT_DYNAMIC)
648+
continue;
649+
650+
auto base_addr = info->dlpi_addr;
651+
auto segment_addr = info->dlpi_phdr[header].p_vaddr;
652+
DynamicSegment segment(base_addr, segment_addr);
653+
654+
// Iterate symbol table.
655+
for (size_t i = 0; i < segment.symbolCount(); i++) {
656+
auto *name = segment.getSymbolName(i);
657+
if (internal_strcmp(name, "__tsan_init") == 0)
658+
return true;
659+
}
660+
}
661+
return false;
662+
}
663+
544664
static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
545665
DlIteratePhdrData *data = (DlIteratePhdrData*)arg;
546666
InternalScopedString module_name(kMaxPathLength);
@@ -554,7 +674,8 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
554674
if (module_name[0] == '\0')
555675
return 0;
556676
LoadedModule cur_module;
557-
cur_module.set(module_name.data(), info->dlpi_addr);
677+
bool instrumented = IsModuleInstrumented(info);
678+
cur_module.set(module_name.data(), info->dlpi_addr, instrumented);
558679
for (int i = 0; i < (int)info->dlpi_phnum; i++) {
559680
const Elf_Phdr *phdr = &info->dlpi_phdr[i];
560681
if (phdr->p_type == PT_LOAD) {

compiler-rt/test/tsan/Darwin/ignore-noninstrumented.mm

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
// RUN: %clang_tsan %s -o %t -framework Foundation
77

88
// Check that without the flag, there are false positives.
9-
// RUN: %env_tsan_opts=ignore_noninstrumented_modules=0 %deflake %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-RACE
9+
// RUN: %env_tsan_opts=ignore_noninstrumented_modules=0 %deflake %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-RACE
1010

1111
// With ignore_noninstrumented_modules=1, no races are reported.
12-
// RUN: %env_tsan_opts=ignore_noninstrumented_modules=1 %run %t 2>&1 | FileCheck %s
12+
// RUN: %env_tsan_opts=ignore_noninstrumented_modules=1 %run %t 2>&1 | FileCheck %s --implicit-check-not='ThreadSanitizer'
1313

1414
// With ignore_noninstrumented_modules=1, races in user's code are still reported.
15-
// RUN: %env_tsan_opts=ignore_noninstrumented_modules=1 %deflake %run %t race 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-RACE
15+
// RUN: %env_tsan_opts=ignore_noninstrumented_modules=1 %deflake %run %t race 2>&1 | FileCheck %s --check-prefix=CHECK-RACE
1616

1717
#import <Foundation/Foundation.h>
1818

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Check that ignore_noninstrumented_modules=1 suppresses reports originating
2+
// from interceptors that are called from an un-instrumented library.
3+
4+
// RUN: %clangxx_tsan %s -fPIC -shared -DLIBRARY -fno-sanitize=thread -o %t.library.so
5+
// RUN: %clangxx_tsan %s %t.library.so -o %t
6+
7+
// Check that without the flag, there are false positives.
8+
// RUN: %env_tsan_opts=ignore_noninstrumented_modules=0 %deflake %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-RACE
9+
10+
// With ignore_noninstrumented_modules=1, no races are reported.
11+
// RUN: %env_tsan_opts=ignore_noninstrumented_modules=1 %run %t 2>&1 | FileCheck %s --implicit-check-not='ThreadSanitizer'
12+
13+
// With ignore_noninstrumented_modules=1, races in user's code are still reported.
14+
// RUN: %env_tsan_opts=ignore_noninstrumented_modules=1 %deflake %run %t race 2>&1 | FileCheck %s --check-prefix=CHECK-RACE
15+
16+
#include "test.h"
17+
18+
#include <cstring>
19+
20+
#ifdef LIBRARY
21+
namespace library {
22+
#endif
23+
char global_buf[64];
24+
25+
void *Thread1(void *x) {
26+
barrier_wait(&barrier);
27+
strcpy(global_buf, "hello world"); // NOLINT
28+
return NULL;
29+
}
30+
31+
void *Thread2(void *x) {
32+
strcpy(global_buf, "world hello"); // NOLINT
33+
barrier_wait(&barrier);
34+
return NULL;
35+
}
36+
37+
void Race() {
38+
barrier_init(&barrier, 2);
39+
pthread_t t[2];
40+
pthread_create(&t[0], NULL, Thread1, NULL);
41+
pthread_create(&t[1], NULL, Thread2, NULL);
42+
pthread_join(t[0], NULL);
43+
pthread_join(t[1], NULL);
44+
}
45+
#ifdef LIBRARY
46+
} // namespace library
47+
#endif
48+
49+
#ifndef LIBRARY
50+
namespace library {
51+
void Race();
52+
}
53+
54+
int main(int argc, char *argv[]) {
55+
fprintf(stderr, "Hello world.\n");
56+
57+
// Race in un-instrumented library
58+
library::Race();
59+
60+
// Race in user code, if requested
61+
if (argc > 1 && strcmp(argv[1], "race") == 0)
62+
Race();
63+
64+
fprintf(stderr, "Done.\n");
65+
}
66+
67+
#endif // LIBRARY
68+
69+
// CHECK: Hello world.
70+
// CHECK-RACE: SUMMARY: ThreadSanitizer: data race
71+
// CHECK: Done.

compiler-rt/test/tsan/libdispatch/lit.local.cfg

+1-2
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,4 @@ if 'libdispatch' in root.available_features:
1313
else:
1414
config.unsupported = True
1515

16-
if config.host_os == 'Darwin':
17-
config.environment['TSAN_OPTIONS'] += ':ignore_noninstrumented_modules=1'
16+
config.environment['TSAN_OPTIONS'] += ':ignore_noninstrumented_modules=1'

0 commit comments

Comments
 (0)