Skip to content

Commit 75f0206

Browse files
Merged main:8a8ef1cacfcd7745d2b6ad00431e6fa9ab9a2fb4 into amd-gfx:c6ac0bd36762
Local branch amd-gfx c6ac0bd Merged main:ce1fd9281707c2163728085d126ff83041e1db51 into amd-gfx:f2b15bee0911 Remote branch main 8a8ef1c [flang][cuda] Enable cuda with -x cuda option (llvm#84944) Change-Id: Ibfc5b2c9bbcfb821a2287aa74c062f3d46ca6e4f
2 parents c6ac0bd + 8a8ef1c commit 75f0206

File tree

789 files changed

+17751
-6572
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

789 files changed

+17751
-6572
lines changed

bolt/lib/Rewrite/LinuxKernelRewriter.cpp

+93-33
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ static cl::opt<bool> DumpParavirtualPatchSites(
5555
"dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"),
5656
cl::init(false), cl::Hidden, cl::cat(BoltCategory));
5757

58+
static cl::opt<bool>
59+
DumpPCIFixups("dump-pci-fixups",
60+
cl::desc("dump Linux kernel PCI fixup table"),
61+
cl::init(false), cl::Hidden, cl::cat(BoltCategory));
62+
5863
static cl::opt<bool> DumpStaticCalls("dump-static-calls",
5964
cl::desc("dump Linux kernel static calls"),
6065
cl::init(false), cl::Hidden,
@@ -181,6 +186,10 @@ class LinuxKernelRewriter final : public MetadataRewriter {
181186
/// Size of bug_entry struct.
182187
static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12;
183188

189+
/// .pci_fixup section.
190+
ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address;
191+
static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16;
192+
184193
/// Insert an LKMarker for a given code pointer \p PC from a non-code section
185194
/// \p SectionName.
186195
void insertLKMarker(uint64_t PC, uint64_t SectionOffset,
@@ -190,9 +199,6 @@ class LinuxKernelRewriter final : public MetadataRewriter {
190199
/// Process linux kernel special sections and their relocations.
191200
void processLKSections();
192201

193-
/// Process special linux kernel section, .pci_fixup.
194-
void processLKPCIFixup();
195-
196202
/// Process __ksymtab and __ksymtab_gpl.
197203
void processLKKSymtab(bool IsGPL = false);
198204

@@ -226,6 +232,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
226232
/// Read alternative instruction info from .altinstructions.
227233
Error readAltInstructions();
228234

235+
/// Read .pci_fixup
236+
Error readPCIFixupTable();
237+
229238
/// Mark instructions referenced by kernel metadata.
230239
Error markInstructions();
231240

@@ -256,6 +265,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
256265
if (Error E = readAltInstructions())
257266
return E;
258267

268+
if (Error E = readPCIFixupTable())
269+
return E;
270+
259271
return Error::success();
260272
}
261273

@@ -318,41 +330,11 @@ void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
318330
}
319331

320332
void LinuxKernelRewriter::processLKSections() {
321-
processLKPCIFixup();
322333
processLKKSymtab();
323334
processLKKSymtab(true);
324335
processLKSMPLocks();
325336
}
326337

327-
/// Process .pci_fixup section of Linux Kernel.
328-
/// This section contains a list of entries for different PCI devices and their
329-
/// corresponding hook handler (code pointer where the fixup
330-
/// code resides, usually on x86_64 it is an entry PC relative 32 bit offset).
331-
/// Documentation is in include/linux/pci.h.
332-
void LinuxKernelRewriter::processLKPCIFixup() {
333-
ErrorOr<BinarySection &> SectionOrError =
334-
BC.getUniqueSectionByName(".pci_fixup");
335-
if (!SectionOrError)
336-
return;
337-
338-
const uint64_t SectionSize = SectionOrError->getSize();
339-
const uint64_t SectionAddress = SectionOrError->getAddress();
340-
assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16");
341-
342-
for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) {
343-
const uint64_t PC = SectionAddress + I;
344-
ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(PC, 4);
345-
assert(Offset && "cannot read value from .pci_fixup");
346-
const int32_t SignedOffset = *Offset;
347-
const uint64_t HookupAddress = PC + SignedOffset;
348-
BinaryFunction *HookupFunction =
349-
BC.getBinaryFunctionAtAddress(HookupAddress);
350-
assert(HookupFunction && "expected function for entry in .pci_fixup");
351-
BC.addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0,
352-
*Offset);
353-
}
354-
}
355-
356338
/// Process __ksymtab[_gpl] sections of Linux Kernel.
357339
/// This section lists all the vmlinux symbols that kernel modules can access.
358340
///
@@ -1283,6 +1265,84 @@ Error LinuxKernelRewriter::readAltInstructions() {
12831265
return Error::success();
12841266
}
12851267

1268+
/// When the Linux kernel needs to handle an error associated with a given PCI
1269+
/// device, it uses a table stored in .pci_fixup section to locate a fixup code
1270+
/// specific to the vendor and the problematic device. The section contains a
1271+
/// list of the following structures defined in include/linux/pci.h:
1272+
///
1273+
/// struct pci_fixup {
1274+
/// u16 vendor; /* Or PCI_ANY_ID */
1275+
/// u16 device; /* Or PCI_ANY_ID */
1276+
/// u32 class; /* Or PCI_ANY_ID */
1277+
/// unsigned int class_shift; /* should be 0, 8, 16 */
1278+
/// int hook_offset;
1279+
/// };
1280+
///
1281+
/// Normally, the hook will point to a function start and we don't have to
1282+
/// update the pointer if we are not relocating functions. Hence, while reading
1283+
/// the table we validate this assumption. If a function has a fixup code in the
1284+
/// middle of its body, we issue a warning and ignore it.
1285+
Error LinuxKernelRewriter::readPCIFixupTable() {
1286+
PCIFixupSection = BC.getUniqueSectionByName(".pci_fixup");
1287+
if (!PCIFixupSection)
1288+
return Error::success();
1289+
1290+
if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE)
1291+
return createStringError(errc::executable_format_error,
1292+
"PCI fixup table size error");
1293+
1294+
const uint64_t Address = PCIFixupSection->getAddress();
1295+
DataExtractor DE = DataExtractor(PCIFixupSection->getContents(),
1296+
BC.AsmInfo->isLittleEndian(),
1297+
BC.AsmInfo->getCodePointerSize());
1298+
uint64_t EntryID = 0;
1299+
DataExtractor::Cursor Cursor(0);
1300+
while (Cursor && !DE.eof(Cursor)) {
1301+
const uint16_t Vendor = DE.getU16(Cursor);
1302+
const uint16_t Device = DE.getU16(Cursor);
1303+
const uint32_t Class = DE.getU32(Cursor);
1304+
const uint32_t ClassShift = DE.getU32(Cursor);
1305+
const uint64_t HookAddress =
1306+
Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1307+
1308+
if (!Cursor)
1309+
return createStringError(errc::executable_format_error,
1310+
"out of bounds while reading .pci_fixup: %s",
1311+
toString(Cursor.takeError()).c_str());
1312+
1313+
++EntryID;
1314+
1315+
if (opts::DumpPCIFixups) {
1316+
BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor 0x"
1317+
<< Twine::utohexstr(Vendor) << "\n\tDevice: 0x"
1318+
<< Twine::utohexstr(Device) << "\n\tClass: 0x"
1319+
<< Twine::utohexstr(Class) << "\n\tClassShift: 0x"
1320+
<< Twine::utohexstr(ClassShift) << "\n\tHookAddress: 0x"
1321+
<< Twine::utohexstr(HookAddress) << '\n';
1322+
}
1323+
1324+
BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(HookAddress);
1325+
if (!BF && opts::Verbosity) {
1326+
BC.outs() << "BOLT-INFO: no function matches address 0x"
1327+
<< Twine::utohexstr(HookAddress)
1328+
<< " of hook from .pci_fixup\n";
1329+
}
1330+
1331+
if (!BF || !BC.shouldEmit(*BF))
1332+
continue;
1333+
1334+
if (const uint64_t Offset = HookAddress - BF->getAddress()) {
1335+
BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function "
1336+
<< *BF << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1337+
BF->setSimple(false);
1338+
}
1339+
}
1340+
1341+
BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n";
1342+
1343+
return Error::success();
1344+
}
1345+
12861346
} // namespace
12871347

12881348
std::unique_ptr<MetadataRewriter>

bolt/test/X86/linux-pci-fixup.s

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# REQUIRES: system-linux
2+
3+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
4+
# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
5+
# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
6+
# RUN: llvm-bolt %t.exe --print-normalized -o %t.out |& FileCheck %s
7+
8+
## Check that BOLT correctly parses the Linux kernel .pci_fixup section and
9+
## verify that PCI fixup hook in the middle of a function is detected.
10+
11+
# CHECK: BOLT-INFO: Linux kernel binary detected
12+
# CHECK: BOLT-WARNING: PCI fixup detected in the middle of function _start
13+
# CHECK: BOLT-INFO: parsed 2 PCI fixup entries
14+
15+
.text
16+
.globl _start
17+
.type _start, %function
18+
_start:
19+
nop
20+
.L0:
21+
ret
22+
.size _start, .-_start
23+
24+
## PCI fixup table.
25+
.section .pci_fixup,"a",@progbits
26+
27+
.short 0x8086 # vendor
28+
.short 0xbeef # device
29+
.long 0xffffffff # class
30+
.long 0x0 # class shift
31+
.long _start - . # fixup
32+
33+
.short 0x8086 # vendor
34+
.short 0xbad # device
35+
.long 0xffffffff # class
36+
.long 0x0 # class shift
37+
.long .L0 - . # fixup
38+
39+
## Fake Linux Kernel sections.
40+
.section __ksymtab,"a",@progbits
41+
.section __ksymtab_gpl,"a",@progbits

clang/cmake/caches/Fuchsia.cmake

+1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ set(_FUCHSIA_BOOTSTRAP_PASSTHROUGH
6565
LLDB_EMBED_PYTHON_HOME
6666
LLDB_PYTHON_HOME
6767
LLDB_PYTHON_RELATIVE_PATH
68+
LLDB_TEST_USE_VENDOR_PACKAGES
6869
Python3_EXECUTABLE
6970
Python3_LIBRARIES
7071
Python3_INCLUDE_DIRS

clang/docs/ReleaseNotes.rst

+18
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ Removed Compiler Flags
194194

195195
Attribute Changes in Clang
196196
--------------------------
197+
- Introduced a new function attribute ``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or
198+
``[[clang::amdgpu_max_num_work_groups(x, y, z)]]`` for the AMDGPU target. This attribute can be
199+
attached to HIP or OpenCL kernel function definitions to provide an optimization hint. The parameters
200+
``x``, ``y``, and ``z`` specify the maximum number of workgroups for the respective dimensions,
201+
and each must be a positive integer when provided. The parameter ``x`` is required, while ``y`` and
202+
``z`` are optional with a default value of 1.
197203

198204
Improvements to Clang's diagnostics
199205
-----------------------------------
@@ -265,6 +271,9 @@ Bug Fixes in This Version
265271
operator.
266272
Fixes (#GH83267).
267273

274+
- Clang now correctly generates overloads for bit-precise integer types for
275+
builtin operators in C++. Fixes #GH82998.
276+
268277
Bug Fixes to Compiler Builtins
269278
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
270279

@@ -351,6 +360,8 @@ Bug Fixes to C++ Support
351360
when one of the function had more specialized templates.
352361
Fixes (`#82509 <https://github.com/llvm/llvm-project/issues/82509>`_)
353362
and (`#74494 <https://github.com/llvm/llvm-project/issues/74494>`_)
363+
- Allow access to a public template alias declaration that refers to a friend's
364+
private nested type. (#GH25708).
354365

355366
Bug Fixes to AST Handling
356367
^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -447,6 +458,8 @@ AST Matchers
447458

448459
- ``isInStdNamespace`` now supports Decl declared with ``extern "C++"``.
449460
- Add ``isExplicitObjectMemberFunction``.
461+
- Fixed ``forEachArgumentWithParam`` and ``forEachArgumentWithParamType`` to
462+
not skip the explicit object parameter for operator calls.
450463

451464
clang-format
452465
------------
@@ -497,6 +510,11 @@ Python Binding Changes
497510

498511
- Exposed `CXRewriter` API as `class Rewriter`.
499512

513+
OpenMP Support
514+
--------------
515+
516+
- Added support for the `[[omp::assume]]` attribute.
517+
500518
Additional Information
501519
======================
502520

clang/docs/UsersManual.rst

+61
Original file line numberDiff line numberDiff line change
@@ -4227,7 +4227,68 @@ Clang expects the GCC executable "gcc.exe" compiled for
42274227

42284228
AIX
42294229
^^^
4230+
TOC Data Transformation
4231+
"""""""""""""""""""""""
4232+
TOC data transformation is off by default (``-mno-tocdata``).
4233+
When ``-mtocdata`` is specified, the TOC data transformation will be applied to
4234+
all suitable variables with static storage duration, including static data
4235+
members of classes and block-scope static variables (if not marked as exceptions,
4236+
see further below).
42304237

4238+
Suitable variables must:
4239+
4240+
- have complete types
4241+
- be independently generated (i.e., not placed in a pool)
4242+
- be at most as large as a pointer
4243+
- not be aligned more strictly than a pointer
4244+
- not be structs containing flexible array members
4245+
- not have internal linkage
4246+
- not have aliases
4247+
- not have section attributes
4248+
- not be thread local storage
4249+
4250+
The TOC data transformation results in the variable, not its address,
4251+
being placed in the TOC. This eliminates the need to load the address of the
4252+
variable from the TOC.
4253+
4254+
Note:
4255+
If the TOC data transformation is applied to a variable whose definition
4256+
is imported, the linker will generate fixup code for reading or writing to the
4257+
variable.
4258+
4259+
When multiple toc-data options are used, the last option specified takes effect.
4260+
For example: -mno-tocdata=g5,g1 -mtocdata=g1,g2 -mno-tocdata=g2 -mtocdata=g3,g4
4261+
results in -mtocdata=g1,g3,g4
4262+
4263+
Names of variables not having external linkage will be ignored.
4264+
4265+
**Options:**
4266+
4267+
.. option:: -mno-tocdata
4268+
4269+
This is the default behaviour. Only variables explicitly specified with
4270+
``-mtocdata=`` will have the TOC data transformation applied.
4271+
4272+
.. option:: -mtocdata
4273+
4274+
Apply the TOC data transformation to all suitable variables with static
4275+
storage duration (including static data members of classes and block-scope
4276+
static variables) that are not explicitly specified with ``-mno-tocdata=``.
4277+
4278+
.. option:: -mno-tocdata=
4279+
4280+
Can be used in conjunction with ``-mtocdata`` to mark the comma-separated
4281+
list of external linkage variables, specified using their mangled names, as
4282+
exceptions to ``-mtocdata``.
4283+
4284+
.. option:: -mtocdata=
4285+
4286+
Apply the TOC data transformation to the comma-separated list of external
4287+
linkage variables, specified using their mangled names, if they are suitable.
4288+
Emit diagnostics for all unsuitable variables specified.
4289+
4290+
Default Visibility Export Mapping
4291+
"""""""""""""""""""""""""""""""""
42314292
The ``-mdefault-visibility-export-mapping=`` option can be used to control
42324293
mapping of default visibility to an explicit shared object export
42334294
(i.e. XCOFF exported visibility). Three values are provided for the option:

0 commit comments

Comments
 (0)