Skip to content

Commit

Permalink
Merge branch 'refs/heads/550.127.08-p2p' into 550.142-p2p
Browse files Browse the repository at this point in the history
# Conflicts:
#	README.md
  • Loading branch information
bold84 committed Dec 19, 2024
2 parents 346011d + 1d243c2 commit 4638499
Show file tree
Hide file tree
Showing 11 changed files with 238 additions and 975 deletions.
51 changes: 51 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Builds the patched open GPU kernel modules on every push and repackages them
# into NVIDIA's "nvidia-kernel-source-550-open" source deb. The deb is uploaded
# as a workflow artifact and, for tag pushes, attached to a GitHub release.
name: Build

on:
  push:

permissions:
  # Needed by the release step to attach the deb to a release.
  contents: write

jobs:
  build-deb:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Determine driver version
        # The ref name is expected to be "<driver-version>-p2p"
        # (e.g. "550.142-p2p"); stripping the suffix yields the version used in
        # NVIDIA's package file name and in the /usr/src/nvidia-<version> path.
        # Computed once and exported so later steps cannot drift apart.
        run: |
          echo "DRIVER_VERSION=${GITHUB_REF_NAME/-p2p/}" >> "$GITHUB_ENV"
      - name: Install dependencies
        # Refresh the package index first; a stale index on the runner image
        # can make the install fail with 404s.
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential dpkg-dev
      - name: Download source deb
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the error page as source.deb; --location follows redirects.
        run: |
          curl --fail --location -o source.deb \
            "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/nvidia-kernel-source-550-open_${DRIVER_VERSION}-0ubuntu1_amd64.deb"
      - name: Extract source deb
        run: dpkg-deb -R source.deb source-deb
      - name: Build kernel module
        run: |
          make modules -j"$(nproc)"
      - name: Replace kernel module source
        run: |
          cp kernel-open/nvidia/nv-pci.c "source-deb/usr/src/nvidia-${DRIVER_VERSION}/nvidia/nv-pci.c"
      - name: Replace kernel module
        # Overwrite the prebuilt objects shipped in the deb with the ones
        # produced by the build step above.
        run: |
          cp src/nvidia/_out/Linux_x86_64/nv-kernel.o "source-deb/usr/src/nvidia-${DRIVER_VERSION}/nvidia/nv-kernel.o_binary"
          cp src/nvidia-modeset/_out/Linux_x86_64/nv-modeset-kernel.o "source-deb/usr/src/nvidia-${DRIVER_VERSION}/nvidia-modeset/nv-modeset-kernel.o_binary"
      - name: Repack deb
        run: dpkg-deb -b source-deb nvidia-kernel-source-550-open-0ubuntu1_amd64.deb
      - name: Upload deb
        uses: actions/upload-artifact@v4
        with:
          name: nvidia-kernel-source-550-open-0ubuntu1_amd64.deb
          path: nvidia-kernel-source-550-open-0ubuntu1_amd64.deb
      - name: Create release
        uses: softprops/action-gh-release@v2
        # Only tag pushes produce a release; branch pushes stop at the artifact.
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: nvidia-kernel-source-550-open-0ubuntu1_amd64.deb
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1,061 changes: 125 additions & 936 deletions README.md

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Build the kernel modules from this tree, swap them in for the currently
# loaded NVIDIA modules, and run nvidia-smi as a final smoke check.
set -euo pipefail

jobs="$(nproc)"

# Build before touching the running driver: if compilation fails, the script
# stops here and the currently loaded modules stay in place.
make modules -j"${jobs}"

# Best-effort unload. rmmod fails when the modules are not loaded or are still
# in use; that must not abort the install, so the failure is reported and
# ignored.
sudo rmmod nvidia_drm nvidia_modeset nvidia_uvm nvidia \
    || echo "warning: could not unload all NVIDIA modules; continuing" >&2

sudo make modules_install -j"${jobs}"
sudo depmod
nvidia-smi
8 changes: 4 additions & 4 deletions kernel-open/nvidia/nv-pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,11 @@ static int nv_resize_pcie_bars(struct pci_dev *pci_dev) {
struct pci_host_bridge *host;
#endif

if (NVreg_EnableResizableBar == 0)
/*if (NVreg_EnableResizableBar == 0)
{
nv_printf(NV_DBG_INFO, "NVRM: resizable BAR disabled by regkey, skipping\n");
return 0;
}
}*/

// Check if BAR1 has PCIe rebar capabilities
sizes = pci_rebar_get_possible_sizes(pci_dev, NV_GPU_BAR1);
Expand Down Expand Up @@ -404,7 +404,7 @@ nv_init_coherent_link_info
/* Fail for the baremetal case */
goto failed;
}

/*
* For the virtualization usecase on SHH, the coherent GPU memory
* PA is exposed as BAR2 to the VM and the "nvidia,gpu-mem-base-pa"
Expand Down Expand Up @@ -1019,7 +1019,7 @@ nv_pci_remove(struct pci_dev *pci_dev)
nv_printf(NV_DBG_ERRORS,
"NVRM: Failed removal of device %04x:%02x:%02x.%x!\n",
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
WARN_ON(1);
goto done;
}
Expand Down
4 changes: 2 additions & 2 deletions src/nvidia/src/kernel/gpu/bif/kernel_bif.c
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,7 @@ _kbifInitRegistryOverrides
NvU32 data32;

// P2P Override
pKernelBif->p2pOverride = BIF_P2P_NOT_OVERRIDEN;
pKernelBif->p2pOverride = 0x11;
if (osReadRegistryDword(pGpu, NV_REG_STR_CL_FORCE_P2P, &data32) == NV_OK)
{
pKernelBif->p2pOverride = data32;
Expand All @@ -718,7 +718,7 @@ _kbifInitRegistryOverrides
}

// P2P force type override
pKernelBif->forceP2PType = NV_REG_STR_RM_FORCE_P2P_TYPE_DEFAULT;
pKernelBif->forceP2PType = NV_REG_STR_RM_FORCE_P2P_TYPE_BAR1P2P;
if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FORCE_P2P_TYPE, &data32) == NV_OK &&
(data32 <= NV_REG_STR_RM_FORCE_P2P_TYPE_MAX))
{
Expand Down
14 changes: 7 additions & 7 deletions src/nvidia/src/kernel/gpu/bus/arch/hopper/kern_bus_gh100.c
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ kbusVerifyBar2_GH100
{
memdescCreateExisting(&memDesc, pGpu, size, ADDR_FBMEM, NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE);
}
memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_95,
memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_95,
(&memDesc));
if (status != NV_OK)
{
Expand Down Expand Up @@ -765,7 +765,7 @@ kbusCreateP2PMapping_GH100

if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE_BAR1, attributes))
{
return kbusCreateP2PMappingForBar1P2P_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
return kbusCreateP2PMappingForBar1P2P_GH100(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
}

if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE, attributes))
Expand Down Expand Up @@ -816,7 +816,7 @@ kbusRemoveP2PMapping_GH100

if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE_BAR1, attributes))
{
return kbusRemoveP2PMappingForBar1P2P_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
return kbusRemoveP2PMappingForBar1P2P_GH100(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
}

if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE, attributes))
Expand Down Expand Up @@ -1139,7 +1139,7 @@ kbusIsBar1P2PCapable_GH100
//
// GPU BAR1 supports the SYSMEM mapping for the bar1 doorbell, RM needs
// to make sure that BAR1 VA has 128KB space left for such cases after all
// FB statically mapped in BAR1. Bug 3869651 #14.
// FB statically mapped in BAR1. Bug 3869651 #14.
//
if ((bar1VASize < (32 * RM_PAGE_SIZE)) ||
((bar1VASize - (32 * RM_PAGE_SIZE)) < RM_ALIGN_UP(fbSize, RM_PAGE_SIZE_2M)))
Expand Down Expand Up @@ -1448,7 +1448,7 @@ kbusCreateP2PMappingForBar1P2P_GH100
return NV_ERR_NOT_SUPPORTED;
}

if (!kbusIsPcieBar1P2PMappingSupported_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1))
if (!kbusIsPcieBar1P2PMappingSupported_GH100(pGpu0, pKernelBus0, pGpu1, pKernelBus1))
{
return NV_ERR_NOT_SUPPORTED;
}
Expand Down Expand Up @@ -1871,8 +1871,8 @@ kbusEnableStaticBar1Mapping_GH100
pKernelBus->bar1[gfid].staticBar1.pDmaMemDesc = pDmaMemDesc;
pKernelBus->bar1[gfid].staticBar1.size = bar1MapSize;

NV_PRINTF(LEVEL_INFO, "Static bar1 mapped offset 0x%llx size 0x%llx\n",
bar1Offset, bar1MapSize);
NV_PRINTF(LEVEL_ERROR, "Static bar1 mapped offset 0x%llx size 0x%llx bar1BusAddr 0x%llx\n",
bar1Offset, bar1MapSize, bar1BusAddr);

return NV_OK;

Expand Down
20 changes: 10 additions & 10 deletions src/nvidia/src/kernel/gpu/bus/arch/maxwell/kern_bus_gm107.c
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,7 @@ kbusInitBar1_GM107(OBJGPU *pGpu, KernelBus *pKernelBus, NvU32 gfid)
//
NV_ASSERT(pKernelBus->bar1[gfid].apertureLength <= kbusGetPciBarSize(pKernelBus, 1));

bBar1P2PCapable = kbusIsBar1P2PCapable(pGpu, pKernelBus, gfid);
bBar1P2PCapable = kbusIsBar1P2PCapable_GH100(pGpu, pKernelBus, gfid);

//
// If we need to preserve a console mapping at the start of BAR1, we
Expand Down Expand Up @@ -1094,7 +1094,7 @@ kbusInitBar1_GM107(OBJGPU *pGpu, KernelBus *pKernelBus, NvU32 gfid)
{
// Enable the static BAR1 mapping for the BAR1 P2P
NV_ASSERT_OK_OR_GOTO(rmStatus,
kbusEnableStaticBar1Mapping_HAL(pGpu, pKernelBus, gfid),
kbusEnableStaticBar1Mapping_GH100(pGpu, pKernelBus, gfid),
kbusInitBar1_failed);
}
else
Expand Down Expand Up @@ -1605,7 +1605,7 @@ kbusSetupBar2GpuVaSpace_GM107
NV_ASSERT_OR_RETURN(status == NV_OK, status);
}

memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_63,
memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_63,
pKernelBus->bar2[gfid].pInstBlkMemDesc);
NV_ASSERT_OR_RETURN(status == NV_OK, status);

Expand Down Expand Up @@ -1687,7 +1687,7 @@ kbusSetupBar2GpuVaSpace_GM107

if (IS_GFID_VF(gfid))
{
memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_64,
memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_64,
pPageLevelsMemDesc);
NV_ASSERT_OR_GOTO(status == NV_OK, cleanup);

Expand Down Expand Up @@ -3013,15 +3013,15 @@ _kbusMapAperture_GM107
NvU32 swizzId = KMIGMGR_SWIZZID_INVALID;
NvU32 gfid;

if (kbusIsStaticBar1Enabled(pGpu, pKernelBus) &&
/*if (kbusIsStaticBar1Enabled(pGpu, pKernelBus) &&
(memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM))
{
NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));
return kbusGetStaticFbAperture_HAL(pGpu, pKernelBus, pMemDesc,
offset, pAperOffset,
pLength, gfid);
}
}*/

// Ensure that the BAR1 VA space is the same across all subdevices
if (IsSLIEnabled(pGpu) && ((mapFlags & BUS_MAP_FB_FLAGS_MAP_UNICAST) == 0))
Expand Down Expand Up @@ -3136,12 +3136,12 @@ _kbusUnmapAperture_GM107
NV_STATUS rmStatus = NV_OK;
VirtMemAllocator *pDma = GPU_GET_DMA(pGpu);

if (kbusIsStaticBar1Enabled(pGpu, pKernelBus) &&
/*if (kbusIsStaticBar1Enabled(pGpu, pKernelBus) &&
(memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM))
{
// No op for the static bar1 mode
return NV_OK;
}
}*/

rmStatus = dmaFreeMapping_HAL(pGpu, pDma, pVAS, aperOffset, pMemDesc, 0, NULL);

Expand Down Expand Up @@ -3749,7 +3749,7 @@ kbusVerifyBar2_GM107
{
memdescCreateExisting(&memDesc, pGpu, size, ADDR_FBMEM, NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE);
}
memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_65,
memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_65,
(&memDesc));
if (status != NV_OK)
{
Expand Down Expand Up @@ -4805,7 +4805,7 @@ kbusBar1InstBlkVasUpdate_GM107
NV_ASSERT_OR_RETURN(status == NV_OK, status);
}

memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_66,
memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_66,
pKernelBus->bar1[gfid].pInstBlkMemDesc);
NV_ASSERT_OR_RETURN(status == NV_OK, status);

Expand Down
5 changes: 5 additions & 0 deletions src/nvidia/src/kernel/gpu/bus/arch/pascal/kern_bus_gp100.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ kbusCreateP2PMapping_GP100
return kbusCreateP2PMappingForMailbox_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
}

if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE_BAR1, attributes))
{
return kbusCreateP2PMappingForBar1P2P_GH100(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
}

NV_PRINTF(LEVEL_ERROR, "P2P type %d is not supported\n", DRF_VAL(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, attributes));

return NV_ERR_NOT_SUPPORTED;
Expand Down
16 changes: 8 additions & 8 deletions src/nvidia/src/kernel/gpu/bus/p2p_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -575,20 +575,20 @@ p2papiConstruct_IMPL
//
// TODO: This function need to have a cleanup path when this function
// fails after kbusCreateP2PMaping(), busBindLocalGfidForP2P()
// and busBindRemoteGfidForP2P(). The current state, the
// and busBindRemoteGfidForP2P(). The current state, the
// function just returns an error. Bug 4016670 filed to track
// the effort.
//

// setup the p2p resources
NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
kbusCreateP2PMapping_HAL(pLocalGpu, pLocalKernelBus, pRemoteGpu,
kbusCreateP2PMapping_GH100(pLocalGpu, pLocalKernelBus, pRemoteGpu,
pRemoteKernelBus, &peer1, &peer2,
pP2PApi->attributes));
if (bEgmPeer)
{
NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
kbusCreateP2PMapping_HAL(pLocalGpu, pLocalKernelBus, pRemoteGpu,
kbusCreateP2PMapping_GH100(pLocalGpu, pLocalKernelBus, pRemoteGpu,
pRemoteKernelBus, &egmPeer1, &egmPeer2,
pP2PApi->attributes |
DRF_DEF(_P2PAPI, _ATTRIBUTES, _REMOTE_EGM, _YES)));
Expand All @@ -598,14 +598,14 @@ p2papiConstruct_IMPL
(pCallContext->secInfo.privLevel >= RS_PRIV_LEVEL_KERNEL))
{
NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
kbusGetBar1P2PDmaInfo_HAL(pLocalGpu, pRemoteGpu,
kbusGetBar1P2PDmaInfo_GH100(pLocalGpu, pRemoteGpu,
pRemoteKernelBus,
&pNv503bAllocParams->l2pBar1P2PDmaInfo.dma_address,
&pNv503bAllocParams->l2pBar1P2PDmaInfo.dma_size));

NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
kbusGetBar1P2PDmaInfo_HAL(pRemoteGpu, pLocalGpu,
pLocalKernelBus,
kbusGetBar1P2PDmaInfo_GH100(pRemoteGpu, pLocalGpu,
pLocalKernelBus,
&pNv503bAllocParams->p2lBar1P2PDmaInfo.dma_address,
&pNv503bAllocParams->p2lBar1P2PDmaInfo.dma_size));
}
Expand Down Expand Up @@ -740,7 +740,7 @@ p2papiDestruct_IMPL
{
// remove any resources associated with this mapping
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
kbusRemoveP2PMapping_HAL(pLocalGpu, pLocalKernelBus,
kbusRemoveP2PMapping_GH100(pLocalGpu, pLocalKernelBus,
pRemoteGpu, pRemoteKernelBus,
pP2PApi->peerId1, pP2PApi->peerId2,
pP2PApi->attributes), end);
Expand All @@ -749,7 +749,7 @@ p2papiDestruct_IMPL
memmgrIsLocalEgmEnabled(GPU_GET_MEMORY_MANAGER(pRemoteGpu)) &&
!GPU_IS_NVSWITCH_DETECTED(pLocalGpu))
{
status = kbusRemoveP2PMapping_HAL(pLocalGpu, pLocalKernelBus,
status = kbusRemoveP2PMapping_GH100(pLocalGpu, pLocalKernelBus,
pRemoteGpu, pRemoteKernelBus,
pP2PApi->egmPeerId1, pP2PApi->egmPeerId2,
pP2PApi->attributes |
Expand Down
Loading

0 comments on commit 4638499

Please sign in to comment.