forked from deepspeedai/DeepSpeed
Merge pull request deepspeedai#17 from rraminen/IFU_5_27
IFU-master-2021-05-27
Showing 97 changed files with 58,512 additions and 924 deletions.
Submodule DeepSpeedExamples updated from 9524d9 to 36846d
@@ -0,0 +1,9 @@
#pragma once

#include <cooperative_groups.h>
#include <cuda.h>
#include <cuda_fp16.h>
#include <stdio.h>
#include <stdlib.h>
#include <cassert>
#include <iostream>
@@ -0,0 +1,77 @@
#include <ATen/cuda/CUDAContext.h>
#include <torch/extension.h>
#include <vector>
#include "custom_cuda_layers.h"

template <typename T>
at::Tensor ds_quantize(at::Tensor& vals, int groups, int bits)
{
    auto t_size = vals.sizes();
    int size = 1;
    for (auto dim : t_size) size *= dim;

    if ((((size / groups) - 1) / 4096 + 1) <= MAX_REG) {
        launch_qunatize_kernel(
            (T*)vals.data_ptr(), size, groups, bits, at::cuda::getCurrentCUDAStream());
    }
    return vals;
}

template <typename T>
at::Tensor ds_sr_quantize(at::Tensor& vals, int groups, int bits)
{
    auto t_size = vals.sizes();
    int size = 1;
    for (auto dim : t_size) size *= dim;

    if (((size / groups) / 4 / 1024) <= 256) {
        launch_sr_qunatize_kernel(
            (T*)vals.data_ptr(), size, groups, bits, at::cuda::getCurrentCUDAStream());
    }
    return vals;
}

template <typename T>
at::Tensor ds_quantize_asym(at::Tensor& vals, int groups, int bits)
{
    auto t_size = vals.sizes();
    int size = 1;
    for (auto dim : t_size) size *= dim;

    if ((((size / groups) - 1) / 4096 + 1) <= MAX_REG) {
        launch_qunatize_kernel_asym(
            (T*)vals.data_ptr(), size, groups, bits, at::cuda::getCurrentCUDAStream());
    }
    return vals;
}

template <typename T>
at::Tensor ds_sr_quantize_asym(at::Tensor& vals, int groups, int bits)
{
    auto t_size = vals.sizes();
    int size = 1;
    for (auto dim : t_size) size *= dim;

    if (((size / groups) / 4 / 1024) <= 256) {
        launch_sr_qunatize_kernel_asym(
            (T*)vals.data_ptr(), size, groups, bits, at::cuda::getCurrentCUDAStream());
    }
    return vals;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
    m.def("ds_quantize_fp32", &ds_quantize<float>, "DeepSpeed Quantize with fp32 (CUDA)");
    m.def("ds_quantize_fp16", &ds_quantize<__half>, "DeepSpeed Quantize with fp16 (CUDA)");
    m.def("ds_sr_quantize_fp32", &ds_sr_quantize<float>, "DeepSpeed Quantize with fp32 (CUDA)");
    m.def("ds_sr_quantize_fp16", &ds_sr_quantize<__half>, "DeepSpeed Quantize with fp16 (CUDA)");
    m.def("ds_quantize_asym_fp32", &ds_quantize_asym<float>, "DeepSpeed Quantize with fp32 (CUDA)");
    m.def(
        "ds_quantize_asym_fp16", &ds_quantize_asym<__half>, "DeepSpeed Quantize with fp16 (CUDA)");
    m.def("ds_sr_quantize_asym_fp32",
          &ds_sr_quantize_asym<float>,
          "DeepSpeed Quantize with fp32 (CUDA)");
    m.def("ds_sr_quantize_asym_fp16",
          &ds_sr_quantize_asym<__half>,
          "DeepSpeed Quantize with fp16 (CUDA)");
}
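For orientation (not part of this commit): each wrapper above computes the total element count, launches the corresponding CUDA quantization kernel only when the per-group size fits within the kernel's launch limits (otherwise the tensor is returned untouched), and the templated functions are exposed to Python through pybind11. Below is a hedged sketch of building and calling the extension with torch.utils.cpp_extension.load; the module name ds_quantizer and the source paths are hypothetical stand-ins, since DeepSpeed typically builds such extensions through its own op builder.

import torch
from torch.utils.cpp_extension import load

# Hedged sketch: module name and source paths are assumptions for illustration only.
quantizer = load(
    name="ds_quantizer",                     # hypothetical extension name
    sources=[
        "csrc/quantization/pt_binding.cpp",  # assumed path of the binding shown above
        "csrc/quantization/quantizer.cu",    # assumed path of the kernel implementations
    ],
    verbose=True,
)

x = torch.randn(8, 4096, dtype=torch.float16, device="cuda")
# Signature mirrors the binding: (vals, groups, bits). The tensor is modified in place
# by the kernel and also returned by the wrapper.
y = quantizer.ds_quantize_fp16(x, x.size(0), 8)

The asymmetric and stochastic-rounding variants (ds_quantize_asym_*, ds_sr_quantize_*) share the same (tensor, groups, bits) calling convention.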