Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 2e86b92

Browse files
josephevans and Joe Evans authored
Don't use the std:: namespace for the pow() function, since pow() is built into the CUDA math library, and cast the second argument to float so that a matching overload is found. (#19533)
Remove thrust library override and use default from cuda 11.0. Fix lint. Co-authored-by: Joe Evans <joeev@amazon.com>
1 parent 2721518 commit 2e86b92

2 files changed

Lines changed: 5 additions & 20 deletions

File tree

ci/build_windows.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -157,20 +157,6 @@ def windows_build(args):
157157
mxnet_root = get_mxnet_root()
158158
logging.info("Found MXNet root: {}".format(mxnet_root))
159159

160-
if 'GPU' in args.flavour:
161-
# Get Thrust version to be shipped in Cuda 11, due to flakyness of
162-
# older Thrust versions with MSVC 19 compiler
163-
with remember_cwd():
164-
tmpdirname = tempfile.mkdtemp()
165-
os.chdir(tmpdirname)
166-
r = requests.get('https://github.com/thrust/thrust/archive/1.9.8.zip', allow_redirects=True)
167-
with open('thrust.zip', 'wb') as f:
168-
f.write(r.content)
169-
with zipfile.ZipFile('thrust.zip', 'r') as zip_ref:
170-
zip_ref.extractall('.')
171-
thrust_path = os.path.join(tmpdirname, "thrust-1.9.8")
172-
173-
174160
# cuda thrust / CUB + VS 2019 is flaky: try multiple times if fail
175161
MAXIMUM_TRY = 5
176162
build_try = 0
@@ -184,8 +170,7 @@ def windows_build(args):
184170
os.chdir(path)
185171
env = os.environ.copy()
186172
if 'GPU' in args.flavour:
187-
env["CXXFLAGS"] = '/FS /MD /O2 /Ob2 /I {}'.format(thrust_path)
188-
env["CUDAFLAGS"] = '-I {}'.format(thrust_path)
173+
env["CXXFLAGS"] = '/FS /MD /O2 /Ob2'
189174
cmd = "\"{}\" && cmake -GNinja {} {}".format(args.vcvars,
190175
CMAKE_FLAGS[args.flavour],
191176
mxnet_root)

src/operator/contrib/multi_lamb.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@ __global__ void KernelStep1(const MultiLAMBKernelParam<DType, MPDType> kernel_pa
5050

5151
MPDType biascorrection1, biascorrection2;
5252
if (bias_correction) {
53-
biascorrection1 = 1.0 -
54-
static_cast<MPDType>(std::pow(beta1, kernel_params.step_count[tensor_id]));
55-
biascorrection2 = 1.0 -
56-
static_cast<MPDType>(std::pow(beta2, kernel_params.step_count[tensor_id]));
53+
biascorrection1 = 1.0 - static_cast<MPDType>(
54+
pow(beta1, static_cast<float>(kernel_params.step_count[tensor_id])));
55+
biascorrection2 = 1.0 - static_cast<MPDType>(
56+
pow(beta2, static_cast<float>(kernel_params.step_count[tensor_id])));
5757
} else {
5858
biascorrection1 = static_cast<MPDType>(1.0);
5959
biascorrection2 = static_cast<MPDType>(1.0);

0 commit comments

Comments
 (0)