
Before Change

    # registers to be invoked more threads per block:
    dev = misc.get_current_device()
    max_threads_per_block = 256
    block_dim, grid_dim = \
               misc.select_block_grid_sizes(dev, z_gpu.shape, max_threads_per_block)

    # Set this to False when debugging to make sure the compiled kernel is

After Change

        raise ValueError("unsupported type")
    op = "e[i] = exp1(z[i])"
        func = exp1.cache[z_gpu.dtype]
    except KeyError:
        func = elementwise.ElementwiseKernel(args, op,
                                 options=["-I", install_headers],
                                 preamble='#include "cuSpecialFuncs.h"')
        exp1.cache[z_gpu.dtype] = func

    e_gpu = gpuarray.empty_like(z_gpu)
    func(z_gpu, e_gpu)

    return e_gpu
Italian Trulli

Frequency: 3

Non-data size: 14


Project Name: lebedov/scikit-cuda
Commit Name: 5dc3e70446c09312e2bd6dbe263b9af6f8405e62
Time: 2014-05-04
Author: lev@columbia.edu
File Name: scikits/cuda/special.py
Class Name:
Method Name: exp1

Project Name: lebedov/scikit-cuda
Commit Name: 5dc3e70446c09312e2bd6dbe263b9af6f8405e62
Time: 2014-05-04
Author: lev@columbia.edu
File Name: scikits/cuda/special.py
Class Name:
Method Name: expi

Project Name: lebedov/scikit-cuda
Commit Name: 5dc3e70446c09312e2bd6dbe263b9af6f8405e62
Time: 2014-05-04
Author: lev@columbia.edu
File Name: scikits/cuda/special.py
Class Name:
Method Name: sici