首页 \ 问答 \ CUDA中的原子操作失败(Atomic Operation failed in CUDA)

CUDA中的原子操作失败(Atomic Operation failed in CUDA)

 由于计算能力为2.1， atomicAdd和atomicMax操作不支持双精度，然后我根据堆栈溢出的一些答案定义这两个函数。  
 很奇怪atomicAdd函数运行良好，但atomicMax不起作用，这是我的代码。  
 我的代码测试是在每个块上生成随机数，然后对每个块上的随机数求和，我们有块和，我想在块和上测试atomicAdd和atomicMax 。  
#include <iostream>
#include <curand.h>
#include <curand_kernel.h>
#include <stdio.h>
#include <stdlib.h>


#define num_of_blocks 2
#define threads_per_block 2
#define tot_threads 4


__device__ double gsum[num_of_blocks];

__device__ double dev_sum;

__device__ double dev_max;

// set seed for random number generator
__global__ void initcuRand(curandState* globalState, unsigned long seed){
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    curand_init(seed, idx, 0, &globalState[idx]);
}

// atomiMax for double
__device__ double atomicMax_d(double* address, double val)
{
    unsigned long long int* address_as_i = (unsigned long long int*)address;
    unsigned long long int old = *address_as_i, assumed;
    do {
        assumed = old;
        old = ::atomicCAS(address_as_i, assumed, __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
    } while (assumed != old);
    return __longlong_as_double(old);
}

// atomicAdd for double
__device__ double atomicAdd_d(double* address, double val)
{
    unsigned long long int* address_as_ull = (unsigned long long int*)address;
    unsigned long long int old = *address_as_ull, assumed;
    do{
        assumed = old;
        old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
    }while(assumed != old);
    return __longlong_as_double(old);
}

__global__ void kernel(curandState *globalState){
    // global id
    int gidx    = threadIdx.x + blockIdx.x * blockDim.x;
    // local id
    int lidx    = threadIdx.x;

    // creat shared memory to store seeds
    __shared__ curandState localState[tot_threads];

    __shared__ double srandnum[threads_per_block];

    // copy global seed to local
    localState[lidx]    = globalState[gidx];

    //synchronize the local threads writing to the local memory cache
    __syncthreads();

    // generate random number from normal distribution in shared memory
    srandnum[lidx]  = curand_normal(&localState[lidx]);
    __syncthreads();

    if(lidx == 0){srandnum[lidx] += srandnum[lidx + 1];}   // sum of each block
    if(lidx == 0){gsum[blockIdx.x] = srandnum[lidx];}      // copy the sums back to global memory

    __threadfence();

    if( gidx < num_of_blocks){
        atomicAdd_d(&dev_sum, gsum[gidx]);
    }

    if( gidx < num_of_blocks){
        atomicMax_d(&dev_max, gsum[gidx]);
    }

    if( gidx == 0){
        printf("Sum is: %lf\n", dev_sum);
    }

    if( gidx == 1){
        printf("Max is: %lf\n", dev_max);
    }
}


int main(){
    // set seed on device
    curandState *globalState;
    cudaMalloc((void**)&globalState, tot_threads*sizeof(curandState));
    initcuRand<<<num_of_blocks, threads_per_block>>>(globalState, 1);

    // launch kernel
    kernel<<<num_of_blocks, threads_per_block>>>(globalState);
    double randnum[num_of_blocks];

    cudaMemcpyFromSymbol(randnum, gsum, num_of_blocks*sizeof(double), 0, cudaMemcpyDeviceToHost);

    std::cout << "Sum of each block:\n";
    for (int i = 0; i < num_of_blocks; ++i){
        std::cout << randnum[i] << std::endl;
    }

    cudaFree(globalState);
    return 0;
}
 
 我得到的结果是  
Sum is: -0.898329
Max is: 0.000000
Sum of each block:
-0.0152994
-0.88303
 
 从结果中，我知道atomicAdd函数有效，但atomicMax函数不起作用，我不知道这一点。 先谢谢。 

As the compute ability is 2.1, the atomicAdd and atomicMax operations do not support double precision, then I define both functions based on some answers on stack overflow.  
It is strange that the atomicAdd function works well but the atomicMax doesn't work, here is my code. 
The test of my code is to generate random number on each block, and then sum the random numbers on each block, we have block sum, I want to test the atomicAdd and atomicMax on the block sum. 
#include <iostream>
#include <curand.h>
#include <curand_kernel.h>
#include <stdio.h>
#include <stdlib.h>


#define num_of_blocks 2
#define threads_per_block 2
#define tot_threads 4


__device__ double gsum[num_of_blocks];

__device__ double dev_sum;

__device__ double dev_max;

// set seed for random number generator
__global__ void initcuRand(curandState* globalState, unsigned long seed){
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    curand_init(seed, idx, 0, &globalState[idx]);
}

// atomiMax for double
__device__ double atomicMax_d(double* address, double val)
{
    unsigned long long int* address_as_i = (unsigned long long int*)address;
    unsigned long long int old = *address_as_i, assumed;
    do {
        assumed = old;
        old = ::atomicCAS(address_as_i, assumed, __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
    } while (assumed != old);
    return __longlong_as_double(old);
}

// atomicAdd for double
__device__ double atomicAdd_d(double* address, double val)
{
    unsigned long long int* address_as_ull = (unsigned long long int*)address;
    unsigned long long int old = *address_as_ull, assumed;
    do{
        assumed = old;
        old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
    }while(assumed != old);
    return __longlong_as_double(old);
}

__global__ void kernel(curandState *globalState){
    // global id
    int gidx    = threadIdx.x + blockIdx.x * blockDim.x;
    // local id
    int lidx    = threadIdx.x;

    // creat shared memory to store seeds
    __shared__ curandState localState[tot_threads];

    __shared__ double srandnum[threads_per_block];

    // copy global seed to local
    localState[lidx]    = globalState[gidx];

    //synchronize the local threads writing to the local memory cache
    __syncthreads();

    // generate random number from normal distribution in shared memory
    srandnum[lidx]  = curand_normal(&localState[lidx]);
    __syncthreads();

    if(lidx == 0){srandnum[lidx] += srandnum[lidx + 1];}   // sum of each block
    if(lidx == 0){gsum[blockIdx.x] = srandnum[lidx];}      // copy the sums back to global memory

    __threadfence();

    if( gidx < num_of_blocks){
        atomicAdd_d(&dev_sum, gsum[gidx]);
    }

    if( gidx < num_of_blocks){
        atomicMax_d(&dev_max, gsum[gidx]);
    }

    if( gidx == 0){
        printf("Sum is: %lf\n", dev_sum);
    }

    if( gidx == 1){
        printf("Max is: %lf\n", dev_max);
    }
}


int main(){
    // set seed on device
    curandState *globalState;
    cudaMalloc((void**)&globalState, tot_threads*sizeof(curandState));
    initcuRand<<<num_of_blocks, threads_per_block>>>(globalState, 1);

    // launch kernel
    kernel<<<num_of_blocks, threads_per_block>>>(globalState);
    double randnum[num_of_blocks];

    cudaMemcpyFromSymbol(randnum, gsum, num_of_blocks*sizeof(double), 0, cudaMemcpyDeviceToHost);

    std::cout << "Sum of each block:\n";
    for (int i = 0; i < num_of_blocks; ++i){
        std::cout << randnum[i] << std::endl;
    }

    cudaFree(globalState);
    return 0;
}
 
The result I get is  
Sum is: -0.898329
Max is: 0.000000
Sum of each block:
-0.0152994
-0.88303
 
From the result, I know that the atomicAdd function works but the atomicMax function doesn't work, I have no idea of this. Thanks beforehand.

原文：https://stackoverflow.com/questions/40836630

更新时间：2023-05-18 15:05

最满意答案

 我不确定，但可能是错误发生，因为您没有验证架构中的“类别”。 尝试在“类别”中添加“blackbox：true”，以便它接受任何类型的对象。  
Industry.attachSchema(new SimpleSchema({
    label: {
        type: String
    },
    value: {
        type: String
    },
    categories: {
        type: [Object],
        blackbox:true     // allows all objects
    }          
}));
 
 完成后，尝试像这样添加值  
var newObject = {
    id: categoryId,
    label: newCategory,
    value: newCategory
}
Industry.update({   
    _id: industryId
}, {
    $push: {
        categories: newObject     //newObject can be anything
    }
});
 
 这将允许您将任何类型的对象添加到类别字段中。 但是你在评论中提到类别也是另一个集合。 如果您已经有类别的SimpleSchema，那么您可以验证categories字段以仅接受与SimpleSchema匹配的对象，如此类别  
Industry.attachSchema(new SimpleSchema({
    label: {
        type: String
    },
    value: {
        type: String
    },
    categories: {
        type: [categoriesSchema]    // replace categoriesSchema by name of SimpleSchema for categories
    }          
}));
 
 在这种情况下，只有与categoriesSchema匹配的对象才会被允许进入类别字段。 任何其他类型都将被过滤掉。 你也不会在控制台上遇到任何错误，试图插入其他类型。（这是我认为当你尝试现在插入因为没有指定验证时发生的事情）  
 编辑：答案的解释  
 在SimpleSchema中，当您定义一个对象数组时，您必须验证它，即，您必须告诉它它可以接受哪些对象以及它不能接受哪些对象。 例如，当你定义它时  
...
categories: {
    type: [categoriesSchema]    // Correct
} 
 
 这意味着只能在其中插入与名为categoriesSchema的另一个SimpleSchema结构相似的对象。 根据您的示例，您尝试插入的任何对象都应采用此格式  
{
    id: categoryId,
    label: newCategory,
    value: newCategory
}
 
 插入时将拒绝任何不具有此格式的对象。 这就是为什么你尝试插入的所有对象，当你最初尝试使用这样结构化的架构时，这些对象被拒绝  
...
categories: {
    type: [Object]     // Not correct as there is no SimpleSchema named 'Object' to match with
}
 
 黑盒：真  
 现在，假设您没有要过滤的对象，并希望在未经验证的情况下插入所有对象。 这就是设置“blackbox：true”的地方。如果你定义了这样的字段  
...
categories: {
    type: [Object],     // Correct
    blackbox:true
}
 
 这意味着类别可以是任何对象，不需要针对其他一些SimpleSchema进行验证。 所以无论你试图插入什么，都会被接受。 

I am not sure but maybe the error is occuring because you are not validating 'categories' in your schema. Try adding a 'blackbox:true' to your 'categories' so that it accepts any types of objects. 
Industry.attachSchema(new SimpleSchema({
    label: {
        type: String
    },
    value: {
        type: String
    },
    categories: {
        type: [Object],
        blackbox:true     // allows all objects
    }          
}));
 
Once you've done that try adding values to it like this 
var newObject = {
    id: categoryId,
    label: newCategory,
    value: newCategory
}
Industry.update({   
    _id: industryId
}, {
    $push: {
        categories: newObject     //newObject can be anything
    }
});
 
This would allow you to add any kind of object into the categories field. But you mentioned in a comment that categories is also another collection. If you already have a SimpleSchema for categories then you could validate the categories field to only accept objects that match with the SimpleSchema for categories like this 
Industry.attachSchema(new SimpleSchema({
    label: {
        type: String
    },
    value: {
        type: String
    },
    categories: {
        type: [categoriesSchema]    // replace categoriesSchema by name of SimpleSchema for categories
    }          
}));
 
In this case only objects that match categoriesSchema will be allowed into categories field. Any other type would be filtered out. Also you wouldnt get any error on console for trying to insert other types.(which is what i think is happening when you try to insert now as no validation is specified) 
EDIT : EXPLANATION OF ANSWER 
In a SimpleSchema when you define an array of objects you have to validate it,ie, you have to tell it what objects it can accept and what it can't. For example when you define it like 
...
categories: {
    type: [categoriesSchema]    // Correct
} 
 
it means that objects that are similar in structure to those in another SimpleSchema named categoriesSchema only can be inserted into it. According to your example any object you try to insert should be of this format 
{
    id: categoryId,
    label: newCategory,
    value: newCategory
}
 
Any object that isn't of this format will be rejected while insert. Thats why all objects you tried to insert where rejected when you tried initially with your schema structured like this 
...
categories: {
    type: [Object]     // Not correct as there is no SimpleSchema named 'Object' to match with
}
 
Blackbox:true 
Now, lets say you don't what your object to be filtered and want all objects to be inserted without validation. Thats where setting "blackbox:true" comes in. If you define a field like this 
...
categories: {
    type: [Object],     // Correct
    blackbox:true
}
 
it means that categories can be any object and need not be validated with respect to some other SimpleSchema. So whatever you try to insert gets accepted.

CUDA中的原子操作失败(Atomic Operation failed in CUDA)

最满意答案

相关问答

MongoDB：更新集合中的数组项不起作用(MongoDB: Updating array item in collection is not working)[2022-04-14]

NodeJs MongoDB - 在更新集合中的某个字段上的每个文档时都存在TypeError(NodeJs MongoDB -Having TypeError while updating every document on one field in a collection)[2023-03-06]

MONGODB - 将集合中的字段添加到Item数组中的字段(MONGODB - adding a field in collection to a field in Item array)[2023-12-22]

使用MongoDB / Meteor更新数组中的特定元素(Updating a specific element in an array with MongoDB / Meteor)[2023-05-27]

使用未知修改字段更新MongoDB集合的最佳实践(Best practice for updating a MongoDB collection with unknown modified fields)[2023-01-06]

如何将mongodb数组长度作为附加集合字段发布？(How to publish a mongodb array length as an additional collection field?)[2023-03-08]

从MongoDB集合中删除空的数组字段(Delete empty array fields from MongoDB collection)[2024-01-12]

如何从集合中插入多个文档作为另一个文档中的数组字段 - MongoDB(How to insert multiple documents from a collection as an array field in another document - MongoDB)[2022-09-28]

更新mongodb集合中的数组字段(Updating an array field in a mongodb collection)[2022-03-25]

为阵列字段更新MongoDB集合toLowercase(Update MongoDB collection toLowercase for array field)[2024-03-02]

相关文章

最新问答