CUDA中的原子操作失败(Atomic Operation failed in CUDA)
由于计算能力为2.1,
atomicAdd
和atomicMax
操作不支持双精度,所以我根据 Stack Overflow 上的一些答案定义了这两个函数。奇怪的是,
atomicAdd
函数运行良好,但atomicMax
不起作用。下面是我的代码。我的测试是在每个块上生成随机数,然后对每个块内的随机数求和,得到每个块的和,我想在这些块和上测试
atomicAdd
和atomicMax
。#include <iostream> #include <curand.h> #include <curand_kernel.h> #include <stdio.h> #include <stdlib.h> #define num_of_blocks 2 #define threads_per_block 2 #define tot_threads 4 __device__ double gsum[num_of_blocks]; __device__ double dev_sum; __device__ double dev_max; // set seed for random number generator __global__ void initcuRand(curandState* globalState, unsigned long seed){ int idx = threadIdx.x + blockIdx.x * blockDim.x; curand_init(seed, idx, 0, &globalState[idx]); } // atomiMax for double __device__ double atomicMax_d(double* address, double val) { unsigned long long int* address_as_i = (unsigned long long int*)address; unsigned long long int old = *address_as_i, assumed; do { assumed = old; old = ::atomicCAS(address_as_i, assumed, __double_as_longlong(::fmax(val, __longlong_as_double(assumed)))); } while (assumed != old); return __longlong_as_double(old); } // atomicAdd for double __device__ double atomicAdd_d(double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; do{ assumed = old; old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); }while(assumed != old); return __longlong_as_double(old); } __global__ void kernel(curandState *globalState){ // global id int gidx = threadIdx.x + blockIdx.x * blockDim.x; // local id int lidx = threadIdx.x; // creat shared memory to store seeds __shared__ curandState localState[tot_threads]; __shared__ double srandnum[threads_per_block]; // copy global seed to local localState[lidx] = globalState[gidx]; //synchronize the local threads writing to the local memory cache __syncthreads(); // generate random number from normal distribution in shared memory srandnum[lidx] = curand_normal(&localState[lidx]); __syncthreads(); if(lidx == 0){srandnum[lidx] += srandnum[lidx + 1];} // sum of each block if(lidx == 0){gsum[blockIdx.x] = srandnum[lidx];} // copy the sums back to 
global memory __threadfence(); if( gidx < num_of_blocks){ atomicAdd_d(&dev_sum, gsum[gidx]); } if( gidx < num_of_blocks){ atomicMax_d(&dev_max, gsum[gidx]); } if( gidx == 0){ printf("Sum is: %lf\n", dev_sum); } if( gidx == 1){ printf("Max is: %lf\n", dev_max); } } int main(){ // set seed on device curandState *globalState; cudaMalloc((void**)&globalState, tot_threads*sizeof(curandState)); initcuRand<<<num_of_blocks, threads_per_block>>>(globalState, 1); // launch kernel kernel<<<num_of_blocks, threads_per_block>>>(globalState); double randnum[num_of_blocks]; cudaMemcpyFromSymbol(randnum, gsum, num_of_blocks*sizeof(double), 0, cudaMemcpyDeviceToHost); std::cout << "Sum of each block:\n"; for (int i = 0; i < num_of_blocks; ++i){ std::cout << randnum[i] << std::endl; } cudaFree(globalState); return 0; }
我得到的结果是
Sum is: -0.898329 Max is: 0.000000 Sum of each block: -0.0152994 -0.88303
从结果中,我知道
atomicAdd
函数有效,但atomicMax
函数不起作用,我不知道原因。 先谢谢了。As the compute capability is 2.1, the
atomicAdd
and atomicMax
operations do not support double precision, so I defined both functions based on some answers on Stack Overflow. It is strange that the
atomicAdd
function works well but the atomicMax
doesn't work; here is my code. My test generates random numbers in each block and then sums the random numbers within each block, giving one sum per block. I want to test the
atomicAdd
and atomicMax
on the block sum.#include <iostream> #include <curand.h> #include <curand_kernel.h> #include <stdio.h> #include <stdlib.h> #define num_of_blocks 2 #define threads_per_block 2 #define tot_threads 4 __device__ double gsum[num_of_blocks]; __device__ double dev_sum; __device__ double dev_max; // set seed for random number generator __global__ void initcuRand(curandState* globalState, unsigned long seed){ int idx = threadIdx.x + blockIdx.x * blockDim.x; curand_init(seed, idx, 0, &globalState[idx]); } // atomiMax for double __device__ double atomicMax_d(double* address, double val) { unsigned long long int* address_as_i = (unsigned long long int*)address; unsigned long long int old = *address_as_i, assumed; do { assumed = old; old = ::atomicCAS(address_as_i, assumed, __double_as_longlong(::fmax(val, __longlong_as_double(assumed)))); } while (assumed != old); return __longlong_as_double(old); } // atomicAdd for double __device__ double atomicAdd_d(double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; do{ assumed = old; old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); }while(assumed != old); return __longlong_as_double(old); } __global__ void kernel(curandState *globalState){ // global id int gidx = threadIdx.x + blockIdx.x * blockDim.x; // local id int lidx = threadIdx.x; // creat shared memory to store seeds __shared__ curandState localState[tot_threads]; __shared__ double srandnum[threads_per_block]; // copy global seed to local localState[lidx] = globalState[gidx]; //synchronize the local threads writing to the local memory cache __syncthreads(); // generate random number from normal distribution in shared memory srandnum[lidx] = curand_normal(&localState[lidx]); __syncthreads(); if(lidx == 0){srandnum[lidx] += srandnum[lidx + 1];} // sum of each block if(lidx == 0){gsum[blockIdx.x] = srandnum[lidx];} // copy the 
sums back to global memory __threadfence(); if( gidx < num_of_blocks){ atomicAdd_d(&dev_sum, gsum[gidx]); } if( gidx < num_of_blocks){ atomicMax_d(&dev_max, gsum[gidx]); } if( gidx == 0){ printf("Sum is: %lf\n", dev_sum); } if( gidx == 1){ printf("Max is: %lf\n", dev_max); } } int main(){ // set seed on device curandState *globalState; cudaMalloc((void**)&globalState, tot_threads*sizeof(curandState)); initcuRand<<<num_of_blocks, threads_per_block>>>(globalState, 1); // launch kernel kernel<<<num_of_blocks, threads_per_block>>>(globalState); double randnum[num_of_blocks]; cudaMemcpyFromSymbol(randnum, gsum, num_of_blocks*sizeof(double), 0, cudaMemcpyDeviceToHost); std::cout << "Sum of each block:\n"; for (int i = 0; i < num_of_blocks; ++i){ std::cout << randnum[i] << std::endl; } cudaFree(globalState); return 0; }
The result I get is
Sum is: -0.898329 Max is: 0.000000 Sum of each block: -0.0152994 -0.88303
From the result, I know that the
atomicAdd
function works but the atomicMax
function doesn't work, and I have no idea why. Thanks in advance.
原文:https://stackoverflow.com/questions/40836630
最满意答案
我不确定,但错误可能是因为您没有在schema中验证“categories”字段。 尝试给“categories”添加“blackbox:true”,以便它接受任何类型的对象。
Industry.attachSchema(new SimpleSchema({ label: { type: String }, value: { type: String }, categories: { type: [Object], blackbox:true // allows all objects } }));
完成后,尝试像这样添加值
var newObject = { id: categoryId, label: newCategory, value: newCategory } Industry.update({ _id: industryId }, { $push: { categories: newObject //newObject can be anything } });
这将允许您将任何类型的对象添加到类别字段中。 但是你在评论中提到类别也是另一个集合。 如果您已经有类别的SimpleSchema,那么您可以验证categories字段以仅接受与SimpleSchema匹配的对象,如此类别
Industry.attachSchema(new SimpleSchema({ label: { type: String }, value: { type: String }, categories: { type: [categoriesSchema] // replace categoriesSchema by name of SimpleSchema for categories } }));
在这种情况下,只有与categoriesSchema匹配的对象才会被允许进入categories字段。 任何其他类型都将被过滤掉。 而且,尝试插入其他类型时,控制台上也不会出现任何错误。(我认为这就是您现在尝试插入时发生的情况,因为没有指定验证)
编辑:答案的解释
在SimpleSchema中,当您定义一个对象数组时,您必须验证它,即,您必须告诉它它可以接受哪些对象以及它不能接受哪些对象。 例如,当你定义它时
... categories: { type: [categoriesSchema] // Correct }
这意味着只能在其中插入与名为categoriesSchema的另一个SimpleSchema结构相似的对象。 根据您的示例,您尝试插入的任何对象都应采用此格式
{ id: categoryId, label: newCategory, value: newCategory }
插入时,任何不符合此格式的对象都会被拒绝。 这就是为什么当您最初使用如下结构的schema时,您尝试插入的所有对象都被拒绝了
... categories: { type: [Object] // Not correct as there is no SimpleSchema named 'Object' to match with }
黑盒:真
现在,假设您不希望对象被过滤,而是希望所有对象都无需验证即可插入。 这就是设置“blackbox:true”的用武之地。如果您这样定义字段
... categories: { type: [Object], // Correct blackbox:true }
这意味着类别可以是任何对象,不需要针对其他一些SimpleSchema进行验证。 所以无论你试图插入什么,都会被接受。
I am not sure, but the error may be occurring because you are not validating 'categories' in your schema. Try adding 'blackbox:true' to your 'categories' so that it accepts any type of object.
Industry.attachSchema(new SimpleSchema({ label: { type: String }, value: { type: String }, categories: { type: [Object], blackbox:true // allows all objects } }));
Once you've done that try adding values to it like this
var newObject = { id: categoryId, label: newCategory, value: newCategory } Industry.update({ _id: industryId }, { $push: { categories: newObject //newObject can be anything } });
This would allow you to add any kind of object into the categories field. But you mentioned in a comment that categories is also another collection. If you already have a SimpleSchema for categories then you could validate the categories field to only accept objects that match with the SimpleSchema for categories like this
Industry.attachSchema(new SimpleSchema({ label: { type: String }, value: { type: String }, categories: { type: [categoriesSchema] // replace categoriesSchema by name of SimpleSchema for categories } }));
In this case, only objects that match categoriesSchema will be allowed into the categories field. Any other type would be filtered out. Also, you wouldn't get any error on the console for trying to insert other types (which is what I think is happening when you try to insert now, as no validation is specified).
EDIT : EXPLANATION OF ANSWER
In a SimpleSchema, when you define an array of objects you have to validate it, i.e., you have to tell it which objects it can accept and which it can't. For example, when you define it like
... categories: { type: [categoriesSchema] // Correct }
it means that only objects that are similar in structure to those in another SimpleSchema named categoriesSchema can be inserted into it. According to your example, any object you try to insert should be of this format
{ id: categoryId, label: newCategory, value: newCategory }
Any object that isn't of this format will be rejected on insert. That's why all the objects you tried to insert were rejected when you initially tried with your schema structured like this
... categories: { type: [Object] // Not correct as there is no SimpleSchema named 'Object' to match with }
Blackbox:true
Now, let's say you don't want your objects to be filtered and want all objects to be inserted without validation. That's where setting "blackbox:true" comes in. If you define a field like this
... categories: { type: [Object], // Correct blackbox:true }
it means that categories can be any object and need not be validated with respect to some other SimpleSchema. So whatever you try to insert gets accepted.
相关问答
更多-
正如在问题的评论中讨论的那样,这个问题直接关系到在查询中传入一个id的字符串表示形式,而不是使用ObjectId。 一般来说,将ObjectId作为规则的使用以及将字符串表示用作特殊异常(例如,在诸如findByIdAndUpdate方法中)是避免此问题的良好习惯。 const { ObjectId } = require('mongodb'); .update({ _id: ObjectId(projectId), 'en-GB._id': ObjectId(entryId), }) As d ...
-
你把update拼错了。 应该是update,而不是uptade。 You've misspelled update. It's update, not uptade.
-
MONGODB - 将集合中的字段添加到Item数组中的字段(MONGODB - adding a field in collection to a field in Item array)[2023-12-22]
db.YOUR_COLLECTION.aggregate({ $match: { items.sku: "xxx" }, $project: { "product": {$multiply: ["$items.qty","$items.price"]}, _id: 0 } }); I am able to achieve the solution to this by separating the query into tw ... -
使用MongoDB / Meteor更新数组中的特定元素(Updating a specific element in an array with MongoDB / Meteor)[2023-05-27]
您不能将变量用作对象文字中的键。 试试这个: var obj = {}; obj["users_voted." + index] = u_object; Posts.update({_id: post_id}, {$set: obj}); You can't use variables as keys in an object literal. Give this a try: var obj = {}; obj["users_voted." + index] = u_object; Posts.upda ... -
使用未知修改字段更新MongoDB集合的最佳实践(Best practice for updating a MongoDB collection with unknown modified fields)[2023-01-06]
您可能正在检索整个对象以显示可编辑的表单 - 我假设您将显示当前名称,电子邮件等,并允许他们编辑和提交新版本。 这意味着您可以检查以查看更改内容并进行更新,而不是每次都上传和替换整个对象。 我建议使用优化的更新操作: “MongoDB更新” 。 这样,您可以执行更优化的就地操作,而不是每次要编辑特定字段时替换整个对象。 您可能主要关注$set , $addToSet ,以及(如果您正在处理许多嵌套对象, $ position运算符)。 来自文档: 修改器操作 在更新现有值时,修改器操作非常高效且有用; 例如 ... -
如何将mongodb数组长度作为附加集合字段发布?(How to publish a mongodb array length as an additional collection field?)[2023-03-08]
您可以向Meteor添加aggregation framework支持 ,然后使用带有$project阶段的简单聚合管道,如下所示: myItems.aggregate( [ {$match: {secret: true}}, {$project: {_id: 1, name: 1, items_count: {$size: '$items'}}} ] ) You can add aggregation framework support to Meteor, ... -
做到这一点 db.collection.update({tags: [""]},{$unset: {tags:1}},{multi: true}); {multi: true}会在适用的情况下从多个文档中移除tags 。 Do it with db.collection.update({tags: [""]},{$unset: {tags:1}},{multi: true}); {multi: true} will remove tags from multiple documents, whereve ...
-
我想通了:我需要添加标志: db.results.update({"id": 2}, {$push: {"arr": doc }}, true, false) I figured it out: I needed to add flags: db.results.update({"id": 2}, {$push: {"arr": doc }}, true, false)
-
我不确定,但可能是错误发生,因为您没有验证架构中的“类别”。 尝试在“类别”中添加“blackbox:true”,以便它接受任何类型的对象。 Industry.attachSchema(new SimpleSchema({ label: { type: String }, value: { type: String }, categories: { type: [Object], blackbox:tru ...
-
i = i.toLowerCase()实际上并不更新对象。 相反,您可以使用地图替换数组本身: e.trackNames = e.trackNames.map(function (trackName) { return trackName.toLowerCase(); }); 这将更新原始对象上的trackNames数组,然后您可以保存该数组。 i = i.toLowerCase() does not actually update the object. Instead you can replac ...