Objet : Developers list for StarPU
Archives de la liste
- From: Samuel Thibault <samuel.thibault@ens-lyon.org>
- To: Xavier Lacoste <xavier.lacoste@inria.fr>
- Cc: starpu-devel@lists.gforge.inria.fr
- Subject: Re: [Starpu-devel] GPU issue with r12137
- Date: Mon, 31 Mar 2014 15:01:32 +0200
- List-archive: <http://lists.gforge.inria.fr/pipermail/starpu-devel>
- List-id: "Developers list. For discussion of new features, code changes, etc." <starpu-devel.lists.gforge.inria.fr>
Xavier Lacoste, le Fri 28 Mar 2014 15:42:51 +0100, a écrit :
> I don't see any reason why a commit called "Use different streams for
> gpu-gpu
> transfers" would alter a mono-gpu execution....
Well, actually there is also another change in it: we also make all
threads able to use streams, not only the GPU-driving threads. Could you
try the attached patch, which reverts just that? I'm afraid that means
diving into a CUDA bug, I don't see what is wrong in us doing that.
Samuel
Index: src/datawizard/copy_driver.c
===================================================================
--- src/datawizard/copy_driver.c (révision 12511)
+++ src/datawizard/copy_driver.c (copie de travail)
@@ -161,7 +161,7 @@
cures =
cudaEventCreate(&req->async_channel.event.cuda_event);
if (STARPU_UNLIKELY(cures != cudaSuccess))
STARPU_CUDA_REPORT_ERROR(cures);
- stream =
starpu_cuda_get_out_transfer_stream(src_node);
+ stream = starpu_cuda_get_local_out_transfer_stream();
if (copy_methods->cuda_to_ram_async)
ret =
copy_methods->cuda_to_ram_async(src_interface, src_node, dst_interface,
dst_node, stream);
else
@@ -197,7 +197,7 @@
if (STARPU_UNLIKELY(cures != cudaSuccess))
STARPU_CUDA_REPORT_ERROR(cures);
- stream = starpu_cuda_get_in_transfer_stream(dst_node);
+ stream = starpu_cuda_get_local_in_transfer_stream();
if (copy_methods->ram_to_cuda_async)
ret =
copy_methods->ram_to_cuda_async(src_interface, src_node, dst_interface,
dst_node, stream);
else
@@ -411,7 +411,7 @@
(void*) src + src_offset, src_node,
(void*) dst + dst_offset, dst_node,
size,
-
async_channel?starpu_cuda_get_out_transfer_stream(src_node):NULL,
+
async_channel?starpu_cuda_get_local_out_transfer_stream():NULL,
cudaMemcpyDeviceToHost);
case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_CUDA_RAM):
@@ -419,7 +419,7 @@
(void*) src + src_offset, src_node,
(void*) dst + dst_offset, dst_node,
size,
-
async_channel?starpu_cuda_get_in_transfer_stream(dst_node):NULL,
+
async_channel?starpu_cuda_get_local_in_transfer_stream():NULL,
cudaMemcpyHostToDevice);
case _STARPU_MEMORY_NODE_TUPLE(STARPU_CUDA_RAM,STARPU_CUDA_RAM):
Index: src/drivers/cuda/driver_cuda.c
===================================================================
--- src/drivers/cuda/driver_cuda.c (révision 12509)
+++ src/drivers/cuda/driver_cuda.c (copie de travail)
@@ -42,8 +42,8 @@
static size_t global_mem[STARPU_NMAXWORKERS];
#ifdef STARPU_USE_CUDA
static cudaStream_t streams[STARPU_NMAXWORKERS];
-static cudaStream_t out_transfer_streams[STARPU_MAXCUDADEVS];
-static cudaStream_t in_transfer_streams[STARPU_MAXCUDADEVS];
+static cudaStream_t out_transfer_streams[STARPU_NMAXWORKERS];
+static cudaStream_t in_transfer_streams[STARPU_NMAXWORKERS];
static cudaStream_t
peer_transfer_streams[STARPU_MAXCUDADEVS][STARPU_MAXCUDADEVS];
static struct cudaDeviceProp props[STARPU_MAXCUDADEVS];
#endif /* STARPU_USE_CUDA */
@@ -115,18 +115,18 @@
}
#ifdef STARPU_USE_CUDA
-cudaStream_t starpu_cuda_get_in_transfer_stream(unsigned node)
+cudaStream_t starpu_cuda_get_local_in_transfer_stream(void)
{
- int devid = _starpu_memory_node_get_devid(node);
+ int worker = starpu_worker_get_id();
- return in_transfer_streams[devid];
+ return in_transfer_streams[worker];
}
-cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned node)
+cudaStream_t starpu_cuda_get_local_out_transfer_stream(void)
{
- int devid = _starpu_memory_node_get_devid(node);
+ int worker = starpu_worker_get_id();
- return out_transfer_streams[devid];
+ return out_transfer_streams[worker];
}
cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node,
unsigned dst_node)
@@ -256,11 +256,11 @@
if (STARPU_UNLIKELY(cures))
STARPU_CUDA_REPORT_ERROR(cures);
- cures = cudaStreamCreate(&in_transfer_streams[devid]);
+ cures = cudaStreamCreate(&in_transfer_streams[workerid]);
if (STARPU_UNLIKELY(cures))
STARPU_CUDA_REPORT_ERROR(cures);
- cures = cudaStreamCreate(&out_transfer_streams[devid]);
+ cures = cudaStreamCreate(&out_transfer_streams[workerid]);
if (STARPU_UNLIKELY(cures))
STARPU_CUDA_REPORT_ERROR(cures);
@@ -279,8 +279,8 @@
int i;
cudaStreamDestroy(streams[workerid]);
- cudaStreamDestroy(in_transfer_streams[devid]);
- cudaStreamDestroy(out_transfer_streams[devid]);
+ cudaStreamDestroy(in_transfer_streams[workerid]);
+ cudaStreamDestroy(out_transfer_streams[workerid]);
for (i = 0; i < ncudagpus; i++)
cudaStreamDestroy(peer_transfer_streams[i][devid]);
Index: src/drivers/cuda/driver_cuda.h
===================================================================
--- src/drivers/cuda/driver_cuda.h (révision 12509)
+++ src/drivers/cuda/driver_cuda.h (copie de travail)
@@ -48,8 +48,8 @@
# define _starpu_cuda_discover_devices(config) ((void) config)
#endif
#ifdef STARPU_USE_CUDA
-cudaStream_t starpu_cuda_get_in_transfer_stream(unsigned node);
-cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned node);
+cudaStream_t starpu_cuda_get_local_in_transfer_stream(void);
+cudaStream_t starpu_cuda_get_local_out_transfer_stream(void);
cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node,
unsigned dst_node);
struct _starpu_worker;
- [Starpu-devel] GPU issue with r12137, Xavier Lacoste, 28/03/2014
- Re: [Starpu-devel] GPU issue with r12137, Xavier Lacoste, 28/03/2014
- Re: [Starpu-devel] GPU issue with r12137, Xavier Lacoste, 28/03/2014
- Re: [Starpu-devel] GPU issue with r12137, Samuel Thibault, 31/03/2014
- Re: [Starpu-devel] GPU issue with r12137, Xavier Lacoste, 31/03/2014
- Re: [Starpu-devel] GPU issue with r12137, Samuel Thibault, 31/03/2014
- Re: [Starpu-devel] GPU issue with r12137, Xavier Lacoste, 28/03/2014
- Re: [Starpu-devel] GPU issue with r12137, Xavier Lacoste, 28/03/2014
Archives gérées par MHonArc 2.6.19+.