Accéder au contenu.
Menu Sympa

starpu-devel - Re: [Starpu-devel] Is StarPU busy-waiting for tasks ?

Objet : Developers list for StarPU

Archives de la liste

Re: [Starpu-devel] Is StarPU busy-waiting for tasks ?


Chronologique Discussions 
  • From: Benoît Lizé <benoit.lize@gmail.com>
  • To: Samuel Thibault <samuel.thibault@ens-lyon.org>, Benoît Lizé <benoit.lize@gmail.com>, starpu-devel@lists.gforge.inria.fr
  • Subject: Re: [Starpu-devel] Is StarPU busy-waiting for tasks ?
  • Date: Thu, 7 Nov 2013 10:39:40 +0100
  • List-archive: <http://lists.gforge.inria.fr/pipermail/starpu-devel>
  • List-id: "Developers list. For discussion of new features, code changes, etc." <starpu-devel.lists.gforge.inria.fr>

Hello,

I have a fairly simple patch that works for my use case, and I believe
is correct. The overhead is too small to measure in my tests.
However, looking at a very simple example, the performance advantage
doesn't seem very large. This is on Linux with a processor that has
HyperThreading, and might be due to the "rep; nop" instruction
(STARPU_UYIELD()) in driver_common.c (line 185).

According to [1], "rep; nop" is equivalent to "pause", which is
described in the Intel instruction set reference manual [2] (p. 4-57)
as:

> Improves the performance of spin-wait loops. When executing a
> “spin-wait loop,” processors will suffer a severe performance
> penalty when exiting the loop because it detects a possible memory
> order violation. The PAUSE instruction provides a hint to the
> processor that the code sequence is a spin-wait loop. The processor
> uses this hint to avoid the memory order violation in most
> situations, which greatly improves processor performance. For this
> reason, it is recommended that a PAUSE instruction be placed in all
> spin-wait loops.

I have no idea of the effect of my patch on another
configuration. However, it should not hurt, and should also help to
get more meaningful statistics by not inflating the sleeping time of
workers.

What do you recommend ?
Attached is a patch that applies on Starpu-1.1 (r11549).

-- 
Benoit Lize


diff --git a/doc/doxygen/chapters/api/initialization.doxy b/doc/doxygen/chapters/api/initialization.doxy
index 8d0235d..cbed989 100644
--- a/doc/doxygen/chapters/api/initialization.doxy
+++ b/doc/doxygen/chapters/api/initialization.doxy
@@ -189,6 +189,18 @@ This is StarPU termination method. It must be called at the end of the
 application: statistics and other post-mortem debugging information
 are not guaranteed to be available until this method has been called.
 
+\fn void starpu_pause(void)
+\ingroup API_Initialization_and_Termination
+This call is used to suspend the processing of new tasks by
+workers. It can be used in a program where StarPU is used during only
+a part of the execution. Without this call, the workers continue to
+poll for new tasks in a tight loop, wasting CPU time. The symmetric
+call to \ref starpu_resume() should be used to unfreeze the workers.
+
+\fn void starpu_resume(void)
+This is the symmetrical call to \ref starpu_pause(), used to resume
+the workers polling for new tasks.
+
 \fn int starpu_asynchronous_copy_disabled(void)
 \ingroup API_Initialization_and_Termination
 Return 1 if asynchronous data transfers between CPU and accelerators
diff --git a/include/starpu.h b/include/starpu.h
index 0f94c89..9a81457 100644
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -115,6 +115,9 @@ int starpu_conf_init(struct starpu_conf *conf);
 
 int starpu_init(struct starpu_conf *conf) STARPU_WARN_UNUSED_RESULT;
 
+void starpu_pause();
+void starpu_resume();
+
 void starpu_shutdown(void);
 
 void starpu_topology_print(FILE *f);
diff --git a/src/core/workers.c b/src/core/workers.c
index 822877d..9fada18 100644
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -402,6 +402,7 @@ void _starpu_worker_start(struct _starpu_worker *worker, unsigned fut_key)
 static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 {
 	pconfig->running = 1;
+	pconfig->paused = 0;
 	pconfig->submitting = 1;
 
 	STARPU_PTHREAD_KEY_CREATE(&worker_key, NULL);
@@ -902,17 +903,42 @@ out:
 	}
 }
 
+/* Condition variable and mutex used to pause/resume. */
+static starpu_pthread_cond_t pause_cond = STARPU_PTHREAD_COND_INITIALIZER;
+static starpu_pthread_mutex_t pause_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
 unsigned _starpu_machine_is_running(void)
 {
 	unsigned ret;
-	/* running is just protected by a memory barrier */
+	unsigned should_pause;
+	/* running and paused are just protected by a memory barrier */
 	STARPU_RMB();
+	should_pause = config.paused;
+
+	if (STARPU_UNLIKELY(should_pause)) {
+    STARPU_PTHREAD_MUTEX_LOCK(&pause_mutex);
+		STARPU_PTHREAD_COND_WAIT(&pause_cond, &pause_mutex);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex);
+	}
+
 	ANNOTATE_HAPPENS_AFTER(&config.running);
 	ret = config.running;
 	ANNOTATE_HAPPENS_BEFORE(&config.running);
 	return ret;
 }
 
+void starpu_pause()
+{
+  config.paused = 1;
+}
+
+void starpu_resume()
+{
+				STARPU_PTHREAD_MUTEX_LOCK(&pause_mutex);
+				config.paused = 0;
+				STARPU_PTHREAD_COND_BROADCAST(&pause_cond);
+				STARPU_PTHREAD_MUTEX_UNLOCK(&pause_mutex);
+}
+
 unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED)
 {
 #ifdef STARPU_NON_BLOCKING_DRIVERS
@@ -967,6 +993,9 @@ void starpu_shutdown(void)
 	initialized = CHANGING;
 	STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
 
+	/* If the workers are frozen, no progress can be made. */
+	starpu_resume();
+
 	starpu_task_wait_for_no_ready();
 
 	/* tell all workers to shutdown */
diff --git a/src/core/workers.h b/src/core/workers.h
index 7cb56a2..b5d833b 100644
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -240,6 +240,9 @@ struct _starpu_machine_config
 	/* this flag is set until the runtime is stopped */
 	unsigned running;
 
+	/* this flag is not set unless the runtime should pause */
+	unsigned paused;
+
 	/* all the sched ctx of the current instance of starpu */
 	struct _starpu_sched_ctx sched_ctxs[STARPU_NMAX_SCHED_CTXS];
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 1d2b854..fea194f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -137,6 +137,7 @@ noinst_PROGRAMS =				\
 	main/starpu_init			\
 	main/starpu_worker_exists		\
 	main/submit				\
+	main/pause_resume			\
 	datawizard/allocate			\
 	datawizard/acquire_cb			\
 	datawizard/acquire_cb_insert		\
diff --git a/tests/main/pause_resume.c b/tests/main/pause_resume.c
new file mode 100644
index 0000000..df1a5fb
--- /dev/null
+++ b/tests/main/pause_resume.c
@@ -0,0 +1,104 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <sys/time.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <starpu.h>
+#include "../helper.h"
+
+#ifdef STARPU_QUICK_CHECK
+static unsigned ntasks = 64;
+#else
+static unsigned ntasks = 200000;
+#endif
+
+static void dummy_func(void *descr[] STARPU_ATTRIBUTE_UNUSED, void *arg STARPU_ATTRIBUTE_UNUSED)
+{
+}
+
+static struct starpu_codelet dummy_codelet =
+{
+	.cpu_funcs = {dummy_func, NULL},
+	.cuda_funcs = {dummy_func, NULL},
+	.opencl_funcs = {dummy_func, NULL},
+	.model = NULL,
+	.nbuffers = 0
+};
+
+
+int main(int argc, char **argv)
+{
+	double timing;
+	struct timeval start;
+	struct timeval end;
+	int ret;
+
+#ifdef STARPU_HAVE_VALGRIND_H
+	if(RUNNING_ON_VALGRIND) ntasks = 5;
+#endif
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	/* Check that we can submit tasks to a "paused" StarPU and then have
+	 * it run normally.
+	 */
+	starpu_pause();
+	unsigned i;
+	for (i = 0; i < ntasks; i++)
+	{
+					ret = starpu_insert_task(&dummy_codelet, 0);
+					STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+	}
+
+	gettimeofday(&start, NULL);
+	starpu_resume();
+	starpu_task_wait_for_all();
+	gettimeofday(&end, NULL);
+	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+
+	FPRINTF(stderr, "Without interruptions:\n\tTotal: %f secs\n", timing/1000000);
+	FPRINTF(stderr, "\tPer task: %f usecs\n", timing/ntasks);
+
+	/* Do the same thing, but with a lot of interruptions to see if there
+	 * is any overhead associated with the pause/resume calls.
+	 */
+	starpu_pause();
+	for (i = 0; i < ntasks; i++) {
+					ret = starpu_insert_task(&dummy_codelet, 0);
+					STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+	}
+
+	gettimeofday(&start, NULL);
+	for (i = 0; i < 100; i++) {
+					starpu_pause();
+					starpu_resume();
+	}
+	starpu_task_wait_for_all();
+	gettimeofday(&end, NULL);
+	timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
+
+	FPRINTF(stderr, "With 100 interruptions:\n\tTotal: %f secs\n", timing/1000000);
+	FPRINTF(stderr, "\tPer task: %f usecs\n", timing/ntasks);
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}



Archives gérées par MHonArc 2.6.19+.

Haut de la page