changeset 13267:c8aa11399b47 draft

(svn r17776) -Codechange: [SDL] make the "update the video card" process asynchronous. Profiling with gprof etc. did not show that DrawSurfaceToScreen takes a significant amount of CPU; only by using TIC/TOC did it become apparent that it was a heavy CPU-cycle user or that it was waiting for something. The benefit of making this function asynchronous ranges from 2%-25% (real time) during fast forward on dual core/hyperthreading-enabled CPUs; 8bpp improvements are, in my test cases, significantly smaller than 32bpp improvements. On single core non-hyperthreading-enabled CPUs the extra locking/scheduling costs up to 1% extra real time in fast forward. You can use -v sdl:no_threads to disable threading and undo this loss. During normal non-fast-forwarded games the benefits/costs are negligible except when the game loop takes more than about 90% of the time of a tick. Note that Allegro's performance does not improve with this system, likely due to its way of getting data to the video card. It is not implemented for the OS X/Windows video backends, unless (of course) SDL is used there. Funnily enough, the 32bpp(-anim) blitter is now, at least in some test cases, significantly faster (more than 10%) than the 8bpp(-optimized) blitter when looking at real time in fast forward on a dual core CPU; previously it was slower. The idea comes from a paper/report by Idar Borlaug and Knut Imar Hagen.
author rubidium <rubidium@openttd.org>
date Thu, 15 Oct 2009 17:41:06 +0000
parents 88d55acd53db
children 8ddc401076c5
files src/thread/thread.h src/thread/thread_none.cpp src/thread/thread_pthread.cpp src/thread/thread_win32.cpp src/video/allegro_v.cpp src/video/sdl_v.cpp
diffstat 6 files changed, 132 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/src/thread/thread.h
+++ b/src/thread/thread.h
@@ -68,6 +68,19 @@
 	 * End of the critical section
 	 */
 	virtual void EndCritical() = 0;
+
+	/**
+	 * Wait for a signal to be sent.
+	 * @pre You must be in the critical section.
+	 * @note While waiting the critical section is left.
+	 * @post You will be in the critical section.
+	 */
+	virtual void WaitForSignal() = 0;
+
+	/**
+	 * Send a signal and wake the 'thread' that was waiting for it.
+	 */
+	virtual void SendSignal() = 0;
 };
 
 #endif /* THREAD_H */
--- a/src/thread/thread_none.cpp
+++ b/src/thread/thread_none.cpp
@@ -23,6 +23,8 @@
 public:
 	virtual void BeginCritical() {}
 	virtual void EndCritical() {}
+	virtual void WaitForSignal() {}
+	virtual void SendSignal() {}
 };
 
 /* static */ ThreadMutex *ThreadMutex::New()
--- a/src/thread/thread_pthread.cpp
+++ b/src/thread/thread_pthread.cpp
@@ -12,6 +12,7 @@
 #include "../stdafx.h"
 #include "thread.h"
 #include <pthread.h>
+#include <errno.h>
 
 /**
  * POSIX pthread version for ThreadObject.
@@ -95,16 +96,21 @@
 class ThreadMutex_pthread : public ThreadMutex {
 private:
 	pthread_mutex_t mutex;
+	pthread_cond_t condition;
 
 public:
 	ThreadMutex_pthread()
 	{
 		pthread_mutex_init(&this->mutex, NULL);
+		pthread_cond_init(&this->condition, NULL);
 	}
 
 	/* virtual */ ~ThreadMutex_pthread()
 	{
-		pthread_mutex_destroy(&this->mutex);
+		int err = pthread_cond_destroy(&this->condition);
+		assert(err != EBUSY);
+		err = pthread_mutex_destroy(&this->mutex);
+		assert(err != EBUSY);
 	}
 
 	/* virtual */ void BeginCritical()
@@ -116,6 +122,16 @@
 	{
 		pthread_mutex_unlock(&this->mutex);
 	}
+
+	/* virtual */ void WaitForSignal()
+	{
+		pthread_cond_wait(&this->condition, &this->mutex);
+	}
+
+	/* virtual */ void SendSignal()
+	{
+		pthread_cond_signal(&this->condition);
+	}
 };
 
 /* static */ ThreadMutex *ThreadMutex::New()
--- a/src/thread/thread_win32.cpp
+++ b/src/thread/thread_win32.cpp
@@ -107,16 +107,19 @@
 class ThreadMutex_Win32 : public ThreadMutex {
 private:
 	CRITICAL_SECTION critical_section;
+	HANDLE event;
 
 public:
 	ThreadMutex_Win32()
 	{
 		InitializeCriticalSection(&this->critical_section);
+		this->event = CreateEvent(NULL, FALSE, FALSE, NULL);
 	}
 
 	/* virtual */ ~ThreadMutex_Win32()
 	{
 		DeleteCriticalSection(&this->critical_section);
+		CloseHandle(this->event);
 	}
 
 	/* virtual */ void BeginCritical()
@@ -128,6 +131,18 @@
 	{
 		LeaveCriticalSection(&this->critical_section);
 	}
+
+	/* virtual */ void WaitForSignal()
+	{
+		this->EndCritical();
+		WaitForSingleObject(this->event, INFINITE);
+		this->BeginCritical();
+	}
+
+	/* virtual */ void SendSignal()
+	{
+		SetEvent(this->event);
+	}
 };
 
 /* static */ ThreadMutex *ThreadMutex::New()
--- a/src/video/allegro_v.cpp
+++ b/src/video/allegro_v.cpp
@@ -7,7 +7,12 @@
  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
  */
 
-/** @file allegro_v.cpp Implementation of the Allegro video driver. */
+/**
+ * @file allegro_v.cpp Implementation of the Allegro video driver.
+ * @note Implementing threaded pushing of data to the display is
+ *       not faster (it's a few percent slower) in contrast to the
+ *       results gained with threading it for SDL.
+ */
 
 #ifdef WITH_ALLEGRO
 
--- a/src/video/sdl_v.cpp
+++ b/src/video/sdl_v.cpp
@@ -20,6 +20,7 @@
 #include "../blitter/factory.hpp"
 #include "../network/network.h"
 #include "../functions.h"
+#include "../thread/thread.h"
 #include "sdl_v.h"
 #include <SDL.h>
 
@@ -28,6 +29,15 @@
 static SDL_Surface *_sdl_screen;
 static bool _all_modes;
 
+/** Whether the drawing is/may be done in a separate thread. */
+static bool _draw_threaded;
+/** Thread used to 'draw' to the screen, i.e. push data to the screen. */
+static ThreadObject *_draw_thread = NULL;
+/** Mutex to keep the access to the shared memory controlled. */
+static ThreadMutex *_draw_mutex = NULL;
+/** Should we continue drawing? */
+static volatile bool _draw_continue;
+
 #define MAX_DIRTY_RECTS 100
 static SDL_Rect _dirty_rects[MAX_DIRTY_RECTS];
 static int _num_dirty_rects;
@@ -99,6 +109,22 @@
 	}
 }
 
+static void DrawSurfaceToScreenThread(void *)
+{
+	/* First wait till we 'may' start */
+	_draw_mutex->BeginCritical();
+	_draw_mutex->WaitForSignal();
+
+	while (_draw_continue) {
+		/* Then just draw and wait till we stop */
+		DrawSurfaceToScreen();
+		_draw_mutex->WaitForSignal();
+	}
+
+	_draw_mutex->EndCritical();
+	_draw_thread->Exit();
+}
+
 static const Dimension _default_resolutions[] = {
 	{ 640,  480},
 	{ 800,  600},
@@ -214,6 +240,9 @@
 		return false;
 	}
 
+	/* Delay drawing for this cycle; the next cycle will redraw the whole screen */
+	_num_dirty_rects = 0;
+
 	_screen.width = newscreen->w;
 	_screen.height = newscreen->h;
 	_screen.pitch = newscreen->pitch / (bpp / 8);
@@ -445,6 +474,9 @@
 
 	SDL_CALL SDL_EnableKeyRepeat(SDL_DEFAULT_REPEAT_DELAY, SDL_DEFAULT_REPEAT_INTERVAL);
 	SDL_CALL SDL_EnableUNICODE(1);
+
+	_draw_threaded = GetDriverParam(parm, "no_threads") == NULL && GetDriverParam(parm, "no_thread") == NULL;
+
 	return NULL;
 }
 
@@ -463,6 +495,25 @@
 	int numkeys;
 	Uint8 *keys;
 
+	if (_draw_threaded) {
+		/* Initialise the mutex first, because that's the thing we *need*
+		 * directly in the newly created thread. */
+		_draw_mutex = ThreadMutex::New();
+		if (_draw_mutex == NULL) {
+			_draw_threaded = false;
+		} else {
+			_draw_mutex->BeginCritical();
+			_draw_continue = true;
+
+			_draw_threaded = ThreadObject::New(&DrawSurfaceToScreenThread, NULL, &_draw_thread);
+		}
+
+		/* Free the mutex if we won't be able to use it. */
+		if (!_draw_threaded) delete _draw_mutex;
+	}
+
+	DEBUG(driver, 1, "SDL: using %sthreads", _draw_threaded ? "" : "no ");
+
 	for (;;) {
 		uint32 prev_cur_ticks = cur_ticks; // to check for wrapping
 		InteractiveRandom(); // randomness
@@ -505,23 +556,45 @@
 
 			if (old_ctrl_pressed != _ctrl_pressed) HandleCtrlChanged();
 
+			/* The game loop is the part that can run asynchronously. The rest,
+			 * except sleeping, can't. */
+			if (_draw_threaded) _draw_mutex->EndCritical();
+
 			GameLoop();
 
+			if (_draw_threaded) _draw_mutex->BeginCritical();
+
 			_screen.dst_ptr = _sdl_screen->pixels;
 			UpdateWindows();
 			if (++pal_tick > 4) {
 				CheckPaletteAnim();
 				pal_tick = 1;
 			}
-			DrawSurfaceToScreen();
+
+			/* End of the critical part. */
+			if (_draw_threaded) {
+				_draw_mutex->SendSignal();
+			} else {
+				/* Oh, we didn't have threads, then just draw unthreaded */
+				DrawSurfaceToScreen();
+			}
 		} else {
-			SDL_CALL SDL_Delay(1);
-			_screen.dst_ptr = _sdl_screen->pixels;
-			NetworkDrawChatMessage();
-			DrawMouseCursor();
-			DrawSurfaceToScreen();
+			/* Release the thread while sleeping */
+			if (_draw_threaded) _draw_mutex->EndCritical();
+			CSleep(1);
+			if (_draw_threaded) _draw_mutex->BeginCritical();
 		}
 	}
+
+	if (_draw_threaded) {
+		_draw_continue = false;
+		_draw_mutex->SendSignal();
+		_draw_mutex->EndCritical();
+		_draw_thread->Join();
+
+		delete _draw_mutex;
+		delete _draw_thread;
+	}
 }
 
 bool VideoDriver_SDL::ChangeResolution(int w, int h)