llvm-gcc-4.2/libgomp/iter.c - llvm-archive - Git at Google

 /* Copyright (C) 2005 Free Software Foundation, Inc.
    Contributed by Richard Henderson <rth@redhat.com>.

    This file is part of the GNU OpenMP Library (libgomp).

    Libgomp is free software; you can redistribute it and/or modify it
    under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.

    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
    more details.

    You should have received a copy of the GNU Lesser General Public License
    along with libgomp; see the file COPYING.LIB.  If not, write to the
    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
    MA 02110-1301, USA.  */

 /* As a special exception, if you link this library with other files, some
    of which are compiled with GCC, to produce an executable, this library
    does not by itself cause the resulting executable to be covered by the
    GNU General Public License.  This exception does not however invalidate
    any other reasons why the executable file might be covered by the GNU
    General Public License.  */

 /* This file contains routines for managing work-share iteration, both
    for loops and sections.  */

 #include "libgomp.h"
 #include <stdlib.h>


 /* This function implements the STATIC scheduling method.  The caller should
    iterate *pstart <= x < *pend.  Return zero if there are more iterations
    to perform; nonzero if not.  Return less than 0 if this thread had
    received the absolutely last iteration.  */

 int
 gomp_iter_static_next (long *pstart, long *pend)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
   struct gomp_work_share *ws = thr->ts.work_share;
   unsigned long nthreads = team ? team->nthreads : 1;

   if (thr->ts.static_trip == -1)
     return -1;

   /* Quick test for degenerate teams and orphaned constructs.  */
   if (nthreads == 1)
     {
       *pstart = ws->next;
       *pend = ws->end;
       thr->ts.static_trip = -1;
       return ws->next == ws->end;
     }

   /* We interpret chunk_size zero as "unspecified", which means that we
      should break up the iterations such that each thread makes only one
      trip through the outer loop.  */
   if (ws->chunk_size == 0)
     {
       unsigned long n, q, i;
       unsigned long s0, e0;
       long s, e;

       if (thr->ts.static_trip > 0)
 	return 1;

       /* Compute the total number of iterations.  */
       s = ws->incr + (ws->incr > 0 ? -1 : 1);
       n = (ws->end - ws->next + s) / ws->incr;
       i = thr->ts.team_id;

       /* Compute the "zero-based" start and end points.  That is, as
          if the loop began at zero and incremented by one.  */
       q = n / nthreads;
       q += (q * nthreads != n);
       s0 = q * i;
       e0 = s0 + q;
       if (e0 > n)
         e0 = n;

       /* Notice when no iterations allocated for this thread.  */
       if (s0 >= e0)
 	{
 	  thr->ts.static_trip = 1;
 	  return 1;
 	}

       /* Transform these to the actual start and end numbers.  */
       s = (long)s0 * ws->incr + ws->next;
       e = (long)e0 * ws->incr + ws->next;

       *pstart = s;
       *pend = e;
       thr->ts.static_trip = (e0 == n ? -1 : 1);
       return 0;
     }
   else
     {
       unsigned long n, s0, e0, i, c;
       long s, e;

       /* Otherwise, each thread gets exactly chunk_size iterations
 	 (if available) each time through the loop.  */

       s = ws->incr + (ws->incr > 0 ? -1 : 1);
       n = (ws->end - ws->next + s) / ws->incr;
       i = thr->ts.team_id;
       c = ws->chunk_size;

       /* Initial guess is a C sized chunk positioned nthreads iterations
 	 in, offset by our thread number.  */
       s0 = (thr->ts.static_trip * nthreads + i) * c;
       e0 = s0 + c;

       /* Detect overflow.  */
       if (s0 >= n)
 	return 1;
       if (e0 > n)
 	e0 = n;

       /* Transform these to the actual start and end numbers.  */
       s = (long)s0 * ws->incr + ws->next;
       e = (long)e0 * ws->incr + ws->next;

       *pstart = s;
       *pend = e;

       if (e0 == n)
 	thr->ts.static_trip = -1;
       else
 	thr->ts.static_trip++;
       return 0;
     }
 }


 /* This function implements the DYNAMIC scheduling method.  Arguments are
    as for gomp_iter_static_next.  This function must be called with ws->lock
    held.  */

 bool
 gomp_iter_dynamic_next_locked (long *pstart, long *pend)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_work_share *ws = thr->ts.work_share;
   long start, end, chunk, left;

   start = ws->next;
   if (start == ws->end)
     return false;

   chunk = ws->chunk_size * ws->incr;
   left = ws->end - start;
   if (ws->incr < 0)
     {
       if (chunk < left)
 	chunk = left;
     }
   else
     {
       if (chunk > left)
 	chunk = left;
     }
   end = start + chunk;

   ws->next = end;
   *pstart = start;
   *pend = end;
   return true;
 }


 #ifdef HAVE_SYNC_BUILTINS
 /* Similar, but doesn't require the lock held, and uses compare-and-swap
    instead.  Note that the only memory value that changes is ws->next.  */

 bool
 gomp_iter_dynamic_next (long *pstart, long *pend)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_work_share *ws = thr->ts.work_share;
   long start, end, nend, chunk, incr;

   start = ws->next;
   end = ws->end;
   incr = ws->incr;
   chunk = ws->chunk_size * incr;

   while (1)
     {
       long left = end - start;
       long tmp;

       if (start == end)
 	return false;

       if (incr < 0)
 	{
 	  if (chunk < left)
 	    chunk = left;
 	}
       else
 	{
 	  if (chunk > left)
 	    chunk = left;
 	}
       nend = start + chunk;

       tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
       if (__builtin_expect (tmp == start, 1))
 	break;

       start = tmp;
     }

   *pstart = start;
   *pend = nend;
   return true;
 }
 #endif /* HAVE_SYNC_BUILTINS */


 /* This function implements the GUIDED scheduling method.  Arguments are
    as for gomp_iter_static_next.  This function must be called with the
    work share lock held.  */

 bool
 gomp_iter_guided_next_locked (long *pstart, long *pend)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_work_share *ws = thr->ts.work_share;
   struct gomp_team *team = thr->ts.team;
   unsigned long nthreads = team ? team->nthreads : 1;
   unsigned long n, q;
   long start, end;

   if (ws->next == ws->end)
     return false;

   n = (ws->end - ws->next) / ws->incr;
   q = (n + nthreads - 1) / nthreads;

   if (q < ws->chunk_size)
     q = ws->chunk_size;
   if (q > n)
     q = n;

   start = ws->next;
   end = start + q * ws->incr;

   ws->next = end;
   *pstart = start;
   *pend = end;
   return true;
 }

 #ifdef HAVE_SYNC_BUILTINS
 /* Similar, but doesn't require the lock held, and uses compare-and-swap
    instead.  Note that the only memory value that changes is ws->next.  */

 bool
 gomp_iter_guided_next (long *pstart, long *pend)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_work_share *ws = thr->ts.work_share;
   struct gomp_team *team = thr->ts.team;
   unsigned long nthreads = team ? team->nthreads : 1;
   long start, end, nend, incr;
   unsigned long chunk_size;

   start = ws->next;
   end = ws->end;
   incr = ws->incr;
   chunk_size = ws->chunk_size;

   while (1)
     {
       unsigned long n, q;
       long tmp;

       if (start == end)
 	return false;

       n = (end - start) / ws->incr;
       q = (n + nthreads - 1) / nthreads;

       if (q < chunk_size)
 	q = chunk_size;
       if (q > n)
 	q = n;

       nend = start + q * incr;

       tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
       if (__builtin_expect (tmp == start, 1))
 	break;

       start = tmp;
     }

   *pstart = start;
   *pend = nend;
   return true;
 }
 #endif /* HAVE_SYNC_BUILTINS */
	/* Copyright (C) 2005 Free Software Foundation, Inc.
	Contributed by Richard Henderson <rth@redhat.com>.

	This file is part of the GNU OpenMP Library (libgomp).

	Libgomp is free software; you can redistribute it and/or modify it
	under the terms of the GNU Lesser General Public License as published by
	the Free Software Foundation; either version 2.1 of the License, or
	(at your option) any later version.

	Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
	WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
	more details.

	You should have received a copy of the GNU Lesser General Public License
	along with libgomp; see the file COPYING.LIB. If not, write to the
	Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
	MA 02110-1301, USA. */

	/* As a special exception, if you link this library with other files, some
	of which are compiled with GCC, to produce an executable, this library
	does not by itself cause the resulting executable to be covered by the
	GNU General Public License. This exception does not however invalidate
	any other reasons why the executable file might be covered by the GNU
	General Public License. */

	/* This file contains routines for managing work-share iteration, both
	for loops and sections. */

	#include "libgomp.h"
	#include <stdlib.h>


	/* This function implements the STATIC scheduling method. The caller should
	iterate pstart <= x < pend. Return zero if there are more iterations
	to perform; nonzero if not. Return less than 0 if this thread had
	received the absolutely last iteration. */

	int
	gomp_iter_static_next (long pstart, long pend)
	{
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	struct gomp_work_share *ws = thr->ts.work_share;
	unsigned long nthreads = team ? team->nthreads : 1;

	if (thr->ts.static_trip == -1)
	return -1;

	/* Quick test for degenerate teams and orphaned constructs. */
	if (nthreads == 1)
	{
	*pstart = ws->next;
	*pend = ws->end;
	thr->ts.static_trip = -1;
	return ws->next == ws->end;
	}

	/* We interpret chunk_size zero as "unspecified", which means that we
	should break up the iterations such that each thread makes only one
	trip through the outer loop. */
	if (ws->chunk_size == 0)
	{
	unsigned long n, q, i;
	unsigned long s0, e0;
	long s, e;

	if (thr->ts.static_trip > 0)
	return 1;

	/* Compute the total number of iterations. */
	s = ws->incr + (ws->incr > 0 ? -1 : 1);
	n = (ws->end - ws->next + s) / ws->incr;
	i = thr->ts.team_id;

	/* Compute the "zero-based" start and end points. That is, as
	if the loop began at zero and incremented by one. */
	q = n / nthreads;
	q += (q * nthreads != n);
	s0 = q * i;
	e0 = s0 + q;
	if (e0 > n)
	e0 = n;

	/* Notice when no iterations allocated for this thread. */
	if (s0 >= e0)
	{
	thr->ts.static_trip = 1;
	return 1;
	}

	/* Transform these to the actual start and end numbers. */
	s = (long)s0 * ws->incr + ws->next;
	e = (long)e0 * ws->incr + ws->next;

	*pstart = s;
	*pend = e;
	thr->ts.static_trip = (e0 == n ? -1 : 1);
	return 0;
	}
	else
	{
	unsigned long n, s0, e0, i, c;
	long s, e;

	/* Otherwise, each thread gets exactly chunk_size iterations
	(if available) each time through the loop. */

	s = ws->incr + (ws->incr > 0 ? -1 : 1);
	n = (ws->end - ws->next + s) / ws->incr;
	i = thr->ts.team_id;
	c = ws->chunk_size;

	/* Initial guess is a C sized chunk positioned nthreads iterations
	in, offset by our thread number. */
	s0 = (thr->ts.static_trip * nthreads + i) * c;
	e0 = s0 + c;

	/* Detect overflow. */
	if (s0 >= n)
	return 1;
	if (e0 > n)
	e0 = n;

	/* Transform these to the actual start and end numbers. */
	s = (long)s0 * ws->incr + ws->next;
	e = (long)e0 * ws->incr + ws->next;

	*pstart = s;
	*pend = e;

	if (e0 == n)
	thr->ts.static_trip = -1;
	else
	thr->ts.static_trip++;
	return 0;
	}
	}


	/* This function implements the DYNAMIC scheduling method. Arguments are
	as for gomp_iter_static_next. This function must be called with ws->lock
	held. */

	bool
	gomp_iter_dynamic_next_locked (long pstart, long pend)
	{
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_work_share *ws = thr->ts.work_share;
	long start, end, chunk, left;

	start = ws->next;
	if (start == ws->end)
	return false;

	chunk = ws->chunk_size * ws->incr;
	left = ws->end - start;
	if (ws->incr < 0)
	{
	if (chunk < left)
	chunk = left;
	}
	else
	{
	if (chunk > left)
	chunk = left;
	}
	end = start + chunk;

	ws->next = end;
	*pstart = start;
	*pend = end;
	return true;
	}


	#ifdef HAVE_SYNC_BUILTINS
	/* Similar, but doesn't require the lock held, and uses compare-and-swap
	instead. Note that the only memory value that changes is ws->next. */

	bool
	gomp_iter_dynamic_next (long pstart, long pend)
	{
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_work_share *ws = thr->ts.work_share;
	long start, end, nend, chunk, incr;

	start = ws->next;
	end = ws->end;
	incr = ws->incr;
	chunk = ws->chunk_size * incr;

	while (1)
	{
	long left = end - start;
	long tmp;

	if (start == end)
	return false;

	if (incr < 0)
	{
	if (chunk < left)
	chunk = left;
	}
	else
	{
	if (chunk > left)
	chunk = left;
	}
	nend = start + chunk;

	tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
	if (__builtin_expect (tmp == start, 1))
	break;

	start = tmp;
	}

	*pstart = start;
	*pend = nend;
	return true;
	}
	#endif /* HAVE_SYNC_BUILTINS */


	/* This function implements the GUIDED scheduling method. Arguments are
	as for gomp_iter_static_next. This function must be called with the
	work share lock held. */

	bool
	gomp_iter_guided_next_locked (long pstart, long pend)
	{
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_work_share *ws = thr->ts.work_share;
	struct gomp_team *team = thr->ts.team;
	unsigned long nthreads = team ? team->nthreads : 1;
	unsigned long n, q;
	long start, end;

	if (ws->next == ws->end)
	return false;

	n = (ws->end - ws->next) / ws->incr;
	q = (n + nthreads - 1) / nthreads;

	if (q < ws->chunk_size)
	q = ws->chunk_size;
	if (q > n)
	q = n;

	start = ws->next;
	end = start + q * ws->incr;

	ws->next = end;
	*pstart = start;
	*pend = end;
	return true;
	}

	#ifdef HAVE_SYNC_BUILTINS
	/* Similar, but doesn't require the lock held, and uses compare-and-swap
	instead. Note that the only memory value that changes is ws->next. */

	bool
	gomp_iter_guided_next (long pstart, long pend)
	{
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_work_share *ws = thr->ts.work_share;
	struct gomp_team *team = thr->ts.team;
	unsigned long nthreads = team ? team->nthreads : 1;
	long start, end, nend, incr;
	unsigned long chunk_size;

	start = ws->next;
	end = ws->end;
	incr = ws->incr;
	chunk_size = ws->chunk_size;

	while (1)
	{
	unsigned long n, q;
	long tmp;

	if (start == end)
	return false;

	n = (end - start) / ws->incr;
	q = (n + nthreads - 1) / nthreads;

	if (q < chunk_size)
	q = chunk_size;
	if (q > n)
	q = n;

	nend = start + q * incr;

	tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
	if (__builtin_expect (tmp == start, 1))
	break;

	start = tmp;
	}

	*pstart = start;
	*pend = nend;
	return true;
	}
	#endif /* HAVE_SYNC_BUILTINS */