/*
 * Copyright 2013      Ecole Normale Superieure
 * Copyright 2015      Sven Verdoolaege
 *
 * Use of this software is governed by the MIT license
 *
 * Written by Sven Verdoolaege,
 * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
 */

#include <string.h>

#include <isl/val.h>
#include <isl/space.h>
#include <isl/union_set.h>
#include <isl/schedule_node.h>

#include "hybrid.h"
#include "gpu_hybrid.h"
#include "gpu_tree.h"
#include "schedule.h"
#include "util.h"

/* Have all domain elements been filtered out before reaching
 * the "node" position in the schedule tree?
 */
static isl_bool has_empty_domain(__isl_keep isl_schedule_node *node)
{
	isl_union_set *domain;
	isl_bool empty;

	domain = isl_schedule_node_get_domain(node);
	empty = isl_union_set_is_empty(domain);
	isl_union_set_free(domain);

	return empty;
}

/* Given a pointer to a phase in the result of hybrid tiling,
 * map the phase to the device, provided the phase is non-empty.
 * Empty phases can occur if the input schedule domain can be
 * covered by a small number of hexagons that all belong to the same phase.
 *
 * The input has the following form:
 *
 *	M - CT - P - C - ...
 *
 * with M the phase marker, CT the space tiling, P the original
 * parent band and C the original child band.
 * The (outer dimensions of the) C band need to be mapped to threads.
 * The (outer dimension of the) CT band needs to be mapped to blocks.
 * The mapping to shared memory needs to be computed between the CT and
 * the P band.
 *
 * The C band is first shifted to start at zero.
 * Then the appropriate markers are introduced and a kernel is
 * created for the tree rooted at CT.
 * If the "unroll_gpu_tile" option is set, then the AST generator
 * is instructed to unroll the P and C bands.
 */
static __isl_give isl_schedule_node *update_phase(
	__isl_take isl_schedule_node *node, void *user)
{
	struct gpu_gen *gen = user;
	int depth0, depth;
	isl_ctx *ctx;
	isl_id *id;
	isl_bool empty_domain;
	ppcg_ht_phase *phase;

	empty_domain = has_empty_domain(node);
	if (empty_domain < 0)
		return isl_schedule_node_free(node);
	if (empty_domain)
		return node;

	if (!node)
		return NULL;
	ctx = isl_schedule_node_get_ctx(node);

	phase = ppcg_ht_phase_extract_from_mark(node);

	depth0 = isl_schedule_node_get_tree_depth(node);

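	/* Descend from the phase mark M to the space tiling band CT. */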
	node = isl_schedule_node_child(node, 0);

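	/* Move down past the parent band P to the child band C and
	 * shift the C band to start at zero.
	 */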
	node = isl_schedule_node_child(node, 0);
	node = isl_schedule_node_child(node, 0);
	node = ppcg_ht_phase_shift_space_point(phase, node);
	if (gen->options->unroll_gpu_tile)
		node = ppcg_set_schedule_node_type(node, isl_ast_loop_unroll);
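	/* Insert a "thread" mark above the (shifted) C band. */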
	id = isl_id_alloc(ctx, "thread", NULL);
	node = isl_schedule_node_insert_mark(node, id);
	node = isl_schedule_node_parent(node);
	if (gen->options->unroll_gpu_tile)
		node = ppcg_set_schedule_node_type(node, isl_ast_loop_unroll);
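	/* Insert a "shared" mark between the CT and P bands. */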
	id = isl_id_alloc(ctx, "shared", NULL);
	node = isl_schedule_node_insert_mark(node, id);
	node = isl_schedule_node_parent(node);

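	/* The subtree now has the form
	 *
	 *	M - CT - shared - P - thread - C - ...
	 *
	 * with "node" positioned at CT.
	 * Create a kernel for the tree rooted at the CT band.
	 */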
	node = gpu_create_kernel(gen, node, 0, NULL);

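	/* Move back up to the original depth of the phase mark. */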
	depth = isl_schedule_node_get_tree_depth(node);
	node = isl_schedule_node_ancestor(node, depth - depth0);

	return node;
}

/* Apply hybrid tiling on "node" and its parent based on the (valid)
 * bounds on the relative dependence distances "bounds" and
 * the tile sizes in "tile_sizes".
 * The number of elements in "tile_sizes" is at least as large
 * as the sum of the dimensions of the parent and child nodes.
 *
 * Convert the tile_sizes to an isl_multi_val in the right space,
 * insert the hybrid tiling and then create a kernel inside each phase.
 * Finally, remove the phase marks.
 */
__isl_give isl_schedule_node *gpu_hybrid_tile(struct gpu_gen *gen,
	__isl_take isl_schedule_node *node, __isl_take ppcg_ht_bounds *bounds,
	int *tile_sizes)
{
	isl_multi_val *mv;
	isl_space *space, *space2;

	if (!node || !bounds)
		goto error;

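	/* Construct the product of the spaces of the parent and child bands
	 * and convert "tile_sizes" to an isl_multi_val in that space.
	 */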
	space2 = isl_schedule_node_band_get_space(node);
	node = isl_schedule_node_parent(node);
	space = isl_schedule_node_band_get_space(node);
	space = isl_space_product(space, space2);
	mv = ppcg_multi_val_from_int_list(space, tile_sizes);

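	/* Perform the hybrid tiling of the parent and child bands. */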
	node = ppcg_ht_bounds_insert_tiling(bounds, mv, node, gen->options);

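	/* Create a kernel inside each non-empty phase (see update_phase()). */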
	node = hybrid_tile_foreach_phase(node, &update_phase, gen);

	node = hybrid_tile_drop_phase_marks(node);

	return node;
error:
	isl_schedule_node_free(node);
	ppcg_ht_bounds_free(bounds);
	return NULL;
}