468663ddbb
Former-commit-id: 1d6753294b2993e1fbf92de9366bb9544db4189b
147 lines
4.3 KiB
C
147 lines
4.3 KiB
C
/*
|
|
* Copyright 2013 Ecole Normale Superieure
|
|
* Copyright 2015 Sven Verdoolaege
|
|
*
|
|
* Use of this software is governed by the MIT license
|
|
*
|
|
* Written by Sven Verdoolaege,
|
|
* Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
|
|
*/
|
|
|
|
#include <string.h>
|
|
|
|
#include <isl/val.h>
|
|
#include <isl/space.h>
|
|
#include <isl/union_set.h>
|
|
#include <isl/schedule_node.h>
|
|
|
|
#include "hybrid.h"
|
|
#include "gpu_hybrid.h"
|
|
#include "gpu_tree.h"
|
|
#include "schedule.h"
|
|
#include "util.h"
|
|
|
|
/* Have all domain elements been filtered out before reaching
|
|
* the "node" position in the schedule tree?
|
|
*/
|
|
static isl_bool has_empty_domain(__isl_keep isl_schedule_node *node)
|
|
{
|
|
isl_union_set *domain;
|
|
isl_bool empty;
|
|
|
|
domain = isl_schedule_node_get_domain(node);
|
|
empty = isl_union_set_is_empty(domain);
|
|
isl_union_set_free(domain);
|
|
|
|
return empty;
|
|
}
|
|
|
|
/* Given a pointer to a phase in the result of hybrid tiling,
|
|
* map the phase to the device, provided the phase is non-empty.
|
|
* Empty phases can occur if the input schedule domain can be
|
|
* covered by a small number of hexagons that all belong to the same phase.
|
|
*
|
|
* The input has the following form:
|
|
*
|
|
* M - CT - P - C - ...
|
|
*
|
|
* with M the phase marker, CT the space tiling, P the original
|
|
* parent band and C the original child band.
|
|
* The (outer dimensions of the) C band need to be mapped to threads.
|
|
* The (outer dimension of the) CT band needs to be mapped to blocks.
|
|
* The mapping to shared memory needs to be computed between the CT and
|
|
* the P band.
|
|
*
|
|
* The C band is first shifted to start at zero.
|
|
* Then the appropriate markers are introduced and a kernel is
|
|
* created for the tree rooted at CT.
|
|
* If the "unroll_gpu_tile" option is set, then the AST generator
|
|
* is instructed to unroll the P and C bands.
|
|
*/
|
|
static __isl_give isl_schedule_node *update_phase(
|
|
__isl_take isl_schedule_node *node, void *user)
|
|
{
|
|
struct gpu_gen *gen = user;
|
|
int depth0, depth;
|
|
isl_ctx *ctx;
|
|
isl_id *id;
|
|
isl_bool empty_domain;
|
|
ppcg_ht_phase *phase;
|
|
|
|
empty_domain = has_empty_domain(node);
|
|
if (empty_domain < 0)
|
|
return isl_schedule_node_free(node);
|
|
if (empty_domain)
|
|
return node;
|
|
|
|
if (!node)
|
|
return NULL;
|
|
ctx = isl_schedule_node_get_ctx(node);
|
|
|
|
phase = ppcg_ht_phase_extract_from_mark(node);
|
|
|
|
depth0 = isl_schedule_node_get_tree_depth(node);
|
|
|
|
node = isl_schedule_node_child(node, 0);
|
|
|
|
node = isl_schedule_node_child(node, 0);
|
|
node = isl_schedule_node_child(node, 0);
|
|
node = ppcg_ht_phase_shift_space_point(phase, node);
|
|
if (gen->options->unroll_gpu_tile)
|
|
node = ppcg_set_schedule_node_type(node, isl_ast_loop_unroll);
|
|
id = isl_id_alloc(ctx, "thread", NULL);
|
|
node = isl_schedule_node_insert_mark(node, id);
|
|
node = isl_schedule_node_parent(node);
|
|
if (gen->options->unroll_gpu_tile)
|
|
node = ppcg_set_schedule_node_type(node, isl_ast_loop_unroll);
|
|
id = isl_id_alloc(ctx, "shared", NULL);
|
|
node = isl_schedule_node_insert_mark(node, id);
|
|
node = isl_schedule_node_parent(node);
|
|
|
|
node = gpu_create_kernel(gen, node, 0, NULL);
|
|
|
|
depth = isl_schedule_node_get_tree_depth(node);
|
|
node = isl_schedule_node_ancestor(node, depth - depth0);
|
|
|
|
return node;
|
|
}
|
|
|
|
/* Apply hybrid tiling on "node" and its parent based on the (valid)
|
|
* bounds on the relative dependence distances "bounds" and
|
|
* the tile sizes in "tile_sizes".
|
|
* The number of elements in "tile_sizes" is at least as large
|
|
* as the sum of the dimensions of the parent and the child node.
|
|
*
|
|
* Convert the tile_sizes to an isl_multi_val in the right space,
|
|
* insert the hybrid tiling and then create a kernel inside each phase.
|
|
* Finally, remove the phase marks.
|
|
*/
|
|
__isl_give isl_schedule_node *gpu_hybrid_tile(struct gpu_gen *gen,
|
|
__isl_take isl_schedule_node *node, __isl_take ppcg_ht_bounds *bounds,
|
|
int *tile_sizes)
|
|
{
|
|
isl_multi_val *mv;
|
|
isl_space *space, *space2;
|
|
|
|
if (!node || !bounds)
|
|
goto error;
|
|
|
|
space2 = isl_schedule_node_band_get_space(node);
|
|
node = isl_schedule_node_parent(node);
|
|
space = isl_schedule_node_band_get_space(node);
|
|
space = isl_space_product(space, space2);
|
|
mv = ppcg_multi_val_from_int_list(space, tile_sizes);
|
|
|
|
node = ppcg_ht_bounds_insert_tiling(bounds, mv, node, gen->options);
|
|
|
|
node = hybrid_tile_foreach_phase(node, &update_phase, gen);
|
|
|
|
node = hybrid_tile_drop_phase_marks(node);
|
|
|
|
return node;
|
|
error:
|
|
isl_schedule_node_free(node);
|
|
ppcg_ht_bounds_free(bounds);
|
|
return NULL;
|
|
}
|