//===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.txt for details. // //===----------------------------------------------------------------------===// #include "cean_util.h" #include "offload_common.h" // 1. allocate element of CeanReadRanges type // 2. initialized it for reading consequently contiguous ranges // described by "ap" argument CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap) { CeanReadRanges * res; // find the max contiguous range int64_t rank = ap->rank - 1; int64_t length = ap->dim[rank].size; for (; rank >= 0; rank--) { if (ap->dim[rank].stride == 1) { length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1); if (rank > 0 && length != ap->dim[rank - 1].size) { break; } } else { break; } } res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) + (ap->rank - rank) * sizeof(CeanReadDim)); res->current_number = 0; res->range_size = length; res->last_noncont_ind = rank; // calculate number of contiguous ranges inside noncontiguous dimensions int count = 1; bool prev_is_cont = true; int64_t offset = 0; for (; rank >= 0; rank--) { res->Dim[rank].count = count; res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size; count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 : (ap->dim[rank].upper - ap->dim[rank].lower + ap->dim[rank].stride) / ap->dim[rank].stride); prev_is_cont = false; offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) * ap->dim[rank].size; } res->range_max_number = count; res -> ptr = (void*)ap->base; res -> init_offset = offset; return res; } // check if ranges described by 1 argument could be transferred into ranges // described by 2-nd one bool cean_ranges_match( CeanReadRanges * read_rng1, CeanReadRanges * read_rng2 ) { return ( read_rng1 == NULL || read_rng2 == NULL || (read_rng1->range_size % read_rng2->range_size == 0 || read_rng2->range_size % read_rng1->range_size == 0)); } // Set next offset and length and returns true for next range. // Returns false if the ranges are over. bool get_next_range( CeanReadRanges * read_rng, int64_t *offset ) { if (++read_rng->current_number > read_rng->range_max_number) { read_rng->current_number = 0; return false; } int rank = 0; int num = read_rng->current_number - 1; int64_t cur_offset = 0; int num_loc; for (; rank <= read_rng->last_noncont_ind; rank++) { num_loc = num / read_rng->Dim[rank].count; cur_offset += num_loc * read_rng->Dim[rank].size; num = num % read_rng->Dim[rank].count; } *offset = cur_offset + read_rng->init_offset; return true; } bool is_arr_desc_contiguous(const arr_desc *ap) { int64_t rank = ap->rank - 1; int64_t length = ap->dim[rank].size; for (; rank >= 0; rank--) { if (ap->dim[rank].stride > 1 && ap->dim[rank].upper - ap->dim[rank].lower != 0) { return false; } else if (length != ap->dim[rank].size) { for (; rank >= 0; rank--) { if (ap->dim[rank].upper - ap->dim[rank].lower != 0) { return false; } } return true; } length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1); } return true; } int64_t cean_get_transf_size(CeanReadRanges * read_rng) { return(read_rng->range_max_number * read_rng->range_size); } static uint64_t last_left, last_right; typedef void (*fpp)(const char *spaces, uint64_t low, uint64_t high, int esize); static void generate_one_range( const char *spaces, uint64_t lrange, uint64_t rrange, fpp fp, int esize ) { OFFLOAD_TRACE(3, "%s generate_one_range(lrange=%p, rrange=%p, esize=%d)\n", spaces, (void*)lrange, (void*)rrange, esize); if (last_left == -1) { // First range last_left = lrange; } else { if (lrange == last_right+1) { // Extend previous range, don't print } else { (*fp)(spaces, last_left, last_right, esize); last_left = lrange; } } last_right = rrange; } static void generate_mem_ranges_one_rank( const char *spaces, uint64_t base, uint64_t rank, const struct dim_desc *ddp, fpp fp, int esize ) { uint64_t lindex = ddp->lindex; uint64_t lower = ddp->lower; uint64_t upper = ddp->upper; uint64_t stride = ddp->stride; uint64_t size = ddp->size; OFFLOAD_TRACE(3, "%s " "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, " "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n", spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize); if (rank == 1) { uint64_t lrange, rrange; if (stride == 1) { lrange = base + (lower-lindex)*size; rrange = lrange + (upper-lower+1)*size - 1; generate_one_range(spaces, lrange, rrange, fp, esize); } else { for (int i=lower-lindex; i<=upper-lindex; i+=stride) { lrange = base + i*size; rrange = lrange + size - 1; generate_one_range(spaces, lrange, rrange, fp, esize); } } } else { for (int i=lower-lindex; i<=upper-lindex; i+=stride) { generate_mem_ranges_one_rank( spaces, base+i*size, rank-1, ddp+1, fp, esize); } } } static void generate_mem_ranges( const char *spaces, const arr_desc *adp, bool deref, fpp fp ) { uint64_t esize; OFFLOAD_TRACE(3, "%s " "generate_mem_ranges(adp=%p, deref=%d, fp)\n", spaces, adp, deref); last_left = -1; last_right = -2; // Element size is derived from last dimension esize = adp->dim[adp->rank-1].size; generate_mem_ranges_one_rank( // For c_cean_var the base addr is the address of the data // For c_cean_var_ptr the base addr is dereferenced to get to the data spaces, deref ? *((uint64_t*)(adp->base)) : adp->base, adp->rank, &adp->dim[0], fp, esize); (*fp)(spaces, last_left, last_right, esize); } // returns offset and length of the data to be transferred void __arr_data_offset_and_length( const arr_desc *adp, int64_t &offset, int64_t &length ) { int64_t rank = adp->rank - 1; int64_t size = adp->dim[rank].size; int64_t r_off = 0; // offset from right boundary // find the rightmost dimension which takes just part of its // range. We define it if the size of left rank is not equal // the range's length between upper and lower boungaries while (rank > 0) { size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1); if (size != adp->dim[rank - 1].size) { break; } rank--; } offset = (adp->dim[rank].lower - adp->dim[rank].lindex) * adp->dim[rank].size; // find gaps both from the left - offset and from the right - r_off for (rank--; rank >= 0; rank--) { offset += (adp->dim[rank].lower - adp->dim[rank].lindex) * adp->dim[rank].size; r_off += adp->dim[rank].size - (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) * adp->dim[rank + 1].size; } length = (adp->dim[0].upper - adp->dim[0].lindex + 1) * adp->dim[0].size - offset - r_off; } #if OFFLOAD_DEBUG > 0 void print_range( const char *spaces, uint64_t low, uint64_t high, int esize ) { char buffer[1024]; char number[32]; OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n", spaces, (void*)low, (void*)high, esize); if (console_enabled < 4) { return; } OFFLOAD_TRACE(4, "%s values:\n", spaces); int count = 0; buffer[0] = '\0'; while (low <= high) { switch (esize) { case 1: sprintf(number, "%d ", *((char *)low)); low += 1; break; case 2: sprintf(number, "%d ", *((short *)low)); low += 2; break; case 4: sprintf(number, "%d ", *((int *)low)); low += 4; break; default: sprintf(number, "0x%016x ", *((uint64_t *)low)); low += 8; break; } strcat(buffer, number); count++; if (count == 10) { OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer); count = 0; buffer[0] = '\0'; } } if (count != 0) { OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer); } } void __arr_desc_dump( const char *spaces, const char *name, const arr_desc *adp, bool deref ) { OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp); if (adp != 0) { OFFLOAD_TRACE(2, "%s base=%llx, rank=%lld\n", spaces, adp->base, adp->rank); for (int i = 0; i < adp->rank; i++) { OFFLOAD_TRACE(2, "%s dimension %d: size=%lld, lindex=%lld, " "lower=%lld, upper=%lld, stride=%lld\n", spaces, i, adp->dim[i].size, adp->dim[i].lindex, adp->dim[i].lower, adp->dim[i].upper, adp->dim[i].stride); } // For c_cean_var the base addr is the address of the data // For c_cean_var_ptr the base addr is dereferenced to get to the data generate_mem_ranges(spaces, adp, deref, &print_range); } } #endif // OFFLOAD_DEBUG