2022-09-29 11:40:40 -07:00
|
|
|
//===- StorageBase.cpp - TACO-flavored sparse tensor representation -------===//
|
|
|
|
|
//
|
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
//
|
|
|
|
|
// This file contains method definitions for `SparseTensorStorageBase`.
|
|
|
|
|
// In particular we want to ensure that the default implementations of
|
|
|
|
|
// the "partial method specialization" trick aren't inline (since there's
|
2023-10-03 16:28:54 -07:00
|
|
|
// no benefit).
|
2022-09-29 11:40:40 -07:00
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
|
|
#include "mlir/ExecutionEngine/SparseTensor/Storage.h"
|
|
|
|
|
|
|
|
|
|
using namespace mlir::sparse_tensor;
|
|
|
|
|
|
2023-11-30 14:19:02 -08:00
|
|
|
static inline bool isAllDense(uint64_t lvlRank, const LevelType *lvlTypes) {
|
|
|
|
|
for (uint64_t l = 0; l < lvlRank; l++)
|
|
|
|
|
if (!isDenseLT(lvlTypes[l]))
|
|
|
|
|
return false;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-09 13:33:01 -08:00
|
|
|
/// Asserts that `ptr` is non-null and returns it unchanged.  Because this is
/// an expression, the constructor below can check each array argument inside
/// the member initializer that consumes it, i.e. before the array is read,
/// rather than in the constructor body after the copies have been made.
template <typename T>
static inline const T *checkedNonNull(const T *ptr) {
  assert(ptr && "Required array argument must not be null");
  return ptr;
}

/// Builds the storage base from caller-provided arrays.
///
/// The member containers are filled from the half-open ranges
/// `[dimSizes, dimSizes + dimRank)`, `[lvlSizes, lvlSizes + lvlRank)`,
/// `[lvlTypes, lvlTypes + lvlRank)`, `[dim2lvl, dim2lvl + lvlRank)` and
/// `[lvl2dim, lvl2dim + dimRank)`.  `map` is then constructed over the data
/// of the two mapping members (not over the caller's buffers), and `allDense`
/// caches the result of `isAllDense` for the given level types.
///
/// All validation is expressed with `assert`, so it is only active in builds
/// where assertions are enabled:
///   * every array pointer must be non-null (checked before its first use);
///   * both ranks must be non-zero;
///   * every dimension size and every level size must be non-zero;
///   * every level must be one of dense, compressed, loose-compressed,
///     singleton, or n-out-of-m.
SparseTensorStorageBase::SparseTensorStorageBase( // NOLINT
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim)
    : dimSizes(checkedNonNull(dimSizes), dimSizes + dimRank),
      lvlSizes(checkedNonNull(lvlSizes), lvlSizes + lvlRank),
      lvlTypes(checkedNonNull(lvlTypes), lvlTypes + lvlRank),
      dim2lvlVec(checkedNonNull(dim2lvl), dim2lvl + lvlRank),
      lvl2dimVec(checkedNonNull(lvl2dim), lvl2dim + dimRank),
      map(dimRank, lvlRank, dim2lvlVec.data(), lvl2dimVec.data()),
      allDense(isAllDense(lvlRank, lvlTypes)) {
  // NOTE: inside this body the unqualified names `dimSizes`, `lvlSizes`, ...
  // refer to the constructor parameters (raw pointers), which shadow the
  // members of the same name.  Their non-null checks already ran in the
  // initializer list above.

  // Validate dim-indexed parameters.
  assert(dimRank > 0 && "Trivial shape is unsupported");
  for (uint64_t d = 0; d < dimRank; d++)
    assert(dimSizes[d] > 0 && "Dimension size zero has trivial storage");
  // Validate lvl-indexed parameters.
  assert(lvlRank > 0 && "Trivial shape is unsupported");
  for (uint64_t l = 0; l < lvlRank; l++) {
    assert(lvlSizes[l] > 0 && "Level size zero has trivial storage");
    assert(isDenseLvl(l) || isCompressedLvl(l) || isLooseCompressedLvl(l) ||
           isSingletonLvl(l) || isNOutOfMLvl(l));
  }
}
|
|
|
|
|
|
2023-10-16 14:45:57 -07:00
|
|
|
// Helper macro for wrong "partial method specialization" errors.
// Writes a diagnostic naming the offending method to stderr and terminates
// the process with exit status 1.  The two statements are wrapped in
// `do { ... } while (0)` so that `FATAL_PIV(x);` behaves as exactly one
// statement (e.g. under an unbraced `if`/`else`), and the message is
// newline-terminated so it does not run into whatever is printed next.
#define FATAL_PIV(NAME)                                                        \
  do {                                                                         \
    fprintf(stderr, "<P,I,V> type mismatch for: " #NAME "\n");                 \
    exit(1);                                                                   \
  } while (0)
|
2022-09-29 11:40:40 -07:00
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
// Generator for the base-class `getPositions` fallbacks.  Each expansion
// defines the overload taking `std::vector<POS> **`; its only action is to
// report a <P,I,V> mismatch through FATAL_PIV, tagging the message with the
// stringized SUFFIX.  Both function parameters are unused and left unnamed.
#define IMPL_GETPOSITIONS(SUFFIX, POS)                                         \
  void SparseTensorStorageBase::getPositions(std::vector<POS> **,              \
                                             uint64_t) {                       \
    FATAL_PIV("getPositions" #SUFFIX);                                         \
  }
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
// Hand IMPL_GETPOSITIONS to MLIR_SPARSETENSOR_FOREVERY_FIXED_O, which is
// defined outside this file (presumably it expands its argument once per
// fixed-width overhead type -- confirm against its definition), and then
// remove the helper macro so it does not stay defined past this point.
MLIR_SPARSETENSOR_FOREVERY_FIXED_O(IMPL_GETPOSITIONS)
|
|
|
|
|
#undef IMPL_GETPOSITIONS
|
2022-09-29 11:40:40 -07:00
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
// Generator for the base-class `getCoordinates` fallbacks.  Each expansion
// defines the overload taking `std::vector<CRD> **`; its only action is to
// report a <P,I,V> mismatch through FATAL_PIV, tagging the message with the
// stringized SUFFIX.  Both function parameters are unused and left unnamed.
#define IMPL_GETCOORDINATES(SUFFIX, CRD)                                       \
  void SparseTensorStorageBase::getCoordinates(std::vector<CRD> **,            \
                                               uint64_t) {                     \
    FATAL_PIV("getCoordinates" #SUFFIX);                                       \
  }
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
// Hand IMPL_GETCOORDINATES to MLIR_SPARSETENSOR_FOREVERY_FIXED_O, which is
// defined outside this file (presumably it expands its argument once per
// fixed-width overhead type -- confirm against its definition), and then
// remove the helper macro so it does not stay defined past this point.
MLIR_SPARSETENSOR_FOREVERY_FIXED_O(IMPL_GETCOORDINATES)
|
|
|
|
|
#undef IMPL_GETCOORDINATES
|
2022-09-29 11:40:40 -07:00
|
|
|
|
2024-03-29 15:30:36 -07:00
|
|
|
#define IMPL_GETCOORDINATESBUFFER(CNAME, C) \
|
|
|
|
|
void SparseTensorStorageBase::getCoordinatesBuffer(std::vector<C> **, \
|
|
|
|
|
uint64_t) { \
|
|
|
|
|
FATAL_PIV("getCoordinatesBuffer" #CNAME); \
|
|
|
|
|
}
|
|
|
|
|
MLIR_SPARSETENSOR_FOREVERY_FIXED_O(IMPL_GETCOORDINATESBUFFER)
|
|
|
|
|
#undef IMPL_GETCOORDINATESBUFFER
|
|
|
|
|
|
2022-09-29 11:40:40 -07:00
|
|
|
#define IMPL_GETVALUES(VNAME, V) \
|
|
|
|
|
void SparseTensorStorageBase::getValues(std::vector<V> **) { \
|
|
|
|
|
FATAL_PIV("getValues" #VNAME); \
|
|
|
|
|
}
|
2022-09-30 13:01:18 -07:00
|
|
|
MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETVALUES)
|
2022-09-29 11:40:40 -07:00
|
|
|
#undef IMPL_GETVALUES
|
|
|
|
|
|
|
|
|
|
#define IMPL_LEXINSERT(VNAME, V) \
|
|
|
|
|
void SparseTensorStorageBase::lexInsert(const uint64_t *, V) { \
|
|
|
|
|
FATAL_PIV("lexInsert" #VNAME); \
|
|
|
|
|
}
|
2022-09-30 13:01:18 -07:00
|
|
|
MLIR_SPARSETENSOR_FOREVERY_V(IMPL_LEXINSERT)
|
2022-09-29 11:40:40 -07:00
|
|
|
#undef IMPL_LEXINSERT
|
|
|
|
|
|
|
|
|
|
#define IMPL_EXPINSERT(VNAME, V) \
|
|
|
|
|
void SparseTensorStorageBase::expInsert(uint64_t *, V *, bool *, uint64_t *, \
|
2023-10-09 14:42:11 -07:00
|
|
|
uint64_t, uint64_t) { \
|
2022-09-29 11:40:40 -07:00
|
|
|
FATAL_PIV("expInsert" #VNAME); \
|
|
|
|
|
}
|
2022-09-30 13:01:18 -07:00
|
|
|
MLIR_SPARSETENSOR_FOREVERY_V(IMPL_EXPINSERT)
|
2022-09-29 11:40:40 -07:00
|
|
|
#undef IMPL_EXPINSERT
|
|
|
|
|
|
|
|
|
|
// FATAL_PIV is only used by the fallback definitions in this file; undefine
// it so the name does not remain defined past this point.
#undef FATAL_PIV
|