From 68a8c124e85a0bb6fd6415bf3c0ee69df82a9f0f Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 24 Jan 2025 14:52:02 -0800 Subject: [PATCH] Implement the () operator on sparse tensors (#837) Note that this is a fully functional "locator" for the () operator that works for *all* versatile sparse tensors. Currently, the operator is defined at sparse_tensor level, but it should be moved into tensor_impl after this. Also, clients should always be aware that the () operator for compressed levels is *not* random-access, but involves a search to find if the element is stored. --- examples/sparse_tensor.cu | 18 ++++++ include/matx/core/make_sparse_tensor.h | 16 +++++- include/matx/core/sparse_tensor.h | 76 ++++++++++++++++++++++++++ include/matx/core/tensor_impl.h | 3 +- 4 files changed, 110 insertions(+), 3 deletions(-) diff --git a/examples/sparse_tensor.cu b/examples/sparse_tensor.cu index 32bc58b8..2ac5cb14 100644 --- a/examples/sparse_tensor.cu +++ b/examples/sparse_tensor.cu @@ -97,6 +97,24 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) // print(Acoo); + // + // A very naive way to convert the sparse matrix back to a dense + // matrix. Note that one should **never** use the ()-operator in + // performance critical code, since sparse data structures do + // not provide O(1) random access to their elements (compressed + // levels will use some form of search to determine if an element + // is present). Instead, conversions (and other operations) should + // use sparse operations that are tailored for the sparse data + // structure (such as scanning by row for CSR). 
+ // + tensor_t Dense{{m, n}}; + for (index_t i = 0; i < m; i++) { + for (index_t j = 0; j < n; j++) { + Dense(i, j) = Acoo(i, j); + } + } + print(Dense); + // TODO: operations on Acoo MATX_EXIT_HANDLER(); diff --git a/include/matx/core/make_sparse_tensor.h b/include/matx/core/make_sparse_tensor.h index 3cffa46c..6d81565b 100644 --- a/include/matx/core/make_sparse_tensor.h +++ b/include/matx/core/make_sparse_tensor.h @@ -55,11 +55,23 @@ auto make_tensor_coo(ValTensor &val, CrdTensor &row, CrdTensor &col, static_assert(ValTensor::Rank() == 1 && CrdTensor::Rank() == 1); using VAL = typename ValTensor::value_type; using CRD = typename CrdTensor::value_type; - using POS = int; // no positions, although some forms use [0,nse] + using POS = index_t; + // Note that the COO API typically does not involve positions. + // However, under the formal DSL specifications, the top level + // compression should set up pos[0] = {0, nse}. This is done + // here, using the same memory space as the other data. 
+ POS *ptr; + matxMemorySpace_t space = GetPointerKind(val.GetStorage().data()); + matxAlloc((void **)&ptr, 2 * sizeof(POS), space, 0); + ptr[0] = 0; + ptr[1] = val.Size(0); + raw_pointer_buffer> topp{ptr, 2 * sizeof(POS), + owning}; + basic_storage tp{std::move(topp)}; raw_pointer_buffer> emptyp{nullptr, 0, owning}; basic_storage ep{std::move(emptyp)}; return sparse_tensor_t( - shape, val.GetStorage(), {row.GetStorage(), col.GetStorage()}, {ep, ep}); + shape, val.GetStorage(), {row.GetStorage(), col.GetStorage()}, {tp, ep}); } // Constructs a sparse matrix in CSR format directly from the values, the row diff --git a/include/matx/core/sparse_tensor.h b/include/matx/core/sparse_tensor.h index 473325b0..e0a1e03d 100644 --- a/include/matx/core/sparse_tensor.h +++ b/include/matx/core/sparse_tensor.h @@ -109,6 +109,82 @@ class sparse_tensor_t index_t crdSize(int l) const { return coordinates_[l].size() / sizeof(CRD); } index_t posSize(int l) const { return positions_[l].size() / sizeof(POS); } + // Locates position of an element at given indices, or returns -1 when not + // found. + template + __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t + GetPos(index_t *lvlsz, index_t *lvl, index_t pos) const { + if constexpr (L < LVL) { + using ftype = std::tuple_element_t; + if constexpr (ftype::lvltype == LvlType::Dense) { + // Dense level: pos * size + i. + // TODO: see below, use a constexpr GetLvlSize(L) instead? + const index_t dpos = pos * lvlsz[L] + lvl[L]; + if constexpr (L + 1 < LVL) { + return GetPos(lvlsz, lvl, dpos); + } else { + return dpos; + } + } else if constexpr (ftype::lvltype == LvlType::Singleton) { + // Singleton level: pos if crd[pos] == i and next levels match. 
+ if (this->CRDData(L)[pos] == lvl[L]) { + if constexpr (L + 1 < LVL) { + return GetPos<L + 1>(lvlsz, lvl, pos); + } else { + return pos; + } + } + } else if constexpr (ftype::lvltype == LvlType::Compressed || + ftype::lvltype == LvlType::CompressedNonUnique) { + // Compressed level: scan for match on i and test next levels. + const CRD *c = this->CRDData(L); + const POS *p = this->POSData(L); + for (index_t pp = p[pos], hi = p[pos + 1]; pp < hi; pp++) { + if (c[pp] == lvl[L]) { + if constexpr (L + 1 < LVL) { + const index_t cpos = GetPos<L + 1>(lvlsz, lvl, pp); + if constexpr (ftype::lvltype == LvlType::Compressed) { + return cpos; // always end scan (unique) + } else if (cpos != -1) { + return cpos; // only end scan on success (non-unique) + } + } else { + return pp; + } + } + } + } + } + return -1; // not found + } + + // Element getter (viz. "lhs = Acoo(0,0);"). Note that due to the compact + // nature of sparse data structures, these storage formats do not provide + // cheap random access to their elements. Instead, the implementation will + // search for a stored element at the given position (which involves a scan + // at each compressed level). The implicit value zero is returned when the + // element cannot be found. So, although functional for testing, clients + // should avoid using getters inside performance critical regions, since + // the implementation is far worse than O(1). + template <typename... Is> + __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ VAL + operator()(Is... indices) const noexcept { + static_assert( + sizeof...(Is) == DIM, + "Number of indices of operator() must match rank of sparse tensor"); + cuda::std::array<index_t, DIM> dim{indices...}; + cuda::std::array<index_t, LVL> lvl; + cuda::std::array<index_t, LVL> lvlsz; + TF::dim2lvl(dim.data(), lvl.data(), /*asSize=*/false); + // TODO: only compute once and provide a constexpr LvlSize(l) instead? 
+ TF::dim2lvl(this->Shape().data(), lvlsz.data(), /*asSize=*/true); + const index_t pos = GetPos(lvlsz.data(), lvl.data(), 0); + if (pos != -1) { + return this->Data()[pos]; + } + return static_cast<VAL>(0); // implicit zero + } + private: // Primary storage of sparse tensor (explicitly stored element values). StorageV values_; diff --git a/include/matx/core/tensor_impl.h b/include/matx/core/tensor_impl.h index 4bfe1586..12fff7ce 100644 --- a/include/matx/core/tensor_impl.h +++ b/include/matx/core/tensor_impl.h @@ -637,7 +637,8 @@ MATX_IGNORE_WARNING_POP_GCC * @return * A shape of the data with the appropriate dimensions set */ - __MATX_INLINE__ auto Shape() const noexcept { return this->desc_.Shape(); } + __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ + auto Shape() const noexcept { return this->desc_.Shape(); } /** * Get the strides the tensor from the underlying data