My Project
Loading...
Searching...
No Matches
CuOwnerOverlapCopy.hpp
1/*
2 Copyright 2022-2023 SINTEF AS
3
4 This file is part of the Open Porous Media project (OPM).
5
6 OPM is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 OPM is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with OPM. If not, see <http://www.gnu.org/licenses/>.
18*/
19#ifndef OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
20#define OPM_CUISTL_CUOWNEROVERLAPCOPY_HPP
21#include <dune/istl/owneroverlapcopy.hh>
22#include <memory>
23#include <mutex>
24#include <opm/simulators/linalg/cuistl/CuVector.hpp>
25
26namespace Opm::cuistl
27{
41template <class field_type, int block_size, class OwnerOverlapCopyCommunicationType>
43{
44public:
45 using X = CuVector<field_type>;
46
47 CuOwnerOverlapCopy(const OwnerOverlapCopyCommunicationType& cpuOwnerOverlapCopy)
48 : m_cpuOwnerOverlapCopy(cpuOwnerOverlapCopy)
49 {
50 }
58 void dot(const X& x, const X& y, field_type& output) const
59 {
60 std::call_once(m_initializedIndices, [&]() { initIndexSet(); });
61
62 const auto dotAtRank = x.dot(y, *m_indicesOwner);
63 output = m_cpuOwnerOverlapCopy.communicator().sum(dotAtRank);
64 }
65
72 field_type norm(const X& x) const
73 {
74 auto xDotX = field_type(0);
75 this->dot(x, x, xDotX);
76
77 using std::sqrt;
78 return sqrt(xDotX);
79 }
80
88 void project(X& x) const
89 {
90 std::call_once(m_initializedIndices, [&]() { initIndexSet(); });
91 x.setZeroAtIndexSet(*m_indicesCopy);
92 }
93
100 void copyOwnerToAll(const X& source, X& dest) const
101 {
102 // TODO: [perf] Can we reduce copying from the GPU here?
103 // TODO: [perf] Maybe create a global buffer instead?
104 auto sourceAsDuneVector = source.template asDuneBlockVector<block_size>();
105 auto destAsDuneVector = dest.template asDuneBlockVector<block_size>();
106 m_cpuOwnerOverlapCopy.copyOwnerToAll(sourceAsDuneVector, destAsDuneVector);
107 dest.copyFromHost(destAsDuneVector);
108 }
109
110
111
112private:
113 const OwnerOverlapCopyCommunicationType& m_cpuOwnerOverlapCopy;
114
115 // Used to call the initIndexSet. Note that this is kind of a
116 // premature optimization, in the sense that we could just initialize these indices
117 // always, but they are not always used.
118 mutable std::once_flag m_initializedIndices;
119 mutable std::unique_ptr<CuVector<int>> m_indicesCopy;
120 mutable std::unique_ptr<CuVector<int>> m_indicesOwner;
121
122
123 void initIndexSet() const
124 {
125 // We need indices that we we will use in the project, dot and norm calls.
126 // TODO: [premature perf] Can this be run once per instance? Or do we need to rebuild every time?
127 const auto& pis = m_cpuOwnerOverlapCopy.indexSet();
128 std::vector<int> indicesCopyOnCPU;
129 std::vector<int> indicesOwnerCPU;
130 for (const auto& index : pis) {
131 if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::copy) {
132 for (int component = 0; component < block_size; ++component) {
133 indicesCopyOnCPU.push_back(index.local().local() * block_size + component);
134 }
135 }
136
137 if (index.local().attribute() == Dune::OwnerOverlapCopyAttributeSet::owner) {
138 for (int component = 0; component < block_size; ++component) {
139 indicesOwnerCPU.push_back(index.local().local() * block_size + component);
140 }
141 }
142 }
143
144 m_indicesCopy = std::make_unique<CuVector<int>>(indicesCopyOnCPU);
145 m_indicesOwner = std::make_unique<CuVector<int>>(indicesOwnerCPU);
146 }
147};
148} // namespace Opm::cuistl
149#endif
CUDA compatiable variant of Dune::OwnerOverlapCopyCommunication.
Definition CuOwnerOverlapCopy.hpp:43
void project(X &x) const
project will project x to the owned subspace
Definition CuOwnerOverlapCopy.hpp:88
field_type norm(const X &x) const
norm computes the l^2-norm of x across processes.
Definition CuOwnerOverlapCopy.hpp:72
void copyOwnerToAll(const X &source, X &dest) const
copyOwnerToAll will copy source to the CPU, then call OwnerOverlapCopyCommunicationType::copyOwnerToA...
Definition CuOwnerOverlapCopy.hpp:100
void dot(const X &x, const X &y, field_type &output) const
dot will carry out the dot product between x and y on the owned indices, then sum up the result acros...
Definition CuOwnerOverlapCopy.hpp:58