diff options
| author | Tomáš Jusko | 2023-09-25 17:47:01 +0200 |
|---|---|---|
| committer | Tomáš Jusko | 2023-09-25 17:47:01 +0200 |
| commit | 1edaa0c7ed06ac8bd654c145089b30541c5eba73 (patch) | |
| tree | 16aff2b8e1150d05ff446d62188869e862f348ba /pyecsca/sca | |
| parent | 708df6c01c1465f8d881840a28602547adaf70ff (diff) | |
| download | pyecsca-1edaa0c7ed06ac8bd654c145089b30541c5eba73.tar.gz pyecsca-1edaa0c7ed06ac8bd654c145089b30541c5eba73.tar.zst pyecsca-1edaa0c7ed06ac8bd654c145089b30541c5eba73.zip | |
feat: Changed inputs to const memory for general combine functions
Diffstat (limited to 'pyecsca/sca')
| -rw-r--r-- | pyecsca/sca/stacked_traces/combine.py | 46 |
1 file changed, 29 insertions, 17 deletions
diff --git a/pyecsca/sca/stacked_traces/combine.py b/pyecsca/sca/stacked_traces/combine.py index f97e0b2..7a880ae 100644 --- a/pyecsca/sca/stacked_traces/combine.py +++ b/pyecsca/sca/stacked_traces/combine.py @@ -87,6 +87,9 @@ class BaseTraceManager: raise NotImplementedError +ConstInputType = Union[npt.NDArray[np.number], npt.ArrayLike] + + CHUNK_MEMORY_RATIO = 0.4 STREAM_COUNT = 4 @@ -221,17 +224,17 @@ class GPUTraceManager(BaseTraceManager): return int( chunk_memory_ratio * mem_size / element_size) + def get_traces_shape(self) -> Tuple[int, ...]: + return self._traces.samples.shape + def _gpu_combine1D(self, func, - inputs: Optional[ - List[npt.NDArray[np.number]]] = None, + const_inputs: Optional[ + List[ConstInputType]] = None, output_count: int = 1) \ -> Union[CombinedTrace, List[CombinedTrace]]: - if inputs is None: - inputs = [self._traces.samples] - else: - inputs = [self._traces.samples] + inputs - results = self._combine_func(func, inputs, output_count) + const_inputs = [] if const_inputs is None else const_inputs + results = self._combine_func(func, const_inputs, output_count) if output_count == 1: return CombinedTrace( @@ -247,7 +250,7 @@ class GPUTraceManager(BaseTraceManager): def _gpu_combine1D_all(self, func, - inputs: List[npt.NDArray[np.number]], + const_inputs: List[ConstInputType], output_count: int = 1) \ -> List[npt.NDArray[np.number]]: """ @@ -262,9 +265,10 @@ class GPUTraceManager(BaseTraceManager): raise ValueError("Something went wrong. 
" "TPB should be an int") - device_inputs = [ - cuda.to_device(input_) - for input_ in inputs + samples_input = cuda.to_device(self._traces.samples) + device_const_inputs = [ + cuda.const.array_like(const_input) # type: ignore + for const_input in const_inputs ] device_outputs = [ cuda.device_array(self._traces.samples.shape[1]) @@ -272,13 +276,15 @@ class GPUTraceManager(BaseTraceManager): ] bpg = (self._traces.samples.shape[1] + self._tpb - 1) // self._tpb - func[bpg, self._tpb](*device_inputs, *device_outputs) + func[bpg, self._tpb](samples_input, + *device_const_inputs, + *device_outputs) return [device_output.copy_to_host() for device_output in device_outputs] def _gpu_combine1D_chunked(self, func, - inputs: List[npt.NDArray[np.number]], + inputs: List[ConstInputType], output_count: int = 1) \ -> List[npt.NDArray[np.number]]: if self._chunk_size is None: @@ -307,6 +313,11 @@ class GPUTraceManager(BaseTraceManager): for _ in range(self._stream_count) ] + device_const_inputs = [ + cuda.const.array_like(const_input) # type: ignore + for const_input in inputs + ] + chunk_results: List[List[npt.NDArray[np.number]]] = [ [] for _ in range(output_count)] @@ -319,7 +330,6 @@ class GPUTraceManager(BaseTraceManager): event = events[chunk % self._stream_count] if event is not None: event.wait(stream=stream) - # stream.synchronize() pinned_input = pinned_input_buffers[chunk % self._stream_count] np.copyto(pinned_input, self._traces.samples[:, start:end]) @@ -335,7 +345,9 @@ class GPUTraceManager(BaseTraceManager): ] bpg = (end - start + self._tpb - 1) // self._tpb - func[bpg, self._tpb, stream](device_input, *device_outputs) + func[bpg, self._tpb, stream](device_input, + *device_const_inputs, + *device_outputs) event = cuda.event() event.record(stream=stream) events[chunk % self._stream_count] = event @@ -374,10 +386,10 @@ class GPUTraceManager(BaseTraceManager): def run(self, func: Callable, - inputs: Optional[List[npt.NDArray[np.number]]] = None, + const_inputs: 
Optional[List[ConstInputType]] = None, output_count: int = 1) \ -> Union[CombinedTrace, List[CombinedTrace]]: - return self._gpu_combine1D(func, inputs, output_count) + return self._gpu_combine1D(func, const_inputs, output_count) @cuda.jit(device=True, cache=True) |
