From c4cfc3689cf0852d47e12478b51572b5231f787d Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Sun, 24 Sep 2023 00:22:18 +0200 Subject: feat: Added extra inputs to general combine function --- pyecsca/sca/stacked_traces/combine.py | 46 +++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/pyecsca/sca/stacked_traces/combine.py b/pyecsca/sca/stacked_traces/combine.py index 1e7f7fb..f97e0b2 100644 --- a/pyecsca/sca/stacked_traces/combine.py +++ b/pyecsca/sca/stacked_traces/combine.py @@ -221,9 +221,17 @@ class GPUTraceManager(BaseTraceManager): return int( chunk_memory_ratio * mem_size / element_size) - def _gpu_combine1D(self, func, output_count: int = 1) \ + def _gpu_combine1D(self, + func, + inputs: Optional[ + List[npt.NDArray[np.number]]] = None, + output_count: int = 1) \ -> Union[CombinedTrace, List[CombinedTrace]]: - results = self._combine_func(func, output_count) + if inputs is None: + inputs = [self._traces.samples] + else: + inputs = [self._traces.samples] + inputs + results = self._combine_func(func, inputs, output_count) if output_count == 1: return CombinedTrace( @@ -237,7 +245,10 @@ class GPUTraceManager(BaseTraceManager): in results ] - def _gpu_combine1D_all(self, func, output_count: int = 1) \ + def _gpu_combine1D_all(self, + func, + inputs: List[npt.NDArray[np.number]], + output_count: int = 1) \ -> List[npt.NDArray[np.number]]: """ Runs a combination function on the samples column-wise. @@ -251,18 +262,24 @@ class GPUTraceManager(BaseTraceManager): raise ValueError("Something went wrong. " "TPB should be an int") - device_input = cuda.to_device(self._traces.samples) + device_inputs = [ + cuda.to_device(input_) + for input_ in inputs + ] device_outputs = [ cuda.device_array(self._traces.samples.shape[1]) for _ in range(output_count) ] bpg = (self._traces.samples.shape[1] + self._tpb - 1) // self._tpb - func[bpg, self._tpb](device_input, *device_outputs) + func[bpg, self._tpb](*device_inputs, *device_outputs) return [device_output.copy_to_host() for device_output in device_outputs] - def _gpu_combine1D_chunked(self, func, output_count: int = 1) \ + def _gpu_combine1D_chunked(self, + func, + inputs: List[npt.NDArray[np.number]], + output_count: int = 1) \ -> List[npt.NDArray[np.number]]: if self._chunk_size is None: raise ValueError("Something went wrong. " @@ -348,13 +365,20 @@ class GPUTraceManager(BaseTraceManager): def variance(self) -> CombinedTrace: return cast(CombinedTrace, self._gpu_combine1D(gpu_variance, 1)) - def average_and_variance(self) -> Tuple[CombinedTrace, CombinedTrace]: + def average_and_variance(self) -> List[CombinedTrace]: averages, variances = self._gpu_combine1D(gpu_avg_var, 2) - return averages, variances + return [averages, variances] def add(self) -> CombinedTrace: return cast(CombinedTrace, self._gpu_combine1D(gpu_add, 1)) + def run(self, + func: Callable, + inputs: Optional[List[npt.NDArray[np.number]]] = None, + output_count: int = 1) \ + -> Union[CombinedTrace, List[CombinedTrace]]: + return self._gpu_combine1D(func, inputs, output_count) + @cuda.jit(device=True, cache=True) def _gpu_average(col: int, samples: npt.NDArray[np.number], @@ -390,7 +414,8 @@ def gpu_average(samples: npt.NDArray[np.number], @cuda.jit(device=True, cache=True) -def _gpu_var_from_avg(col: int, samples: npt.NDArray[np.number], +def _gpu_var_from_avg(col: int, + samples: npt.NDArray[np.number], averages: npt.NDArray[np.number], result: npt.NDArray[np.number]): """ @@ -531,7 +556,8 @@ class CPUTraceManager: """ # TODO: Consider other ways to implement this return CombinedTrace( - np.average(self.traces.samples[np.apply_along_axis(condition, 1, self.traces.samples)], 1), + np.average(self.traces.samples[np.apply_along_axis( + condition, 1, self.traces.samples)], 1), self.traces.meta ) -- cgit v1.2.3-70-g09d2 From 708df6c01c1465f8d881840a28602547adaf70ff Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Sun, 24 Sep 2023 00:22:47 +0200 Subject: feat: Added Pearson correlation coefficient for GPU --- pyecsca/sca/stacked_traces/correlate.py | 44 +++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 pyecsca/sca/stacked_traces/correlate.py diff --git a/pyecsca/sca/stacked_traces/correlate.py b/pyecsca/sca/stacked_traces/correlate.py new file mode 100644 index 0000000..10705ca --- /dev/null +++ b/pyecsca/sca/stacked_traces/correlate.py @@ -0,0 +1,44 @@ +import numpy as np +import numpy.typing as npt +from numba import cuda +from math import sqrt + + +@cuda.jit(device=True, cache=True) +def gpu_pearson_corr(samples: npt.NDArray[np.number], + intermediate_values: npt.NDArray[np.number], + result: cuda.devicearray.DeviceNDArray): + """ + Calculates the Pearson correlation coefficient between the given samples and intermediate values using GPU acceleration. + + :param samples: A 2D array of shape (n, m) containing the samples. + :type samples: npt.NDArray[np.number] + :param intermediate_values: A 1D array of shape (n,) containing the intermediate values. + :type intermediate_values: npt.NDArray[np.number] + :param result: A 1D array of shape (m,) to store the resulting correlation coefficients. + :type result: cuda.devicearray.DeviceNDArray + """ + col: int = cuda.grid(1) # type: ignore + + if col >= samples.shape[1]: # type: ignore + return + + n = samples.shape[0] + samples_sum = 0. + samples_sq_sum = 0. + intermed_sum = 0. + intermed_sq_sum = 0. + product_sum = 0. + + for row in range(n): + samples_sum += samples[row, col] + samples_sq_sum += samples[row, col] ** 2 + intermed_sum += intermediate_values[row] + intermed_sq_sum += intermediate_values[row] ** 2 + product_sum += samples[row, col] * intermediate_values[row] + + numerator = n * product_sum - samples_sum * intermed_sum + denominator = (sqrt(n * samples_sq_sum - samples_sum * samples_sum) + * sqrt(n * intermed_sq_sum - intermed_sum * intermed_sum)) + + result[col] = numerator / denominator -- cgit v1.2.3-70-g09d2 From 1edaa0c7ed06ac8bd654c145089b30541c5eba73 Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Mon, 25 Sep 2023 17:47:01 +0200 Subject: feat: Changed inputs to const memory for general combine functions --- pyecsca/sca/stacked_traces/combine.py | 46 ++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/pyecsca/sca/stacked_traces/combine.py b/pyecsca/sca/stacked_traces/combine.py index f97e0b2..7a880ae 100644 --- a/pyecsca/sca/stacked_traces/combine.py +++ b/pyecsca/sca/stacked_traces/combine.py @@ -87,6 +87,9 @@ class BaseTraceManager: raise NotImplementedError +ConstInputType = Union[npt.NDArray[np.number], npt.ArrayLike] + + CHUNK_MEMORY_RATIO = 0.4 STREAM_COUNT = 4 @@ -221,17 +224,17 @@ class GPUTraceManager(BaseTraceManager): return int( chunk_memory_ratio * mem_size / element_size) + def get_traces_shape(self) -> Tuple[int, ...]: + return self._traces.samples.shape + def _gpu_combine1D(self, func, - inputs: Optional[ - List[npt.NDArray[np.number]]] = None, + const_inputs: Optional[ + List[ConstInputType]] = None, output_count: int = 1) \ -> Union[CombinedTrace, List[CombinedTrace]]: - if inputs is None: - inputs = [self._traces.samples] - else: - inputs = [self._traces.samples] + inputs - results = self._combine_func(func, inputs, output_count) + const_inputs = [] if const_inputs is None else const_inputs + results = self._combine_func(func, const_inputs, output_count) if output_count == 1: return CombinedTrace( @@ -247,7 +250,7 @@ class GPUTraceManager(BaseTraceManager): def _gpu_combine1D_all(self, func, - inputs: List[npt.NDArray[np.number]], + const_inputs: List[ConstInputType], output_count: int = 1) \ -> List[npt.NDArray[np.number]]: """ @@ -262,9 +265,10 @@ class GPUTraceManager(BaseTraceManager): raise ValueError("Something went wrong. " "TPB should be an int") - device_inputs = [ - cuda.to_device(input_) - for input_ in inputs + samples_input = cuda.to_device(self._traces.samples) + device_const_inputs = [ + cuda.const.array_like(const_input) # type: ignore + for const_input in const_inputs ] device_outputs = [ cuda.device_array(self._traces.samples.shape[1]) @@ -272,13 +276,15 @@ class GPUTraceManager(BaseTraceManager): ] bpg = (self._traces.samples.shape[1] + self._tpb - 1) // self._tpb - func[bpg, self._tpb](*device_inputs, *device_outputs) + func[bpg, self._tpb](samples_input, + *device_const_inputs, + *device_outputs) return [device_output.copy_to_host() for device_output in device_outputs] def _gpu_combine1D_chunked(self, func, - inputs: List[npt.NDArray[np.number]], + inputs: List[ConstInputType], output_count: int = 1) \ -> List[npt.NDArray[np.number]]: if self._chunk_size is None: @@ -307,6 +313,11 @@ class GPUTraceManager(BaseTraceManager): for _ in range(self._stream_count) ] + device_const_inputs = [ + cuda.const.array_like(const_input) # type: ignore + for const_input in inputs + ] + chunk_results: List[List[npt.NDArray[np.number]]] = [ [] for _ in range(output_count)] @@ -319,7 +330,6 @@ class GPUTraceManager(BaseTraceManager): event = events[chunk % self._stream_count] if event is not None: event.wait(stream=stream) - # stream.synchronize() pinned_input = pinned_input_buffers[chunk % self._stream_count] np.copyto(pinned_input, self._traces.samples[:, start:end]) @@ -335,7 +345,9 @@ class GPUTraceManager(BaseTraceManager): ] bpg = (end - start + self._tpb - 1) // self._tpb - func[bpg, self._tpb, stream](device_input, *device_outputs) + func[bpg, self._tpb, stream](device_input, + *device_const_inputs, + *device_outputs) event = cuda.event() event.record(stream=stream) events[chunk % self._stream_count] = event @@ -374,10 +386,10 @@ class GPUTraceManager(BaseTraceManager): def run(self, func: Callable, - inputs: Optional[List[npt.NDArray[np.number]]] = None, + const_inputs: Optional[List[ConstInputType]] = None, output_count: int = 1) \ -> Union[CombinedTrace, List[CombinedTrace]]: - return self._gpu_combine1D(func, inputs, output_count) + return self._gpu_combine1D(func, const_inputs, output_count) @cuda.jit(device=True, cache=True) -- cgit v1.2.3-70-g09d2 From 7c2ec79c0181d44b0ef55360f6baaff64f00a97b Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Mon, 25 Sep 2023 17:47:57 +0200 Subject: feat: Added host Pearson coefficient runner function --- pyecsca/sca/stacked_traces/correlate.py | 47 ++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/pyecsca/sca/stacked_traces/correlate.py b/pyecsca/sca/stacked_traces/correlate.py index 10705ca..862e664 100644 --- a/pyecsca/sca/stacked_traces/correlate.py +++ b/pyecsca/sca/stacked_traces/correlate.py @@ -1,13 +1,46 @@ import numpy as np import numpy.typing as npt from numba import cuda +from numba.cuda.cudadrv.devicearray import DeviceNDArray from math import sqrt +from typing import List, Optional, Union +from .combine import GPUTraceManager +from .stacked_traces import StackedTraces +from ..trace.trace import CombinedTrace + + +def gpu_pearson_corr(intermediate_values: npt.NDArray[np.number], + stacked_traces: Optional[StackedTraces] = None, + trace_manager: Optional[GPUTraceManager] = None, + **tm_kwargs) -> Union[CombinedTrace, List[CombinedTrace]]: + if (stacked_traces is None) == (trace_manager is None): + raise ValueError("Either samples or trace manager must be given.") + + if trace_manager is None: + assert stacked_traces is not None + trace_manager = GPUTraceManager(stacked_traces, **tm_kwargs) + + if (len(intermediate_values.shape) != 1 + or (intermediate_values.shape[0] + != trace_manager.get_traces_shape()[0])): + raise ValueError("Intermediate values have to be a vector " + "as long as trace_count") + + intermed_sum: np.number = np.sum(intermediate_values) + intermed_sq_sum: np.number = np.sum(np.square(intermediate_values)) + + return trace_manager.run( + _gpu_pearson_corr, + [intermediate_values, [intermed_sum], [intermed_sq_sum]] + ) @cuda.jit(device=True, cache=True) -def gpu_pearson_corr(samples: npt.NDArray[np.number], - intermediate_values: npt.NDArray[np.number], - result: cuda.devicearray.DeviceNDArray): +def _gpu_pearson_corr(samples: DeviceNDArray, + intermediate_values: DeviceNDArray, + intermed_sum: DeviceNDArray, + intermed_sq_sum: DeviceNDArray, + result: DeviceNDArray): """ Calculates the Pearson correlation coefficient between the given samples and intermediate values using GPU acceleration. @@ -26,19 +59,15 @@ def gpu_pearson_corr(samples: npt.NDArray[np.number], n = samples.shape[0] samples_sum = 0. samples_sq_sum = 0. - intermed_sum = 0. - intermed_sq_sum = 0. product_sum = 0. for row in range(n): samples_sum += samples[row, col] samples_sq_sum += samples[row, col] ** 2 - intermed_sum += intermediate_values[row] - intermed_sq_sum += intermediate_values[row] ** 2 product_sum += samples[row, col] * intermediate_values[row] numerator = n * product_sum - samples_sum * intermed_sum - denominator = (sqrt(n * samples_sq_sum - samples_sum * samples_sum) - * sqrt(n * intermed_sq_sum - intermed_sum * intermed_sum)) + denominator = (sqrt(n * samples_sq_sum - samples_sum ** 2) + * sqrt(n * intermed_sq_sum[0] - intermed_sum[0] ** 2)) result[col] = numerator / denominator -- cgit v1.2.3-70-g09d2 From 9094d16a62de9ca48141f5c447cc5771c7c1a4cd Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Sat, 30 Sep 2023 00:03:23 +0200 Subject: Changed const memory to pageable memory --- pyecsca/sca/stacked_traces/combine.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/pyecsca/sca/stacked_traces/combine.py b/pyecsca/sca/stacked_traces/combine.py index 7a880ae..d5ecf32 100644 --- a/pyecsca/sca/stacked_traces/combine.py +++ b/pyecsca/sca/stacked_traces/combine.py @@ -87,7 +87,7 @@ class BaseTraceManager: raise NotImplementedError -ConstInputType = Union[npt.NDArray[np.number], npt.ArrayLike] +InputType = Union[npt.NDArray[np.number], npt.ArrayLike] CHUNK_MEMORY_RATIO = 0.4 @@ -229,12 +229,11 @@ class GPUTraceManager(BaseTraceManager): def _gpu_combine1D(self, func, - const_inputs: Optional[ - List[ConstInputType]] = None, + inputs: Optional[List[InputType]] = None, output_count: int = 1) \ -> Union[CombinedTrace, List[CombinedTrace]]: - const_inputs = [] if const_inputs is None else const_inputs - results = self._combine_func(func, const_inputs, output_count) + inputs = [] if inputs is None else inputs + results = self._combine_func(func, inputs, output_count) if output_count == 1: return CombinedTrace( @@ -250,7 +249,7 @@ class GPUTraceManager(BaseTraceManager): def _gpu_combine1D_all(self, func, - const_inputs: List[ConstInputType], + inputs: List[InputType], output_count: int = 1) \ -> List[npt.NDArray[np.number]]: """ @@ -266,9 +265,9 @@ class GPUTraceManager(BaseTraceManager): "TPB should be an int") samples_input = cuda.to_device(self._traces.samples) - device_const_inputs = [ - cuda.const.array_like(const_input) # type: ignore - for const_input in const_inputs + device_inputs = [ + cuda.to_device(inp) # type: ignore + for inp in inputs ] device_outputs = [ cuda.device_array(self._traces.samples.shape[1]) @@ -277,14 +276,14 @@ class GPUTraceManager(BaseTraceManager): bpg = (self._traces.samples.shape[1] + self._tpb - 1) // self._tpb func[bpg, self._tpb](samples_input, - *device_const_inputs, + *device_inputs, *device_outputs) return [device_output.copy_to_host() for device_output in device_outputs] def _gpu_combine1D_chunked(self, func, - inputs: List[ConstInputType], + inputs: List[InputType], output_count: int = 1) \ -> List[npt.NDArray[np.number]]: if self._chunk_size is None: @@ -313,9 +312,9 @@ class GPUTraceManager(BaseTraceManager): for _ in range(self._stream_count) ] - device_const_inputs = [ - cuda.const.array_like(const_input) # type: ignore - for const_input in inputs + device_inputs = [ + cuda.const.array_like(inp) # type: ignore + for inp in inputs ] chunk_results: List[List[npt.NDArray[np.number]]] = [ @@ -346,7 +345,7 @@ class GPUTraceManager(BaseTraceManager): bpg = (end - start + self._tpb - 1) // self._tpb func[bpg, self._tpb, stream](device_input, - *device_const_inputs, + *device_inputs, *device_outputs) event = cuda.event() event.record(stream=stream) @@ -386,10 +385,10 @@ class GPUTraceManager(BaseTraceManager): def run(self, func: Callable, - const_inputs: Optional[List[ConstInputType]] = None, + inputs: Optional[List[InputType]] = None, output_count: int = 1) \ -> Union[CombinedTrace, List[CombinedTrace]]: - return self._gpu_combine1D(func, const_inputs, output_count) + return self._gpu_combine1D(func, inputs, output_count) @cuda.jit(device=True, cache=True) -- cgit v1.2.3-70-g09d2 From 4b585447ce4bcfaef49123ce4630a5a85b3a16e0 Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Sat, 30 Sep 2023 00:05:11 +0200 Subject: fix: Fixed GPU Pearson corr coefficient --- pyecsca/sca/stacked_traces/correlate.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pyecsca/sca/stacked_traces/correlate.py b/pyecsca/sca/stacked_traces/correlate.py index 862e664..bac9f7e 100644 --- a/pyecsca/sca/stacked_traces/correlate.py +++ b/pyecsca/sca/stacked_traces/correlate.py @@ -4,7 +4,7 @@ from numba import cuda from numba.cuda.cudadrv.devicearray import DeviceNDArray from math import sqrt from typing import List, Optional, Union -from .combine import GPUTraceManager +from .combine import InputType, GPUTraceManager from .stacked_traces import StackedTraces from ..trace.trace import CombinedTrace @@ -28,14 +28,17 @@ def gpu_pearson_corr(intermediate_values: npt.NDArray[np.number], intermed_sum: np.number = np.sum(intermediate_values) intermed_sq_sum: np.number = np.sum(np.square(intermediate_values)) + inputs: List[InputType] = [intermediate_values, + np.array([intermed_sum]), + np.array([intermed_sq_sum])] return trace_manager.run( _gpu_pearson_corr, - [intermediate_values, [intermed_sum], [intermed_sq_sum]] + inputs ) -@cuda.jit(device=True, cache=True) +@cuda.jit(cache=True) def _gpu_pearson_corr(samples: DeviceNDArray, intermediate_values: DeviceNDArray, intermed_sum: DeviceNDArray, @@ -66,8 +69,8 @@ def _gpu_pearson_corr(samples: DeviceNDArray, samples_sq_sum += samples[row, col] ** 2 product_sum += samples[row, col] * intermediate_values[row] - numerator = n * product_sum - samples_sum * intermed_sum - denominator = (sqrt(n * samples_sq_sum - samples_sum ** 2) - * sqrt(n * intermed_sq_sum[0] - intermed_sum[0] ** 2)) + numerator = float(n) * product_sum - samples_sum * intermed_sum[0] + denominator = (sqrt(float(n) * samples_sq_sum - samples_sum ** 2) + * sqrt(float(n) * intermed_sq_sum[0] - intermed_sum[0] ** 2)) result[col] = numerator / denominator -- cgit v1.2.3-70-g09d2 From 0dd8114e96cdad82e572e7b74eb7ed46b0c92c83 Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Sat, 30 Sep 2023 00:09:18 +0200 Subject: fix: Typing and wrong arguments --- pyecsca/sca/stacked_traces/combine.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pyecsca/sca/stacked_traces/combine.py b/pyecsca/sca/stacked_traces/combine.py index d5ecf32..e9a54e0 100644 --- a/pyecsca/sca/stacked_traces/combine.py +++ b/pyecsca/sca/stacked_traces/combine.py @@ -363,7 +363,7 @@ class GPUTraceManager(BaseTraceManager): return [np.concatenate(chunk_result) for chunk_result in chunk_results] def average(self) -> CombinedTrace: - return cast(CombinedTrace, self._gpu_combine1D(gpu_average, 1)) + return cast(CombinedTrace, self._gpu_combine1D(gpu_average)) def conditional_average(self, cond: Callable[[npt.NDArray[np.number]], bool]) \ @@ -371,17 +371,17 @@ class GPUTraceManager(BaseTraceManager): raise NotImplementedError() def standard_deviation(self) -> CombinedTrace: - return cast(CombinedTrace, self._gpu_combine1D(gpu_std_dev, 1)) + return cast(CombinedTrace, self._gpu_combine1D(gpu_std_dev)) def variance(self) -> CombinedTrace: - return cast(CombinedTrace, self._gpu_combine1D(gpu_variance, 1)) + return cast(CombinedTrace, self._gpu_combine1D(gpu_variance)) def average_and_variance(self) -> List[CombinedTrace]: - averages, variances = self._gpu_combine1D(gpu_avg_var, 2) + averages, variances = self._gpu_combine1D(gpu_avg_var, output_count=2) return [averages, variances] def add(self) -> CombinedTrace: - return cast(CombinedTrace, self._gpu_combine1D(gpu_add, 1)) + return cast(CombinedTrace, self._gpu_combine1D(gpu_add)) def run(self, func: Callable, @@ -416,7 +416,7 @@ def gpu_average(samples: npt.NDArray[np.number], :param samples: Stacked traces' samples. :param result: Result output array. """ - col = cuda.grid(1) + col = cuda.grid(1) # type: ignore if col >= samples.shape[1]: return @@ -467,7 +467,7 @@ def gpu_std_dev(samples: npt.NDArray[np.number], :param samples: Stacked traces' samples. :param result: Result output array. """ - col = cuda.grid(1) + col = cuda.grid(1) # type: ignore if col >= samples.shape[1]: return @@ -486,7 +486,7 @@ def gpu_variance(samples: npt.NDArray[np.number], :param samples: Stacked traces' samples. :param result: Result output array. """ - col = cuda.grid(1) + col = cuda.grid(1) # type: ignore if col >= samples.shape[1]: return @@ -505,7 +505,7 @@ def gpu_avg_var(samples: npt.NDArray[np.number], :param result_avg: Result average output array. :param result_var: Result variance output array. """ - col = cuda.grid(1) + col = cuda.grid(1) # type: ignore if col >= samples.shape[1]: return @@ -523,7 +523,7 @@ def gpu_add(samples: npt.NDArray[np.number], :param samples: Stacked traces' samples. :param result: Result output array. """ - col = cuda.grid(1) + col = cuda.grid(1) # type: ignore if col >= samples.shape[1]: return -- cgit v1.2.3-70-g09d2 From 7468e32e167a1831cab66a1466bd48b9df644c65 Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Sat, 30 Sep 2023 00:09:33 +0200 Subject: fix: Lines too long --- pyecsca/sca/stacked_traces/correlate.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyecsca/sca/stacked_traces/correlate.py b/pyecsca/sca/stacked_traces/correlate.py index bac9f7e..3e0f1d2 100644 --- a/pyecsca/sca/stacked_traces/correlate.py +++ b/pyecsca/sca/stacked_traces/correlate.py @@ -70,7 +70,8 @@ def _gpu_pearson_corr(samples: DeviceNDArray, product_sum += samples[row, col] * intermediate_values[row] numerator = float(n) * product_sum - samples_sum * intermed_sum[0] - denominator = (sqrt(float(n) * samples_sq_sum - samples_sum ** 2) - * sqrt(float(n) * intermed_sq_sum[0] - intermed_sum[0] ** 2)) + denom_samp = sqrt(float(n) * samples_sq_sum - samples_sum ** 2) + denom_int = sqrt(float(n) * intermed_sq_sum[0] - intermed_sum[0] ** 2) + denominator = denom_samp * denom_int result[col] = numerator / denominator -- cgit v1.2.3-70-g09d2 From 31414412c1c8c8a17497101a43cdcf1d46c1a7dc Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Sat, 30 Sep 2023 00:10:51 +0200 Subject: feat: Added GPU Pearson corr coefficient tests --- test/sca/test_stacked_correlate.py | 74 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 test/sca/test_stacked_correlate.py diff --git a/test/sca/test_stacked_correlate.py b/test/sca/test_stacked_correlate.py new file mode 100644 index 0000000..ce5a9ed --- /dev/null +++ b/test/sca/test_stacked_correlate.py @@ -0,0 +1,74 @@ +import pytest +from numba import cuda +import numpy as np +from pyecsca.sca import ( + StackedTraces, + GPUTraceManager, + CombinedTrace +) +from pyecsca.sca.stacked_traces.correlate import gpu_pearson_corr + +TPB = 128 +TRACE_COUNT = 2 ** 10 +TRACE_LEN = 2 ** 15 +RTOL = 1e-5 +ATOL = 1e-5 + + +@pytest.fixture() +def samples(): + np.random.seed(0x1234) + return np.random.rand(TRACE_COUNT, TRACE_LEN).astype(np.float32, order="F") + + +@pytest.fixture() +def gpu_manager(samples): + if not cuda.is_available(): + pytest.skip("CUDA not available") + return GPUTraceManager(StackedTraces(samples), TPB) + + +@pytest.fixture() +def intermediate_values(): + np.random.seed(0x1234) + return np.random.rand(TRACE_COUNT) + + +def pearson_corr(samples, intermediate_values): + int_sum = np.sum(intermediate_values) + int_sq_sum = np.sum(np.square(intermediate_values)) + samples_sum = np.sum(samples, axis=0) + samples_sq_sum = np.sum(np.square(samples), axis=0) + samples_intermed_sum = np.sum( + samples * intermediate_values[:, None], axis=0) + n = samples.shape[0] + + return (n * samples_intermed_sum - int_sum * samples_sum) / \ + (np.sqrt(n * int_sq_sum - int_sum ** 2) * + np.sqrt(n * samples_sq_sum - np.square(samples_sum))) + + +def test_pearson_coef_no_chunking(samples, gpu_manager, intermediate_values): + corr_gpu = gpu_pearson_corr(intermediate_values, + trace_manager=gpu_manager) + corr_cmp = pearson_corr(samples, intermediate_values) + + assert isinstance(corr_gpu, CombinedTrace) + assert corr_gpu.samples.shape == \ + corr_cmp.shape + + assert all(np.isclose(corr_gpu.samples, corr_cmp, rtol=RTOL, atol=ATOL)) + + +def test_pearson_coef_chunking(samples, gpu_manager, intermediate_values): + corr_gpu = gpu_pearson_corr(intermediate_values, + trace_manager=gpu_manager, + chunk_size=2 ** 5, + stream_count=4) + corr_cmp = pearson_corr(samples, intermediate_values) + + assert isinstance(corr_gpu, CombinedTrace) + assert corr_gpu.samples.shape == \ + corr_cmp.shape + + assert all(np.isclose(corr_gpu.samples, corr_cmp, rtol=RTOL, atol=ATOL)) -- cgit v1.2.3-70-g09d2 From 74ea8bcad4408663598985440589abbc062df55e Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Sat, 30 Sep 2023 00:16:44 +0200 Subject: fix: Typing --- pyecsca/sca/stacked_traces/combine.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyecsca/sca/stacked_traces/combine.py b/pyecsca/sca/stacked_traces/combine.py index e9a54e0..6e11162 100644 --- a/pyecsca/sca/stacked_traces/combine.py +++ b/pyecsca/sca/stacked_traces/combine.py @@ -67,7 +67,7 @@ class BaseTraceManager: """ raise NotImplementedError - def average_and_variance(self) -> Tuple[CombinedTrace, CombinedTrace]: + def average_and_variance(self) -> List[CombinedTrace]: """ Compute the sample average and variance of the :paramref:`~.average_and_variance.traces`, sample-wise. @@ -596,17 +596,17 @@ class CPUTraceManager: self.traces.meta ) - def average_and_variance(self) -> Tuple[CombinedTrace, CombinedTrace]: + def average_and_variance(self) -> List[CombinedTrace]: """ Compute the average and sample variance of the :paramref:`~.average_and_variance.traces`, sample-wise. :param traces: :return: """ - return ( + return [ self.average(), self.variance() - ) + ] def add(self) -> CombinedTrace: """ -- cgit v1.2.3-70-g09d2 From d66c3dc971846c490a9f846e12be299a27856e69 Mon Sep 17 00:00:00 2001 From: Tomáš Jusko Date: Sat, 30 Sep 2023 16:44:18 +0200 Subject: fix: PR requested changes --- pyecsca/sca/stacked_traces/combine.py | 3 ++- pyecsca/sca/stacked_traces/correlate.py | 5 ++++- test/sca/test_stacked_correlate.py | 12 +----------- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/pyecsca/sca/stacked_traces/combine.py b/pyecsca/sca/stacked_traces/combine.py index 6e11162..15d5235 100644 --- a/pyecsca/sca/stacked_traces/combine.py +++ b/pyecsca/sca/stacked_traces/combine.py @@ -224,7 +224,8 @@ class GPUTraceManager(BaseTraceManager): return int( chunk_memory_ratio * mem_size / element_size) - def get_traces_shape(self) -> Tuple[int, ...]: + @property + def traces_shape(self) -> Tuple[int, ...]: return self._traces.samples.shape def _gpu_combine1D(self, diff --git a/pyecsca/sca/stacked_traces/correlate.py b/pyecsca/sca/stacked_traces/correlate.py index 3e0f1d2..c5277d9 100644 --- a/pyecsca/sca/stacked_traces/correlate.py +++ b/pyecsca/sca/stacked_traces/correlate.py @@ -22,7 +22,7 @@ def gpu_pearson_corr(intermediate_values: npt.NDArray[np.number], if (len(intermediate_values.shape) != 1 or (intermediate_values.shape[0] - != trace_manager.get_traces_shape()[0])): + != trace_manager.traces_shape[0])): raise ValueError("Intermediate values have to be a vector " "as long as trace_count") @@ -51,6 +51,9 @@ def _gpu_pearson_corr(samples: DeviceNDArray, :type samples: npt.NDArray[np.number] :param intermediate_values: A 1D array of shape (n,) containing the intermediate values. :type intermediate_values: npt.NDArray[np.number] + :param intermed_sum: A 1D array of shape (1,) containing the precomputed sum of the intermediate values. + :type intermed_sum: npt.NDArray[np.number] + :param intermed_sq_sum: A 1D array of shape (1,) containing the precomputed sum of the squares of the intermediate values. :param result: A 1D array of shape (m,) to store the resulting correlation coefficients. :type result: cuda.devicearray.DeviceNDArray """ diff --git a/test/sca/test_stacked_correlate.py b/test/sca/test_stacked_correlate.py index ce5a9ed..954494f 100644 --- a/test/sca/test_stacked_correlate.py +++ b/test/sca/test_stacked_correlate.py @@ -35,17 +35,7 @@ def intermediate_values(): def pearson_corr(samples, intermediate_values): - int_sum = np.sum(intermediate_values) - int_sq_sum = np.sum(np.square(intermediate_values)) - samples_sum = np.sum(samples, axis=0) - samples_sq_sum = np.sum(np.square(samples), axis=0) - samples_intermed_sum = np.sum( - samples * intermediate_values[:, None], axis=0) - n = samples.shape[0] - - return (n * samples_intermed_sum - int_sum * samples_sum) / \ - (np.sqrt(n * int_sq_sum - int_sum ** 2) * - np.sqrt(n * samples_sq_sum - np.square(samples_sum))) + return np.corrcoef(samples, intermediate_values, rowvar=False)[-1, :-1] def test_pearson_coef_no_chunking(samples, gpu_manager, intermediate_values): -- cgit v1.2.3-70-g09d2