diff options
Diffstat (limited to 'analysis')
| -rw-r--r-- | analysis/ec.py | 1010 | ||||
| -rw-r--r-- | analysis/plot_dh.ipynb | 668 | ||||
| -rw-r--r-- | analysis/plot_dsa.ipynb | 743 | ||||
| -rw-r--r-- | analysis/plot_gen.ipynb | 755 | ||||
| -rw-r--r-- | analysis/utils.py | 111 |
5 files changed, 3287 insertions, 0 deletions
diff --git a/analysis/ec.py b/analysis/ec.py new file mode 100644 index 0000000..4e3244a --- /dev/null +++ b/analysis/ec.py @@ -0,0 +1,1010 @@ +""" +This module is a collection of modules glued together, to provide basic +elliptic curve arithmetic for curves over prime and binary fields. It consists of + - tinyec: https://github.com/alexmgr/tinyec (GPL v3 licensed) + - pyfinite: https://github.com/emin63/pyfinite (MIT licensed) + - modular square root from https://eli.thegreenplace.net/2009/03/07/computing-modular-square-roots-in-python + - and some of my own code: https://github.com/J08nY +""" + +import abc +import random +from functools import reduce, wraps +from os import path + + +def legendre_symbol(a, p): + """ Compute the Legendre symbol a|p using + Euler's criterion. p is a prime, a is + relatively prime to p (if p divides + a, then a|p = 0) + + Returns 1 if a has a square root modulo + p, -1 otherwise. + """ + ls = pow(a, (p - 1) // 2, p) + return -1 if ls == p - 1 else ls + + +def is_prime(n, trials=50): + """ + Miller-Rabin primality test. + """ + s = 0 + d = n - 1 + while d % 2 == 0: + d >>= 1 + s += 1 + assert (2 ** s * d == n - 1) + + def trial_composite(a): + if pow(a, d, n) == 1: + return False + for i in range(s): + if pow(a, 2 ** i * d, n) == n - 1: + return False + return True + + for i in range(trials): # number of trials + a = random.randrange(2, n) + if trial_composite(a): + return False + return True + + +def gcd(a, b): + """Euclid's greatest common denominator algorithm.""" + if abs(a) < abs(b): + return gcd(b, a) + + while abs(b) > 0: + q, r = divmod(a, b) + a, b = b, r + + return a + + +def extgcd(a, b): + """Extended Euclid's greatest common denominator algorithm.""" + if abs(b) > abs(a): + (x, y, d) = extgcd(b, a) + return y, x, d + + if abs(b) == 0: + return 1, 0, a + + x1, x2, y1, y2 = 0, 1, 1, 0 + while abs(b) > 0: + q, r = divmod(a, b) + x = x2 - q * x1 + y = y2 - q * y1 + a, b, x2, x1, y2, y1 = b, r, x1, x, y1, y + + return x2, y2, a + + +def check(func): + @wraps(func) + def method(self, other): + if isinstance(other, int): + other = self.__class__(other, self.field) + if type(self) is type(other): + if self.field == other.field: + return func(self, other) + else: + raise ValueError + else: + raise TypeError + + return method + + +class Mod(object): + """An element x of ℤₙ.""" + + def __init__(self, x: int, n: int): + self.x = x % n + self.field = n + + @check + def __add__(self, other): + return Mod((self.x + other.x) % self.field, self.field) + + @check + def __radd__(self, other): + return self + other + + @check + def __sub__(self, other): + return Mod((self.x - other.x) % self.field, self.field) + + @check + def __rsub__(self, other): + return -self + other + + def __neg__(self): + return Mod(self.field - self.x, self.field) + + def inverse(self): + x, y, d = extgcd(self.x, self.field) + return Mod(x, self.field) + + def __invert__(self): + return self.inverse() + + @check + def __mul__(self, other): + return Mod((self.x * other.x) % self.field, self.field) + + @check + def __rmul__(self, other): + return self * other + + @check + def __truediv__(self, other): + return self * ~other + + @check + def __rtruediv__(self, other): + return ~self * other + + @check + def __floordiv__(self, other): + return self * ~other + + @check + def __rfloordiv__(self, other): + return ~self * other + + @check + def __div__(self, other): + return self.__floordiv__(other) + + @check + def __rdiv__(self, other): + return self.__rfloordiv__(other) + + @check + def __divmod__(self, divisor): + q, r = divmod(self.x, divisor.x) + return Mod(q, self.field), Mod(r, self.field) + + def __int__(self): + return self.x + + def __eq__(self, other): + if type(other) is not Mod: + return False + return self.x == other.x and self.field == other.field + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return str(self.x) + + def __pow__(self, n): + if not isinstance(n, int): + raise TypeError + if n == 0: + return Mod(1, self.field) + if n < 0: + return (~self) ** -n + if n == 1: + return self + if n == 2: + return self * self + + q = self + r = self if n & 1 else Mod(1, self.field) + + i = 2 + while i <= n: + q = (q * q) + if n & i == i: + r = (q * r) + i = i << 1 + return r + + def sqrt(self): + if not is_prime(self.field): + raise NotImplementedError + # Simple cases + if legendre_symbol(self.x, self.field) != 1 or self.x == 0 or self.field == 2: + raise ValueError("Not a quadratic residue.") + if self.field % 4 == 3: + return self ** ((self.field + 1) // 4) + + a = self.x + p = self.field + s = p - 1 + e = 0 + while s % 2 == 0: + s /= 2 + e += 1 + + n = 2 + while legendre_symbol(n, p) != -1: + n += 1 + + x = pow(a, (s + 1) / 2, p) + b = pow(a, s, p) + g = pow(n, s, p) + r = e + + while True: + t = b + m = 0 + for m in range(r): + if t == 1: + break + t = pow(t, 2, p) + + if m == 0: + return Mod(x, p) + + gs = pow(g, 2 ** (r - m - 1), p) + g = (gs * gs) % p + x = (x * gs) % p + b = (b * g) % p + r = m + + +class FField(object): + """ + The FField class implements a binary field. + """ + + def __init__(self, n, gen): + """ + This method constructs the field GF(2^n). It takes two + required arguments, n and gen, + representing the coefficients of the generator polynomial + (of degree n) to use. + Note that you can look at the generator for the field object + F by looking at F.generator. + """ + + self.n = n + if len(gen) != n + 1: + full_gen = [0] * (n + 1) + for i in gen: + full_gen[i] = 1 + gen = full_gen[::-1] + self.generator = self.to_element(gen) + self.unity = 1 + + def add(self, x, y): + """ + Adds two field elements and returns the result. + """ + + return x ^ y + + def subtract(self, x, y): + """ + Subtracts the second argument from the first and returns + the result. In fields of characteristic two this is the same + as the Add method. + """ + return x ^ y + + def multiply(self, f, v): + """ + Multiplies two field elements (modulo the generator + self.generator) and returns the result. + + See MultiplyWithoutReducing if you don't want multiplication + modulo self.generator. + """ + m = self.multiply_no_reduce(f, v) + return self.full_division(m, self.generator, self.find_degree(m), self.n)[1] + + def inverse(self, f): + """ + Computes the multiplicative inverse of its argument and + returns the result. + """ + return self.ext_gcd(self.unity, f, self.generator, self.find_degree(f), self.n)[1] + + def divide(self, f, v): + """ + Divide(f,v) returns f * v^-1. + """ + return self.multiply(f, self.inverse(v)) + + def exponentiate(self, f, n): + """ + Exponentiate(f, n) returns f^n. + """ + if not isinstance(n, int): + raise TypeError + if n == 0: + return self.unity + if n < 0: + f = self.inverse(f) + n = -n + if n == 1: + return f + if n == 2: + return self.multiply(f, f) + + q = f + r = f if n & 1 else self.unity + + i = 2 + while i <= n: + q = self.multiply(q, q) + if n & i == i: + r = self.multiply(q, r) + i = i << 1 + return r + + def sqrt(self, f): + return self.exponentiate(f, (2 ** self.n) - 1) + + def trace(self, f): + t = f + for _ in range(1, self.n): + t = self.add(self.multiply(t, t), f) + return t + + def half_trace(self, f): + if self.n % 2 != 1: + raise ValueError + h = f + for _ in range(1, (self.n - 1) // 2): + h = self.multiply(h, h) + h = self.add(self.multiply(h, h), f) + return h + + def find_degree(self, v): + """ + Find the degree of the polynomial representing the input field + element v. This takes O(degree(v)) operations. + + A faster version requiring only O(log(degree(v))) + could be written using binary search... + """ + if v: + return v.bit_length() - 1 + else: + return 0 + + def multiply_no_reduce(self, f, v): + """ + Multiplies two field elements and does not take the result + modulo self.generator. You probably should not use this + unless you know what you are doing; look at Multiply instead. + """ + + result = 0 + mask = self.unity + for i in range(self.n + 1): + if mask & v: + result = result ^ f + f = f << 1 + mask = mask << 1 + return result + + def ext_gcd(self, d, a, b, a_degree, b_degree): + """ + Takes arguments (d,a,b,aDegree,bDegree) where d = gcd(a,b) + and returns the result of the extended Euclid algorithm + on (d,a,b). + """ + if b == 0: + return a, self.unity, 0 + else: + (floorADivB, aModB) = self.full_division(a, b, a_degree, b_degree) + (d, x, y) = self.ext_gcd(d, b, aModB, b_degree, self.find_degree(aModB)) + return d, y, self.subtract(x, self.multiply(floorADivB, y)) + + def full_division(self, f, v, f_degree, v_degree): + """ + Takes four arguments, f, v, fDegree, and vDegree where + fDegree and vDegree are the degrees of the field elements + f and v represented as a polynomials. + This method returns the field elements a and b such that + + f(x) = a(x) * v(x) + b(x). + + That is, a is the divisor and b is the remainder, or in + other words a is like floor(f/v) and b is like f modulo v. + """ + + result = 0 + mask = self.unity << f_degree + for i in range(f_degree, v_degree - 1, -1): + if mask & f: + result = result ^ (self.unity << (i - v_degree)) + f = self.subtract(f, v << (i - v_degree)) + mask = mask >> self.unity + return result, f + + def coefficients(self, f): + """ + Show coefficients of input field element represented as a + polynomial in decreasing order. + """ + + result = [] + for i in range(self.n, -1, -1): + if (self.unity << i) & f: + result.append(1) + else: + result.append(0) + + return result + + def polynomial(self, f): + """ + Show input field element represented as a polynomial. + """ + + f_degree = self.find_degree(f) + result = '' + + if f == 0: + return '0' + + for i in range(f_degree, 0, -1): + if (1 << i) & f: + result = result + (' x^' + repr(i)) + if 1 & f: + result = result + ' ' + repr(1) + return result.strip().replace(' ', ' + ') + + def to_element(self, l): + """ + This method takes as input a binary list (e.g. [1, 0, 1, 1]) + and converts it to a decimal representation of a field element. + For example, [1, 0, 1, 1] is mapped to 8 | 2 | 1 = 11. + + Note if the input list is of degree >= to the degree of the + generator for the field, then you will have to call take the + result modulo the generator to get a proper element in the + field. + """ + + temp = map(lambda a, b: a << b, l, range(len(l) - 1, -1, -1)) + return reduce(lambda a, b: a | b, temp) + + def __str__(self): + return "F_(2^{}): {}".format(self.n, self.polynomial(self.generator)) + + def __repr__(self): + return str(self) + + +class FElement(object): + """ + This class provides field elements which overload the + +,-,*,%,//,/ operators to be the appropriate field operation. + Note that before creating FElement objects you must first + create an FField object. + """ + + def __init__(self, f, field): + """ + The constructor takes two arguments, field, and e where + field is an FField object and e is an integer representing + an element in FField. + + The result is a new FElement instance. + """ + self.f = f + self.field = field + + @check + def __add__(self, other): + return FElement(self.field.add(self.f, other.f), self.field) + + @check + def __sub__(self, other): + return FElement(self.field.add(self.f, other.f), self.field) + + def __neg__(self): + return self + + @check + def __mul__(self, other): + return FElement(self.field.multiply(self.f, other.f), self.field) + + @check + def __floordiv__(self, o): + return FElement(self.field.full_division(self.f, o.f, + self.field.find_degree(self.f), + self.field.find_degree(o.f))[0], self.field) + + @check + def __truediv__(self, other): + return FElement(self.field.divide(self.f, other.f), self.field) + + def __div__(self, *args, **kwargs): + return self.__truediv__(*args, **kwargs) + + @check + def __divmod__(self, other): + d, m = self.field.full_division(self.f, other.f, + self.field.find_degree(self.f), + self.field.find_degree(other.f)) + return FElement(d, self.field), FElement(m, self.field) + + def inverse(self): + return FElement(self.field.inverse(self.f), self.field) + + def __invert__(self): + return self.inverse() + + def sqrt(self): + return FElement(self.field.sqrt(self.f), self.field) + + def trace(self): + return FElement(self.field.trace(self.f), self.field) + + def half_trace(self): + return FElement(self.field.half_trace(self.f), self.field) + + def __pow__(self, power, modulo=None): + return FElement(self.field.exponentiate(self.f, power), self.field) + + def __str__(self): + return str(int(self)) + + def __repr__(self): + return str(self) + + def __int__(self): + return self.f + + def __eq__(self, other): + if not isinstance(other, FElement): + return False + if self.field != other.field: + return False + return self.f == other.f + + +class Curve(object): + __metaclass__ = abc.ABCMeta + + def __init__(self, field, a, b, group, name=None): + self.field = field + if name is None: + name = "undefined" + self.name = name + self.a = a + self.b = b + self.group = group + self.g = Point(self, self.group.g[0], self.group.g[1]) + + @abc.abstractmethod + def is_singular(self): + ... + + @abc.abstractmethod + def on_curve(self, x, y): + ... + + @abc.abstractmethod + def add(self, x1, y1, x2, y2): + ... + + @abc.abstractmethod + def dbl(self, x, y): + ... + + @abc.abstractmethod + def neg(self, x, y): + ... + + @abc.abstractmethod + def encode_point(self, point, compressed=False): + ... + + @abc.abstractmethod + def decode_point(self, byte_data): + ... + + def bit_size(self): + return self.group.n.bit_length() + + def byte_size(self): + return (self.bit_size() + 7) // 8 + + @abc.abstractmethod + def field_size(self): + ... + + def __eq__(self, other): + if not isinstance(other, Curve): + return False + return self.field == other.field and self.a == other.a and self.b == other.b and self.group == other.group + + def __repr__(self): + return str(self) + + +class CurveFp(Curve): + def is_singular(self): + return (4 * self.a ** 3 + 27 * self.b ** 2) == 0 + + def on_curve(self, x, y): + return (y ** 2 - x ** 3 - self.a * x - self.b) == 0 + + def add(self, x1, y1, x2, y2): + lm = (y2 - y1) / (x2 - x1) + x3 = lm ** 2 - x1 - x2 + y3 = lm * (x1 - x3) - y1 + return x3, y3 + + def dbl(self, x, y): + lm = (3 * x ** 2 + self.a) / (2 * y) + x3 = lm ** 2 - (2 * x) + y3 = lm * (x - x3) - y + return x3, y3 + + def mul(self, k, x, y, z=1): + def _add(x1, y1, z1, x2, y2, z2): + yz = y1 * z2 + xz = x1 * z2 + zz = z1 * z2 + u = y2 * z1 - yz + uu = u ** 2 + v = x2 * z1 - xz + vv = v ** 2 + vvv = v * vv + r = vv * xz + a = uu * zz - vvv - 2 * r + x3 = v * a + y3 = u * (r - a) - vvv * yz + z3 = vvv * zz + return x3, y3, z3 + + def _dbl(x1, y1, z1): + xx = x1 ** 2 + zz = z1 ** 2 + w = self.a * zz + 3 * xx + s = 2 * y1 * z1 + ss = s ** 2 + sss = s * ss + r = y1 * s + rr = r ** 2 + b = (x1 + r) ** 2 - xx - rr + h = w ** 2 - 2 * b + x3 = h * s + y3 = w * (b - h) - 2 * rr + z3 = sss + return x3, y3, z3 + r0 = (x, y, z) + r1 = _dbl(x, y, z) + for i in range(k.bit_length() - 2, -1, -1): + if k & (1 << i): + r0 = _add(*r0, *r1) + r1 = _dbl(*r1) + else: + r1 = _add(*r0, *r1) + r0 = _dbl(*r0) + rx, ry, rz = r0 + rzi = ~rz + return rx * rzi, ry * rzi + + def neg(self, x, y): + return x, -y + + def field_size(self): + return self.field.bit_length() + + def encode_point(self, point, compressed=False): + byte_size = (self.field_size() + 7) // 8 + if not compressed: + return bytes((0x04,)) + int(point.x).to_bytes(byte_size, byteorder="big") + int( + point.y).to_bytes(byte_size, byteorder="big") + else: + yp = int(point.y) & 1 + pc = bytes((0x02 | yp,)) + return pc + int(point.x).to_bytes(byte_size, byteorder="big") + + def decode_point(self, byte_data): + if byte_data[0] == 0 and len(byte_data) == 1: + return Inf(self) + byte_size = (self.field_size() + 7) // 8 + if byte_data[0] in (0x04, 0x06): + if len(byte_data) != 1 + byte_size * 2: + raise ValueError + x = Mod(int.from_bytes(byte_data[1:byte_size + 1], byteorder="big"), self.field) + y = Mod(int.from_bytes(byte_data[byte_size + 1:], byteorder="big"), self.field) + return Point(self, x, y) + elif byte_data[0] in (0x02, 0x03): + if len(byte_data) != 1 + byte_size: + raise ValueError + x = Mod(int.from_bytes(byte_data[1:byte_size + 1], byteorder="big"), self.field) + rhs = x ** 3 + self.a * x + self.b + sqrt = rhs.sqrt() + yp = byte_data[0] & 1 + if int(sqrt) & 1 == yp: + return Point(self, x, sqrt) + else: + return Point(self, x, self.field - sqrt) + raise ValueError + + def __str__(self): + return "\"{}\": y^2 = x^3 + {}x + {} over {}".format(self.name, self.a, self.b, self.field) + + +class CurveF2m(Curve): + def is_singular(self): + return self.b == 0 + + def on_curve(self, x, y): + return (y ** 2 + x * y - x ** 3 - self.a * x ^ 2 - self.b) == 0 + + def add(self, x1, y1, x2, y2): + lm = (y1 + y2) / (x1 + x2) + x3 = lm ** 2 + lm + x1 + x2 + self.a + y3 = lm * (x1 + x3) + x3 + y1 + return x3, y3 + + def dbl(self, x, y): + lm = x + y / x + x3 = lm ** 2 + lm + self.a + y3 = x ** 2 + lm * x3 + x3 + return x3, y3 + + def mul(self, k, x, y, z=1): + def _add(x1, y1, z1, x2, y2, z2): + a = x1 * z2 + b = x2 * z1 + c = a ** 2 + d = b ** 2 + e = a + b + f = c + d + g = y1 * (z2 ** 2) + h = y2 * (z1 ** 2) + i = g + h + j = i * e + z3 = f * z1 * z2 + x3 = a * (h + d) + b * (c + g) + y3 = (a * j + f * g) * f + (j + z3) * x3 + return x3, y3, z3 + + def _dbl(x1, y1, z1): + a = x1 * z1 + b = x1 * x1 + c = b + y1 + d = a * c + z3 = a * a + x3 = c ** 2 + d + self.a * z3 + y3 = (z3 + d) * x3 + b ** 2 * z3 + return x3, y3, z3 + r0 = (x, y, z) + r1 = _dbl(x, y, z) + for i in range(k.bit_length() - 2, -1, -1): + if k & (1 << i): + r0 = _add(*r0, *r1) + r1 = _dbl(*r1) + else: + r1 = _add(*r0, *r1) + r0 = _dbl(*r0) + rx, ry, rz = r0 + rzi = ~rz + return rx * rzi, ry * (rzi ** 2) + + def neg(self, x, y): + return x, x + y + + def field_size(self): + return self.field.n + + def encode_point(self, point, compressed=False): + byte_size = (self.field_size() + 7) // 8 + if not compressed: + return bytes((0x04,)) + int(point.x).to_bytes(byte_size, byteorder="big") + int( + point.y).to_bytes(byte_size, byteorder="big") + else: + if int(point.x) == 0: + yp = 0 + else: + yp = int(point.y * point.x.inverse()) + pc = bytes((0x02 | yp,)) + return pc + int(point.x).to_bytes(byte_size, byteorder="big") + + def decode_point(self, byte_data): + if byte_data[0] == 0 and len(byte_data) == 1: + return Inf(self) + byte_size = (self.field_size() + 7) // 8 + if byte_data[0] in (0x04, 0x06): + if len(byte_data) != 1 + byte_size * 2: + raise ValueError + x = FElement(int.from_bytes(byte_data[1:byte_size + 1], byteorder="big"), self.field) + y = FElement(int.from_bytes(byte_data[byte_size + 1:], byteorder="big"), self.field) + return Point(self, x, y) + elif byte_data[0] in (0x02, 0x03): + if self.field.n % 2 != 1: + raise NotImplementedError + x = FElement(int.from_bytes(byte_data[1:byte_size + 1], byteorder="big"), self.field) + yp = byte_data[0] & 1 + if int(x) == 0: + y = self.b ** (2 ** (self.field.n - 1)) + else: + rhs = x + self.a + self.b * x ** (-2) + z = rhs.half_trace() + if z ** 2 + z != rhs: + raise ValueError + if int(z) & 1 != yp: + z += 1 + y = x * z + return Point(self, x, y) + raise ValueError + + def __str__(self): + return "\"{}\" => y^2 + xy = x^3 + {}x^2 + {} over {}".format(self.name, self.a, self.b, + self.field) + + +class SubGroup(object): + def __init__(self, g, n, h): + self.g = g + self.n = n + self.h = h + + def __eq__(self, other): + if not isinstance(other, SubGroup): + return False + return self.g == other.g and self.n == other.n and self.h == other.h + + def __str__(self): + return "Subgroup => generator {}, order: {}, cofactor: {}".format(self.g, self.n, self.h) + + def __repr__(self): + return str(self) + + +class Inf(object): + def __init__(self, curve, x=None, y=None): + self.x = x + self.y = y + self.curve = curve + + def __eq__(self, other): + if not isinstance(other, Inf): + return False + return self.curve == other.curve + + def __ne__(self, other): + return not self.__eq__(other) + + def __neg__(self): + return self + + def __add__(self, other): + if isinstance(other, Inf): + return Inf(self.curve) + if isinstance(other, Point): + return other + raise TypeError( + "Unsupported operand type(s) for +: '%s' and '%s'" % (self.__class__.__name__, + other.__class__.__name__)) + + def __radd__(self, other): + return self + other + + def __sub__(self, other): + if isinstance(other, Inf): + return Inf(self.curve) + if isinstance(other, Point): + return other + raise TypeError( + "Unsupported operand type(s) for +: '%s' and '%s'" % (self.__class__.__name__, + other.__class__.__name__)) + + def __str__(self): + return "{} on {}".format(self.__class__.__name__, self.curve) + + def __repr__(self): + return str(self) + + +class Point(object): + def __init__(self, curve, x, y): + self.curve = curve + self.x = x + self.y = y + + def __eq__(self, other): + if not isinstance(other, Point): + return False + return self.x == other.x and self.y == other.y and self.curve == other.curve + + def __ne__(self, other): + return not self.__eq__(other) + + def __neg__(self): + return Point(self.curve, *self.curve.neg(self.x, self.y)) + + def __add__(self, other): + if isinstance(other, Inf): + return self + if isinstance(other, Point): + if self.curve != other.curve: + raise ValueError("Cannot add points belonging to different curves") + if self == -other: + return Inf(self.curve) + elif self == other: + return Point(self.curve, *self.curve.dbl(self.x, self.y)) + else: + return Point(self.curve, *self.curve.add(self.x, self.y, other.x, other.y)) + else: + raise TypeError( + "Unsupported operand type(s) for +: '{}' and '{}'".format( + self.__class__.__name__, + other.__class__.__name__)) + + def __radd__(self, other): + return self + other + + def __sub__(self, other): + return self + (-other) + + def __rsub__(self, other): + return self - other + + def __mul__(self, other): + if isinstance(other, int): + if other % self.curve.group.n == 0: + return Inf(self.curve) + if other < 0: + other = -other + addend = -self + else: + addend = self + if hasattr(self.curve, "mul") and callable(getattr(self.curve, "mul")): + return Point(self.curve, *self.curve.mul(other, addend.x, addend.y)) + else: + result = Inf(self.curve) + # Iterate over all bits starting by the LSB + for bit in reversed([int(i) for i in bin(abs(other))[2:]]): + if bit == 1: + result += addend + addend += addend + return result + else: + raise TypeError( + "Unsupported operand type(s) for *: '%s' and '%s'" % (other.__class__.__name__, + self.__class__.__name__)) + + def __rmul__(self, other): + return self * other + + def __str__(self): + return "({}, {}) on {}".format(self.x, self.y, self.curve) + + def __repr__(self): + return str(self) + + +def load_curve(file, name=None): + data = file.read() + parts = list(map(lambda x: int(x, 16), data.split(","))) + if len(parts) == 7: + p, a, b, gx, gy, n, h = parts + g = (Mod(gx, p), Mod(gy, p)) + group = SubGroup(g, n, h) + return CurveFp(p, Mod(a, p), Mod(b, p), group, name) + elif len(parts) == 10: + m, e1, e2, e3, a, b, gx, gy, n, h = parts + poly = [m, e1, e2, e3, 0] + field = FField(m, poly) + g = (FElement(gx, field), FElement(gy, field)) + group = SubGroup(g, n, h) + return CurveF2m(field, FElement(a, field), FElement(b, field), group, name) + else: + raise ValueError("Invalid curve data") + + +def get_curve(idd): + cat, i = idd.split("/") + with open(path.join("..", "src", "cz", "crcs", "ectester", "data", cat, i + ".csv"), "r") as f: + return load_curve(f, i) + diff --git a/analysis/plot_dh.ipynb b/analysis/plot_dh.ipynb new file mode 100644 index 0000000..f43d631 --- /dev/null +++ b/analysis/plot_dh.ipynb @@ -0,0 +1,668 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analysis of key agreement data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T13:35:38.954375Z", + "start_time": "2019-03-19T13:35:38.578219Z" + } + }, + "outputs": [], + "source": [ + "%matplotlib notebook\n", + "import numpy as np\n", + "from scipy.stats import describe\n", + "from scipy.stats import norm as norm_dist\n", + "from scipy.stats.mstats import mquantiles\n", + "from math import log, sqrt\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib import ticker, colors, gridspec\n", + "from copy import deepcopy\n", + "from utils import plot_hist, moving_average, hw, time_scale, hist_size_func\n", + "from binascii import unhexlify\n", + "from IPython.display import display, HTML\n", + "from ipywidgets import interact, interactive, fixed, interact_manual\n", + "import ipywidgets as widgets\n", + "import tabulate" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Settings\n", + "Enter your input below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:43:15.121139Z", + "start_time": "2019-03-19T12:43:15.117537Z" + } + }, + "outputs": [], + "source": [ + "# File name with output from ECTesterReader or ECTesterStandalone ECDH.\n", + "fname = \"filename.csv\"\n", + "\n", + "# The time unit used in displaying the plots. One of \"milli\", \"micro\", \"nano\".\n", + "# WARNING: Using nano might lead to very large plots/histograms and to the\n", + "# notebook to freeze or run out of memory, as well as bad visualization\n", + "# quality, due to noise and low density.\n", + "time_unit = \"milli\"\n", + "# A number which will be used to divide the time into sub-units, e.g. for 5, time will be in fifths of units\n", + "scaling_factor = 1\n", + "\n", + "# The amount of entries skipped from the beginning of the file, as they are usually outliers.\n", + "skip_first = 10\n", + "\n", + "# Whether to plot things in logarithmic scale or not.\n", + "log_scale = False\n", + "\n", + "# Whether to trim the time data outside the 1 - 99 percentile range (adjust below). Quite useful.\n", + "trim = True\n", + "\n", + "# How much to trim? Either a number in [0,1] signifying a quantile, or an absolute value signifying a threshold\n", + "trim_low = 0.01\n", + "trim_high = 0.99\n", + "\n", + "# Graphical (matplotlib) style name\n", + "style = \"ggplot\"\n", + "\n", + "# Color map to use, and what color to assign to \"bad\" values (necessary for log_scale)\n", + "color_map = plt.cm.viridis\n", + "color_map_bad = \"black\"\n", + "\n", + "# What function to use to calculate number of histogram bins of time\n", + "# one of \"sqrt\", \"sturges\", \"rice\", \"scott\" and \"fd\" or a number specifying the number of bins\n", + "hist_size = \"sturges\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data processing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:43:16.687260Z", + "start_time": "2019-03-19T12:43:16.031604Z" + } + }, + "outputs": [], + "source": [ + "# Setup plot style\n", + "\n", + "plt.style.use(style)\n", + "\n", + "cmap = deepcopy(color_map)\n", + "cmap.set_bad(color_map_bad)\n", + "\n", + "# Normalization, linear or log.\n", + "if log_scale:\n", + " norm = colors.LogNorm()\n", + "else:\n", + " norm = colors.Normalize()\n", + "\n", + "# Read the header line.\n", + "\n", + "with open(fname, \"r\") as f:\n", + " header = f.readline()\n", + "header_names = header.split(\";\")\n", + "if len(header_names) != 5:\n", + " print(\"Bad data?\")\n", + " exit(1)\n", + "\n", + "# Load the data\n", + "\n", + "hx = lambda x: int(x, 16)\n", + "data = np.genfromtxt(fname, delimiter=\";\", skip_header=1, converters={2: unhexlify, 3: hx, 4: hx},\n", + " dtype=np.dtype([(\"index\", \"u4\"), (\"time\", \"u4\"), (\"pub\", \"O\"), (\"priv\", \"O\"), (\"secret\", \"O\")]))\n", + "\n", + "# Skip first (outliers?)\n", + "\n", + "data = data[skip_first:]\n", + "\n", + "# Setup the data\n", + "\n", + "orig_time_unit = header_names[1].split(\"[\")[1][:-1]\n", + "time_disp_unit = time_scale(data[\"time\"], orig_time_unit, time_unit, scaling_factor)\n", + "\n", + "# Trim times\n", + "quant_low_bound = trim_low if 0 <= trim_low <= 1 else 0.01\n", + "quant_high_bound = trim_high if 0 <= trim_high <= 1 else 0.95\n", + "quantiles = mquantiles(data[\"time\"], prob=(quant_low_bound, 0.25, 0.5, 0.75, quant_high_bound))\n", + "if trim:\n", + " low_bound = quantiles[0] if 0 <= trim_low <= 1 else trim_low\n", + " high_bound = quantiles[4] if 0 <= trim_high <= 1 else trim_high\n", + " data_trimmed = data[np.logical_and(data[\"time\"] >= low_bound,\n", + " data[\"time\"] <= high_bound)]\n", + " quantiles_trim = mquantiles(data_trimmed[\"time\"], prob=(quant_low_bound, 0.25, 0.5, 0.75, quant_high_bound))\n", + "else:\n", + " low_bound = None\n", + " high_bound = None\n", + " data_trimmed = data\n", + " quantiles_trim = quantiles_gen\n", + "\n", + "description = describe(data[\"time\"])\n", + "description_trim = describe(data_trimmed[\"time\"])\n", + "\n", + "max_time = description.minmax[1]\n", + "min_time = description.minmax[0]\n", + "bit_size = len(bin(max(data[\"priv\"]))) - 2\n", + "byte_size = (bit_size + 7) // 8\n", + "bit_size = byte_size * 8\n", + "\n", + "hist_size_time = hist_size_func(hist_size)(description.nobs, min_time, max_time, description.variance, quantiles[1], quantiles[3])\n", + "hist_size_time_trim = hist_size_func(hist_size)(description_trim.nobs, description_trim.minmax[0], description_trim.minmax[1], description_trim.variance, quantiles_trim[1], quantiles_trim[3])\n", + "\n", + "if hist_size_time < 30:\n", + " hist_size_time = max_time - min_time\n", + "if hist_size_time_trim < 30:\n", + " hist_size_time_trim = description_trim.minmax[1] - description_trim.minmax[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:43:17.706648Z", + "start_time": "2019-03-19T12:43:17.695215Z" + } + }, + "outputs": [], + "source": [ + "display(\"Raw\")\n", + "desc = [(\"N\", \"min, max\", \"mean\", \"variance\", \"skewness\", \"kurtosis\"),\n", + " description]\n", + "display(HTML(tabulate.tabulate(desc, tablefmt=\"html\")))\n", + "display(\"Trimmed\")\n", + "desc = [(\"N\", \"min, max\", \"mean\", \"variance\", \"skewness\", \"kurtosis\"),\n", + " description_trim]\n", + "display(HTML(tabulate.tabulate(desc, tablefmt=\"html\")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Selected quantiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:43:19.507884Z", + "start_time": "2019-03-19T12:43:19.502941Z" + } + }, + "outputs": [], + "source": [ + "tbl = [(quant_low_bound, \"0.25\", \"0.5\", \"0.75\", quant_high_bound),\n", + " list(map(lambda x: \"{} {}\".format(x, time_disp_unit), quantiles))]\n", + "display(HTML(tabulate.tabulate(tbl, tablefmt=\"html\")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:43:20.963153Z", + "start_time": "2019-03-19T12:43:20.956502Z" + } + }, + "outputs": [], + "source": [ + "display(\"Bitsize: {}\".format(bit_size))\n", + "display(\"Histogram time bins: {}\".format(hist_size_time))\n", + "display(\"Histogram time bins(trimmed): {}\".format(hist_size_time_trim))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plots" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private key MSB vs time heatmap\n", + "The heatmap should show uncorrelated variables." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:43:22.063050Z", + "start_time": "2019-03-19T12:43:21.967845Z" + } + }, + "outputs": [], + "source": [ + "fig_private = plt.figure(figsize=(10.5, 8), dpi=90)\n", + "axe_private = fig_private.add_subplot(1, 1, 1, title=\"Private key MSB vs key agreement time\")\n", + "priv_msb = np.array(list(map(lambda x: x >> (bit_size - 8), data_trimmed[\"priv\"])), dtype=np.dtype(\"u1\"))\n", + "max_msb = max(priv_msb)\n", + "min_msb = min(priv_msb)\n", + "heatmap, xedges, yedges = np.histogram2d(priv_msb, data_trimmed[\"time\"],\n", + " bins=[max_msb - min_msb + 1, hist_size_time_trim])\n", + "extent = [min_msb, max_msb, yedges[0], yedges[-1]]\n", + "im = axe_private.imshow(heatmap.T, extent=extent, aspect=\"auto\", cmap=cmap, origin=\"low\",\n", + " interpolation=\"nearest\", norm=norm)\n", + "axe_private.set_xlabel(\"private key MSB value\")\n", + "axe_private.set_ylabel(\"key agreement time ({})\".format(time_disp_unit))\n", + "fig_private.colorbar(im, ax=axe_private)\n", + "\n", + "fig_private.tight_layout()\n", + "del priv_msb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private key Hamming Weight vs time heatmap\n", + "The heatmap should show uncorrelated variables.\n", + "\n", + "Also contains a private key Hamming Weight histogram, which should be binomially distributed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:47:12.436692Z", + "start_time": "2019-03-19T12:47:11.310271Z" + } + }, + "outputs": [], + "source": [ + "fig_priv_hist = plt.figure(figsize=(10.5, 12), dpi=90)\n", + "gs = gridspec.GridSpec(2, 1, height_ratios=[2.5, 1])\n", + "axe_priv_hist = fig_priv_hist.add_subplot(gs[0], title=\"Private key Hamming weight vs key agreement time\")\n", + "axe_priv_hist_hw = fig_priv_hist.add_subplot(gs[1], sharex=axe_priv_hist, title=\"Private key Hamming weight\")\n", + "priv_hw = np.array(list(map(hw, data_trimmed[\"priv\"])), dtype=np.dtype(\"u2\"))\n", + "h, xe, ye = np.histogram2d(priv_hw, data_trimmed[\"time\"], bins=[max(priv_hw) - min(priv_hw), hist_size_time_trim])\n", + "im = axe_priv_hist.imshow(h.T, origin=\"low\", cmap=cmap, aspect=\"auto\", extent=[xe[0], xe[-1], ye[0], ye[-1]], norm=norm)\n", + "axe_priv_hist.axvline(x=bit_size//2, alpha=0.7, linestyle=\"dotted\", color=\"white\", label=str(bit_size//2) + \" bits\")\n", + "axe_priv_hist.set_xlabel(\"private key Hamming weight\")\n", + "axe_priv_hist.set_ylabel(\"key agreement time ({})\".format(time_disp_unit))\n", + "axe_priv_hist.legend(loc=\"best\")\n", + "\n", + "plot_hist(axe_priv_hist_hw, priv_hw, \"private key Hamming weight\", log_scale, None)\n", + "\n", + "param = norm_dist.fit(priv_hw)\n", + "pdf_range = np.arange(min(priv_hw), max(priv_hw))\n", + "norm_pdf = norm_dist.pdf(pdf_range, *param[:-2], loc=param[-2], scale=param[-1]) * description_trim.nobs\n", + "axe_priv_hist_hw.plot(pdf_range, norm_pdf, label=\"fitted normal distribution\")\n", + "axe_priv_hist_hw.legend(loc=\"best\")\n", + "\n", + "fig_priv_hist.tight_layout()\n", + "fig_priv_hist.colorbar(im, ax=[axe_priv_hist, axe_priv_hist_hw])\n", + "\n", + "display(HTML(\"<b>Private key Hamming weight fitted with normal distribution:</b>\"))\n", + "display(HTML(tabulate.tabulate([(\"Mean\", \"Variance\"), param], tablefmt=\"html\")))\n", + "\n", + "del priv_hw" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Key agreement time histogram" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:47:20.496134Z", + "start_time": "2019-03-19T12:47:20.360405Z" + } + }, + "outputs": [], + "source": [ + "fig_ka_hist = plt.figure(figsize=(10.5, 8), dpi=90)\n", + "axe_hist_full = fig_ka_hist.add_subplot(2, 1, 1)\n", + "axe_hist_trim = fig_ka_hist.add_subplot(2, 1, 2)\n", + "plot_hist(axe_hist_full, data[\"time\"], \"key agreement time ({})\".format(time_disp_unit), log_scale, hist_size_time);\n", + "plot_hist(axe_hist_trim, data_trimmed[\"time\"], \"key agreement time ({})\".format(time_disp_unit), log_scale, hist_size_time_trim);\n", + "\n", + "fig_ka_hist.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Moving averages of key agreement time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:47:21.706585Z", + "start_time": "2019-03-19T12:47:21.658363Z" + } + }, + "outputs": [], + "source": [ + "fig_avg = plt.figure(figsize=(10.5, 7), dpi=90)\n", + "axe_avg = fig_avg.add_subplot(1, 1, 1, title=\"Moving average of key agreement time\")\n", + "avg_100 = moving_average(data[\"time\"], 100)\n", + "avg_1000 = moving_average(data[\"time\"], 1000)\n", + "axe_avg.plot(avg_100, label=\"window = 100\")\n", + "axe_avg.plot(avg_1000, label=\"window = 1000\")\n", + "if low_bound is not None:\n", + " axe_avg.axhline(y=low_bound, alpha=0.7, linestyle=\"dotted\", color=\"green\", label=\"Low trim bound = {}\".format(low_bound))\n", + "if high_bound is not None:\n", + " axe_avg.axhline(y=high_bound, alpha=0.7, linestyle=\"dotted\", color=\"orange\", label=\"Hight trim bound = {}\".format(high_bound))\n", + "axe_avg.set_ylabel(\"key agreement time ({})\".format(time_disp_unit))\n", + "axe_avg.set_xlabel(\"index\")\n", + "axe_avg.legend(loc=\"best\")\n", + "\n", + "fig_avg.tight_layout()\n", + "del avg_100, avg_1000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private key MSB and LSB histograms\n", + "Expected to be uniform over [0, 255]." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:47:32.875112Z", + "start_time": "2019-03-19T12:47:32.542216Z" + }, + "hide_input": false + }, + "outputs": [], + "source": [ + "fig_priv_hists = plt.figure(figsize=(10.5, 8), dpi=90)\n", + "priv_msb = np.array(list(map(lambda x: x >> (bit_size - 8), data[\"priv\"])), dtype=np.dtype(\"u1\"))\n", + "priv_lsb = np.array(list(map(lambda x: x & 0xff, data[\"priv\"])), dtype=np.dtype(\"u1\"))\n", + "axe_msb_s_hist = fig_priv_hists.add_subplot(2, 1, 1, title=\"Private key MSB\")\n", + "axe_lsb_s_hist = fig_priv_hists.add_subplot(2, 1, 2, title=\"Private key LSB\")\n", + "msb_h = plot_hist(axe_msb_s_hist, priv_msb, \"private key MSB\", log_scale, False, False)\n", + "lsb_h = plot_hist(axe_lsb_s_hist, priv_lsb, \"private key LSB\", log_scale, False, False)\n", + "\n", + "fig_priv_hists.tight_layout()\n", + "del priv_msb, priv_lsb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private key bit length vs time heatmap\n", + "Also contains private key bit length histogram, which is expected to be axis flipped geometric distribution with $p = \\frac{1}{2}$ peaking at the bit size of the order of the curve." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:47:34.638019Z", + "start_time": "2019-03-19T12:47:34.479903Z" + } + }, + "outputs": [], + "source": [ + "fig_bl = plt.figure(figsize=(10.5, 12), dpi=90)\n", + "gs = gridspec.GridSpec(2, 1, height_ratios=[2.5, 1])\n", + "axe_bl_heat = fig_bl.add_subplot(gs[0], title=\"Private key bit length vs keygen time\")\n", + "axe_bl_hist = fig_bl.add_subplot(gs[1], sharex=axe_bl_heat, title=\"Private key bit length\")\n", + "bl_data = np.array(list(map(lambda x: x.bit_length(), data_trimmed[\"priv\"])), dtype=np.dtype(\"u2\"))\n", + "\n", + "h, xe, ye = np.histogram2d(bl_data, data_trimmed[\"time\"], bins=[max(bl_data) - min(bl_data), hist_size_time_trim])\n", + "im = axe_bl_heat.imshow(h.T, origin=\"low\", cmap=cmap, aspect=\"auto\", extent=[xe[0], xe[-1], ye[0], ye[-1]], norm=norm)\n", + "axe_bl_heat.set_xlabel(\"private key bit length\")\n", + "axe_bl_heat.set_ylabel(\"key agreement time ({})\".format(time_disp_unit))\n", + "\n", + "plot_hist(axe_bl_hist, bl_data, \"Private key bit length\", log_scale, align=\"right\")\n", + "\n", + "fig_bl.tight_layout()\n", + "fig_bl.colorbar(im, ax=[axe_bl_heat, axe_bl_hist])\n", + "\n", + "del bl_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private key bit length histogram given time\n", + "Interactively shows the histogram of private key bit length given a selected time range centered around `center` of width `width`. Ideally, the means of these conditional distributions are equal, while the variances can vary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_bl_time = plt.figure(figsize=(10.5, 5), dpi=90)\n", + "axe_bl_time = fig_bl_time.add_subplot(111)\n", + "axe_bl_time.set_autoscalex_on(False)\n", + "def f(center, width):\n", + " lower_bnd = center - width/2\n", + " upper_bnd = center + width/2\n", + " values = data_trimmed[np.logical_and(data_trimmed[\"time\"] <= upper_bnd,\n", + " data_trimmed[\"time\"] >= lower_bnd)]\n", + " axe_bl_time.clear()\n", + " axe_bl_time.set_title(\"Private key bit length, given key agreement time $\\in ({}, {})$ {}\".format(int(lower_bnd), int(upper_bnd), time_disp_unit))\n", + " bl_data = np.array(list(map(lambda x: x.bit_length(), values[\"priv\"])), dtype=np.dtype(\"u2\"))\n", + " plot_hist(axe_bl_time, bl_data, \"private key bit length\", bins=11, range=(bit_size-10, bit_size+1), align=\"left\")\n", + " axe_bl_time.set_xlim((bit_size-10, bit_size))\n", + " fig_bl_time.tight_layout()\n", + "\n", + "center_w = widgets.IntSlider(min=min(data_trimmed[\"time\"]),\n", + " max=max(data_trimmed[\"time\"]),\n", + " step=1,\n", + " value=description_trim.mean,\n", + " continuous_update=False,\n", + " description=\"center {}\".format(time_disp_unit))\n", + "width_w = widgets.IntSlider(min=1, max=100, continuous_update=False,\n", + " description=\"width {}\".format(time_disp_unit))\n", + "w = interactive(f, center=center_w,\n", + " width=width_w)\n", + "display(w)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Validation\n", + "Perform some tests on the produced data and compare to expected results.\n", + "\n", + "This requires some information about the used curve, enter it below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-18T18:36:15.492599Z", + "start_time": "2019-03-18T18:36:12.008827Z" + } + }, + "outputs": [], + "source": [ + "p_str = input(\"The prime specifying the finite field:\")\n", + "p = int(p_str, 16) if p_str.startswith(\"0x\") else int(p_str)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r_str = input(\"The order of the curve:\")\n", + "r = int(r_str, 16) if r_str.startswith(\"0x\") else int(r_str)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All of the following tests should pass (e.g. be true), given a large enough sample." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "max_priv = max(data[\"priv\"])\n", + "un = len(np.unique(data[\"priv\"])) != 1\n", + "if un:\n", + " print(\"Private keys are smaller than order:\\t\\t\\t\" + str(max_priv < r))\n", + " print(\"Private keys are larger than prime(if order > prime):\\t\" + str(r <= p or max_priv > p))\n", + " print(\"Private keys reach full bit length of order:\\t\\t\" + str(max_priv.bit_length() == r.bit_length()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T12:53:48.777395Z", + "start_time": "2019-03-19T12:53:48.766190Z" + } + }, + "outputs": [], + "source": [ + "if un:\n", + " print(\"Private key bit length (min, max):\" + str(min(data[\"priv\"]).bit_length()) + \", \" + str(max(data[\"priv\"]).bit_length()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "@webio": { + "lastCommId": "709d7f1e9dab427486f950933d243d24", + "lastKernelId": "cf0cea00-5a1b-4e6e-b81a-40a069517c67" + }, + "hide_input": false, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analysis/plot_dsa.ipynb b/analysis/plot_dsa.ipynb new file mode 100644 index 0000000..abd6531 --- /dev/null +++ b/analysis/plot_dsa.ipynb @@ -0,0 +1,743 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analysis of signature data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:04:46.257111Z", + "start_time": "2019-03-22T09:04:43.955081Z" + } + }, + "outputs": [], + "source": [ + "%matplotlib notebook\n", + "import numpy as np\n", + "from scipy.stats import describe\n", + "from scipy.stats import norm as norm_dist\n", + "from scipy.stats.mstats import mquantiles\n", + "from math import log, sqrt\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib import ticker, colors, gridspec\n", + "from copy import deepcopy\n", + "from utils import plot_hist, moving_average, hw, time_scale, hist_size_func, recompute_nonces\n", + "from binascii import unhexlify\n", + "from IPython.display import display, HTML\n", + "from ipywidgets import interact, interactive, fixed, interact_manual\n", + "import ipywidgets as widgets\n", + "import tabulate" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Settings\n", + "Enter your input below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:05:02.399284Z", + "start_time": "2019-03-22T09:05:02.391509Z" + } + }, + "outputs": [], + "source": [ + "# File name with output from ECTesterReader or ECTesterStandalone signatures.\n", + "fname = \"filename.csv\"\n", + "\n", + "# A hash algorithm used\n", + "hash_algo = \"SHA1\" # e.g. \"SHA1\" or None for no hash, raw data signatures\n", + "\n", + "# A curve name or a path to curve file, used to recompute the random nonces used in signing, if they are not present\n", + "# in the file. (ECTester was not able to recompute them for some reason)\n", + "curve = None # e.g. \"secg/secp256r1\" or \"secp256r1.csv\" or None for no curve.\n", + "\n", + "# The time unit used in displaying the plots. One of \"milli\", \"micro\", \"nano\".\n", + "# WARNING: Using nano might lead to very large plots/histograms and to the\n", + "# notebook to freeze or run out of memory, as well as bad visualization\n", + "# quality, due to noise and low density.\n", + "sign_unit = \"milli\"\n", + "verify_unit = \"milli\"\n", + "# A number which will be used to divide the time into sub-units, e.g. for 5, time will be in fifths of units\n", + "scaling_factor = 1\n", + "\n", + "# The amount of entries skipped from the beginning of the file, as they are usually outliers.\n", + "skip_first = 10\n", + "\n", + "# Whether to plot things in logarithmic scale or not.\n", + "log_scale = False\n", + "\n", + "# Whether to trim the time data outside the 1 - 99 percentile range (adjust below). Quite useful.\n", + "trim = True\n", + "\n", + "# How much to trim? Either a number in [0,1] signifying a quantile, or an absolute value signifying a threshold\n", + "trim_low = 0.01\n", + "trim_high = 0.99\n", + "\n", + "# Graphical (matplotlib) style name\n", + "style = \"ggplot\"\n", + "\n", + "# Color map to use, and what color to assign to \"bad\" values (necessary for log_scale)\n", + "color_map = plt.cm.viridis\n", + "color_map_bad = \"black\"\n", + "\n", + "# What function to use to calculate number of histogram bins of time\n", + "# one of \"sqrt\", \"sturges\", \"rice\", \"scott\" and \"fd\" or a number specifying the number of bins\n", + "hist_size = \"sturges\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data processing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:05:04.270940Z", + "start_time": "2019-03-22T09:05:03.059822Z" + } + }, + "outputs": [], + "source": [ + "# Setup plot style\n", + "\n", + "plt.style.use(style)\n", + "\n", + "cmap = deepcopy(color_map)\n", + "cmap.set_bad(color_map_bad)\n", + "\n", + "# Normalization, linear or log.\n", + "if log_scale:\n", + " norm = colors.LogNorm()\n", + "else:\n", + " norm = colors.Normalize()\n", + "\n", + "# Read the header line.\n", + "\n", + "with open(fname, \"r\") as f:\n", + " header = f.readline()\n", + "header_names = header.split(\";\")\n", + "if len(header_names) != 9:\n", + " print(\"Bad data?\")\n", + " exit(1)\n", + "\n", + "# Load the data\n", + "\n", + "hx = lambda x: int(x, 16)\n", + "data = np.genfromtxt(fname, delimiter=\";\", skip_header=1, converters={3: unhexlify, 4: unhexlify,\n", + " 5: hx, 6: unhexlify, 7: hx,\n", + " 8: lambda b: bool(int(b))},\n", + " dtype=np.dtype([(\"index\", \"u4\"), (\"sign_time\", \"u4\"), (\"verify_time\", \"u4\"),\n", + " (\"data\", \"O\"), (\"pub\", \"O\"), (\"priv\", \"O\"), (\"signature\", \"O\"),\n", + " (\"nonce\", \"O\"), (\"valid\", \"b\")]))\n", + "# Skip first (outliers?)\n", + "\n", + "data = data[skip_first:]\n", + "\n", + "# Setup the data\n", + "\n", + "# Convert time data\n", + "orig_sign_unit = header_names[1].split(\"[\")[1][:-1]\n", + "orig_verify_unit = header_names[2].split(\"[\")[1][:-1]\n", + "sign_disp_unit = time_scale(data[\"sign_time\"], orig_sign_unit, sign_unit, scaling_factor)\n", + "verify_disp_unit = time_scale(data[\"verify_time\"], orig_verify_unit, verify_unit, scaling_factor)\n", + "\n", + "if np.any(data[\"nonce\"] == None):\n", + " recompute_nonces(data, curve, hash_algo)\n", + "\n", + "# Trim times\n", + "quant_low_bound = trim_low if 0 <= trim_low <= 1 else 0.01\n", + "quant_high_bound = trim_high if 0 <= trim_high <= 1 else 0.95\n", + "quantiles_sign = mquantiles(data[\"sign_time\"], prob=(quant_low_bound, 0.25, 0.5, 0.75, quant_high_bound))\n", + "if trim:\n", + " low_bound = quantiles_sign[0] if 0 <= trim_low <= 1 else trim_low\n", + " high_bound = quantiles_sign[4] if 0 <= trim_high <= 1 else trim_high\n", + " data_trimmed = data[np.logical_and(data[\"sign_time\"] >= low_bound,\n", + " data[\"sign_time\"] <= high_bound)]\n", + " quantiles_sign_trim = mquantiles(data_trimmed[\"sign_time\"], prob=(quant_low_bound, 0.25, 0.5, 0.75, quant_high_bound))\n", + "else:\n", + " low_bound = None\n", + " high_bound = None\n", + " data_trimmed = data\n", + " quantiles_sign_trim = quantiles_sign\n", + "\n", + "description_sign = describe(data[\"sign_time\"])\n", + "description_sign_trim = describe(data_trimmed[\"sign_time\"])\n", + "\n", + "max_sign_time = description_sign.minmax[1]\n", + "min_sign_time = description_sign.minmax[0]\n", + "bit_size = len(bin(max(data[\"priv\"]))) - 2\n", + "byte_size = (bit_size + 7) // 8\n", + "bit_size = byte_size * 8\n", + "\n", + "hist_size_sign_time = hist_size_func(hist_size)(description_sign.nobs, min_sign_time, max_sign_time, description_sign.variance, quantiles_sign[1], quantiles_sign[3])\n", + "hist_size_sign_time_trim = hist_size_func(hist_size)(description_sign_trim.nobs, description_sign_trim.minmax[0], description_sign_trim.minmax[1], description_sign_trim.variance, quantiles_sign_trim[1], quantiles_sign_trim[3])\n", + "\n", + "if hist_size_sign_time < 30:\n", + " hist_size_sign_time = max_sign_time - min_sign_time\n", + "if hist_size_sign_time_trim < 30:\n", + " hist_size_sign_time_trim = description_sign_trim.minmax[1] - description_sign_trim.minmax[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:05:04.817352Z", + "start_time": "2019-03-22T09:05:04.804639Z" + } + }, + "outputs": [], + "source": [ + "display(\"Raw\")\n", + "desc = [(\"N\", \"min, max\", \"mean\", \"variance\", \"skewness\", \"kurtosis\"),\n", + " description_sign]\n", + "display(HTML(tabulate.tabulate(desc, tablefmt=\"html\")))\n", + "display(\"Trimmed\")\n", + "desc = [(\"N\", \"min, max\", \"mean\", \"variance\", \"skewness\", \"kurtosis\"),\n", + " description_sign_trim]\n", + "display(HTML(tabulate.tabulate(desc, tablefmt=\"html\")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Selected quantiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:05:06.106323Z", + "start_time": "2019-03-22T09:05:06.090706Z" + } + }, + "outputs": [], + "source": [ + "tbl = [(quant_low_bound, \"0.25\", \"0.5\", \"0.75\", quant_high_bound),\n", + " list(map(lambda x: \"{} {}\".format(x, sign_disp_unit), quantiles_sign))]\n", + "display(HTML(tabulate.tabulate(tbl, tablefmt=\"html\")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:05:06.799992Z", + "start_time": "2019-03-22T09:05:06.790754Z" + } + }, + "outputs": [], + "source": [ + "display(\"Bitsize:\", bit_size)\n", + "display(\"Histogram time bins: {}\".format(hist_size_sign_time))\n", + "display(\"Histogram time bins(trimmed): {}\".format(hist_size_sign_time_trim))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plots" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Nonce MSB vs signature time heatmap\n", + "The heatmap should show uncorrelated variables." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:05:08.567129Z", + "start_time": "2019-03-22T09:05:07.948181Z" + } + }, + "outputs": [], + "source": [ + "fig_nonce = plt.figure(figsize=(10.5, 8), dpi=90)\n", + "axe_nonce = fig_nonce.add_subplot(1, 1, 1, title=\"Nonce MSB vs signature time\")\n", + "nonce_msb = np.array(list(map(lambda x: x >> (bit_size - 8), data_trimmed[\"nonce\"])), dtype=np.dtype(\"u1\"))\n", + "max_msb = max(nonce_msb)\n", + "min_msb = min(nonce_msb)\n", + "heatmap, xedges, yedges = np.histogram2d(nonce_msb, data_trimmed[\"sign_time\"],\n", + " bins=[max_msb - min_msb + 1, hist_size_sign_time_trim])\n", + "extent = [min_msb, max_msb, yedges[0], yedges[-1]]\n", + "im = axe_nonce.imshow(heatmap.T, extent=extent, aspect=\"auto\", cmap=cmap, origin=\"low\",\n", + " interpolation=\"nearest\", norm=norm)\n", + "axe_nonce.set_xlabel(\"nonce MSB value\")\n", + "axe_nonce.set_ylabel(\"signature time ({})\".format(sign_disp_unit))\n", + "fig_nonce.colorbar(im, ax=axe_nonce)\n", + "\n", + "fig_nonce.tight_layout()\n", + "del nonce_msb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Nonce Hamming Weight vs signature time heatmap\n", + "The heatmap should show uncorrelated variables.\n", + "\n", + "Also contains a nonce Hamming Weight histogram, which should be binomially distributed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:05:17.416586Z", + "start_time": "2019-03-22T09:05:16.928355Z" + } + }, + "outputs": [], + "source": [ + "fig_nonce_hist = plt.figure(figsize=(10.5, 12), dpi=90)\n", + "gs = gridspec.GridSpec(2, 1, height_ratios=[2.5, 1])\n", + "axe_nonce_hist = fig_nonce_hist.add_subplot(gs[0], title=\"Nonce Hamming weight vs signature time\")\n", + "axe_nonce_hist_hw = fig_nonce_hist.add_subplot(gs[1], sharex=axe_nonce_hist, title=\"Nonce Hamming weight\")\n", + "nonce_hw = np.array(list(map(hw, data_trimmed[\"nonce\"])), dtype=np.dtype(\"u2\"))\n", + "h, xe, ye = np.histogram2d(nonce_hw, data_trimmed[\"sign_time\"], bins=[max(nonce_hw) - min(nonce_hw), hist_size_sign_time_trim])\n", + "im = axe_nonce_hist.imshow(h.T, origin=\"low\", cmap=cmap, aspect=\"auto\", extent=[xe[0], xe[-1], ye[0], ye[-1]], norm=norm)\n", + "axe_nonce_hist.axvline(x=bit_size//2, alpha=0.7, linestyle=\"dotted\", color=\"white\", label=str(bit_size//2) + \" bits\")\n", + "axe_nonce_hist.set_xlabel(\"nonce Hamming weight\")\n", + "axe_nonce_hist.set_ylabel(\"signature time ({})\".format(sign_disp_unit))\n", + "axe_nonce_hist.legend(loc=\"best\")\n", + "\n", + "plot_hist(axe_nonce_hist_hw, nonce_hw, \"nonce Hamming weight\", log_scale, True, True)\n", + "\n", + "param = norm_dist.fit(nonce_hw)\n", + "pdf_range = np.arange(min(nonce_hw), max(nonce_hw))\n", + "norm_pdf = norm_dist.pdf(pdf_range, *param[:-2], loc=param[-2], scale=param[-1]) * description_sign_trim.nobs\n", + "axe_nonce_hist_hw.plot(pdf_range, norm_pdf, label=\"fitted normal distribution\")\n", + "axe_nonce_hist_hw.legend(loc=\"best\")\n", + "\n", + "\n", + "display(HTML(\"<b>Nonce Hamming weight fitted with normal distribution:</b>\"))\n", + "display(HTML(tabulate.tabulate([(\"Mean\", \"Variance\"), param], tablefmt=\"html\")))\n", + "\n", + "fig_nonce_hist.tight_layout()\n", + "fig_nonce_hist.colorbar(im, ax=[axe_nonce_hist, axe_nonce_hist_hw])\n", + "del nonce_hw" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Signature time histogram" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-21T16:18:48.188494Z", + "start_time": "2019-03-21T16:18:39.850301Z" + } + }, + "outputs": [], + "source": [ + "fig_sig_hist = plt.figure(figsize=(10.5, 8), dpi=90)\n", + "axe_hist_full = fig_sig_hist.add_subplot(2, 1, 1, title=\"Signature time\")\n", + "axe_hist_trim = fig_sig_hist.add_subplot(2, 1, 2, title=\"Signature time (trimmed)\")\n", + "plot_hist(axe_hist_full, data[\"sign_time\"], \"signature time ({})\".format(sign_disp_unit), log_scale, hist_size_sign_time);\n", + "plot_hist(axe_hist_trim, data_trimmed[\"sign_time\"], \"signature time ({})\".format(sign_disp_unit), log_scale, hist_size_sign_time_trim);\n", + "fig_sig_hist.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Verification time histogram" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-21T16:19:05.618320Z", + "start_time": "2019-03-21T16:18:53.161932Z" + } + }, + "outputs": [], + "source": [ + "fig_vrfy_hist = plt.figure(figsize=(10.5, 5), dpi=90)\n", + "axe_hist_full = fig_vrfy_hist.add_subplot(1, 1, 1, title=\"Verification time\")\n", + "plot_hist(axe_hist_full, data[\"verify_time\"], \"verification time ({})\".format(verify_disp_unit), log_scale, hist_size_sign_time);\n", + "fig_vrfy_hist.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Moving averages of signature and verification times" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:05:24.311923Z", + "start_time": "2019-03-22T09:05:24.063585Z" + } + }, + "outputs": [], + "source": [ + "fig_avg = plt.figure(figsize=(10.5, 8), dpi=90)\n", + "axe_sign_avg = fig_avg.add_subplot(2, 1, 1, title=\"Moving average of signature time\")\n", + "axe_vrfy_avg = fig_avg.add_subplot(2, 1, 2, sharex=axe_sign_avg, title=\"Moving average of verification time\")\n", + "avg_sign_100 = moving_average(data[\"sign_time\"], 100)\n", + "avg_sign_1000 = moving_average(data[\"sign_time\"], 1000)\n", + "axe_sign_avg.plot(avg_sign_100, label=\"window = 100\")\n", + "axe_sign_avg.plot(avg_sign_1000, label=\"window = 1000\")\n", + "if low_bound is not None:\n", + " axe_sign_avg.axhline(y=low_bound, alpha=0.7, linestyle=\"dotted\", color=\"green\", label=\"Low trim bound = {}\".format(low_bound))\n", + "if high_bound is not None:\n", + " axe_sign_avg.axhline(y=high_bound, alpha=0.7, linestyle=\"dotted\", color=\"orange\", label=\"Hight trim bound = {}\".format(high_bound))\n", + "axe_sign_avg.set_ylabel(\"signature time ({})\".format(sign_disp_unit))\n", + "axe_sign_avg.set_xlabel(\"index\")\n", + "axe_sign_avg.legend(loc=\"best\")\n", + "\n", + "avg_vrfy_100 = moving_average(data[\"verify_time\"], 100)\n", + "avg_vrfy_1000 = moving_average(data[\"verify_time\"], 1000)\n", + "axe_vrfy_avg.plot(avg_vrfy_100, label=\"window = 100\")\n", + "axe_vrfy_avg.plot(avg_vrfy_1000, label=\"window = 1000\")\n", + "axe_vrfy_avg.set_ylabel(\"verification time ({})\".format(verify_disp_unit))\n", + "axe_vrfy_avg.set_xlabel(\"index\")\n", + "axe_vrfy_avg.legend(loc=\"best\")\n", + "\n", + "fig_avg.tight_layout()\n", + "\n", + "del avg_sign_100, avg_sign_1000, avg_vrfy_100, avg_vrfy_1000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Nonce MSB and LSB histograms\n", + "Expected to be uniform over [0, 255]." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:05:29.427210Z", + "start_time": "2019-03-22T09:05:28.768399Z" + } + }, + "outputs": [], + "source": [ + "fig_nonce_hists = plt.figure(figsize=(10.5, 8), dpi=90)\n", + "nonce_msb = np.array(list(map(lambda x: x >> (bit_size - 8), data[\"nonce\"])), dtype=np.dtype(\"u1\"))\n", + "nonce_lsb = np.array(list(map(lambda x: x & 0xff, data[\"nonce\"])), dtype=np.dtype(\"u1\"))\n", + "axe_msb_n_hist = fig_nonce_hists.add_subplot(2, 1, 1, title=\"Nonce MSB\")\n", + "axe_lsb_n_hist = fig_nonce_hists.add_subplot(2, 1, 2, title=\"Nonce LSB\")\n", + "plot_hist(axe_msb_n_hist, nonce_msb, \"nonce MSB\", log_scale, False, False)\n", + "plot_hist(axe_lsb_n_hist, nonce_lsb, \"nonce LSB\", log_scale, False, False)\n", + "\n", + "fig_nonce_hists.tight_layout()\n", + "del nonce_msb, nonce_lsb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Nonce bit length vs signature time heatmap\n", + "Also contains nonce bit length histogram, which is expected to be axis flipped geometric distribution with $p = \\frac{1}{2}$ peaking at the bit size of the order of the curve." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:06:00.061206Z", + "start_time": "2019-03-22T09:05:59.817227Z" + } + }, + "outputs": [], + "source": [ + "fig_bl = plt.figure(figsize=(10.5, 12), dpi=90)\n", + "gs = gridspec.GridSpec(2, 1, height_ratios=[2.5, 1])\n", + "axe_bl_heat = fig_bl.add_subplot(gs[0], title=\"Nonce bit length vs signature time\")\n", + "axe_bl_hist = fig_bl.add_subplot(gs[1], sharex=axe_bl_heat, title=\"Nonce bit length\")\n", + "bl_data = np.array(list(map(lambda x: x.bit_length(), data_trimmed[\"nonce\"])), dtype=np.dtype(\"u2\"))\n", + "\n", + "h, xe, ye = np.histogram2d(bl_data, data_trimmed[\"sign_time\"], bins=[max(bl_data) - min(bl_data), hist_size_sign_time_trim])\n", + "im = axe_bl_heat.imshow(h.T, origin=\"low\", cmap=cmap, aspect=\"auto\", extent=[xe[0], xe[-1], ye[0], ye[-1]], norm=norm)\n", + "axe_bl_heat.set_xlabel(\"nonce bit length\")\n", + "axe_bl_heat.set_ylabel(\"signature time ({})\".format(sign_disp_unit))\n", + "\n", + "plot_hist(axe_bl_hist, bl_data, \"nonce bit length\", log_scale, align=\"right\")\n", + "\n", + "fig_bl.tight_layout()\n", + "fig_bl.colorbar(im, ax=[axe_bl_heat, axe_bl_hist])\n", + "\n", + "del bl_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Nonce bit length histogram given time\n", + "Interactively shows the histogram of nonce bit length given a selected time range centered around `center` of width `width`. Ideally, the means of these conditional distributions are equal, while the variances can vary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-22T09:06:16.571781Z", + "start_time": "2019-03-22T09:06:16.336312Z" + }, + "scrolled": false + }, + "outputs": [], + "source": [ + "fig_bl_time = plt.figure(figsize=(10.5, 5), dpi=90)\n", + "axe_bl_time = fig_bl_time.add_subplot(111)\n", + "axe_bl_time.set_autoscalex_on(False)\n", + "def f(center, width):\n", + " lower_bnd = center - width/2\n", + " upper_bnd = center + width/2\n", + " values = data_trimmed[np.logical_and(data_trimmed[\"sign_time\"] <= upper_bnd,\n", + " data_trimmed[\"sign_time\"] >= lower_bnd)]\n", + " axe_bl_time.clear()\n", + " axe_bl_time.set_title(\"Nonce bit length, given signature time $\\in ({}, {})$ {}\".format(int(lower_bnd), int(upper_bnd), sign_disp_unit))\n", + " bl_data = np.array(list(map(lambda x: x.bit_length(), values[\"nonce\"])), dtype=np.dtype(\"u2\"))\n", + " plot_hist(axe_bl_time, bl_data, \"nonce bit length\", bins=11, range=(bit_size-10, bit_size+1), align=\"left\")\n", + " axe_bl_time.set_xlim((bit_size-10, bit_size))\n", + " fig_bl_time.tight_layout()\n", + "\n", + "center_w = widgets.IntSlider(min=min(data_trimmed[\"sign_time\"]),\n", + " max=max(data_trimmed[\"sign_time\"]),\n", + " step=1,\n", + " value=description_sign_trim.mean,\n", + " continuous_update=False,\n", + " description=\"center {}\".format(sign_disp_unit))\n", + "width_w = widgets.IntSlider(min=1, max=100, continuous_update=False,\n", + " description=\"width {}\".format(sign_disp_unit))\n", + "w = interactive(f, center=center_w,\n", + " width=width_w)\n", + "display(w)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Validation\n", + "Perform some tests on the produced data and compare to expected results.\n", + "\n", + "\n", + "This requires some information about the used curve, enter it below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-21T15:24:57.397880Z", + "start_time": "2019-03-21T15:24:37.395614Z" + } + }, + "outputs": [], + "source": [ + "p_str = input(\"The prime specifying the finite field:\")\n", + "p = int(p_str, 16) if p_str.startswith(\"0x\") else int(p_str)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-21T15:25:05.137250Z", + "start_time": "2019-03-21T15:24:59.218945Z" + } + }, + "outputs": [], + "source": [ + "r_str = input(\"The order of the curve:\")\n", + "r = int(r_str, 16) if r_str.startswith(\"0x\") else int(r_str)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All of the following tests should pass (e.g. be true), given a large enough sample." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-21T16:23:08.618543Z", + "start_time": "2019-03-21T16:23:08.451827Z" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "max_priv = max(data[\"priv\"])\n", + "max_nonce = max(data[\"nonce\"])\n", + "un = len(np.unique(data[\"priv\"])) != 1\n", + "if un:\n", + " print(\"Private keys are smaller than order:\\t\\t\\t\" + str(max_priv < r))\n", + " print(\"Private keys are larger than prime(if order > prime):\\t\" + str(r <= p or max_priv > p))\n", + "print(\"Nonces are smaller than order:\\t\\t\\t\\t\" + str(max_nonce < r))\n", + "print(\"Nonces are larger than prime(if order > prime):\\t\\t\" + str(r <= p or max_nonce > p))\n", + "if un:\n", + " print(\"Private keys reach full bit length of order:\\t\\t\" + str(max_priv.bit_length() == r.bit_length()))\n", + "print(\"Nonces reach full bit length of order:\\t\\t\\t\" + str(max_nonce.bit_length() == r.bit_length()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-21T16:23:09.355514Z", + "start_time": "2019-03-21T16:23:09.315702Z" + } + }, + "outputs": [], + "source": [ + "if un:\n", + " print(\"Private key bit length (min, max):\" + str(min(data[\"priv\"]).bit_length()) + \", \" + str(max(data[\"priv\"]).bit_length()))\n", + "print(\"Nonce bit length (min, max):\" + str(min(data[\"nonce\"]).bit_length()) + \", \" + str(max(data[\"nonce\"]).bit_length()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Nonce uniqueness (no duplicates):\" + str(len(np.unique(data[\"nonce\"])) == len(data[\"nonce\"])))" + ] + } + ], + "metadata": { + "@webio": { + "lastCommId": "a38f080b9a044da08882846212c38d91", + "lastKernelId": "4cad5b27-583d-4c4e-947c-f47bdf2d4754" + }, + "hide_input": false, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analysis/plot_gen.ipynb b/analysis/plot_gen.ipynb new file mode 100644 index 0000000..8e6913f --- /dev/null +++ b/analysis/plot_gen.ipynb @@ -0,0 +1,755 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analysis of key generation data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:38.893311Z", + "start_time": "2019-03-17T19:16:37.845017Z" + } + }, + "outputs": [], + "source": [ + "%matplotlib notebook\n", + "import numpy as np\n", + "from scipy.stats import describe\n", + "from scipy.stats import norm as norm_dist\n", + "from scipy.stats.mstats import mquantiles\n", + "from math import log, sqrt\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib import ticker, colors, gridspec\n", + "from copy import deepcopy\n", + "from utils import plot_hist, moving_average, hw, time_scale\n", + "from binascii import unhexlify\n", + "from IPython.display import display, HTML\n", + "from ipywidgets import interact, interactive, fixed, interact_manual\n", + "import ipywidgets as widgets\n", + "import tabulate" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Settings\n", + "Enter your input below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:38.911566Z", + "start_time": "2019-03-17T19:16:38.900168Z" + } + }, + "outputs": [], + "source": [ + "# File name with output from ECTesterReader or ECTesterStandalone key generation.\n", + "fname = \"filename.csv\"\n", + "\n", + "# The time unit used in displaying the plots. One of \"milli\", \"micro\", \"nano\".\n", + "# WARNING: Using nano might lead to very large plots/histograms and to the\n", + "# notebook to freeze or run out of memory, as well as bad visualization\n", + "# quality, due to noise and low density.\n", + "gen_unit = \"milli\" # Unit of key generation command\n", + "export_unit = \"milli\" # Unit of key export command\n", + "# A number which will be used to divide the time into sub-units, e.g. for 5, time will be in fifths of units\n", + "scaling_factor = 1\n", + "\n", + "# The amount of entries skipped from the beginning of the file, as they are usually outliers.\n", + "skip_first = 10\n", + "\n", + "# Whether to plot things in logarithmic scale or not.\n", + "log_scale = False\n", + "\n", + "# Whether to trim the time data outside the 1 - 99 percentile range (adjust below). Quite useful.\n", + "trim = True\n", + "\n", + "# How much to trim? Either a number in [0,1] signifying a quantile, or an absolute value signifying a threshold\n", + "trim_low = 0.01\n", + "trim_high = 0.99\n", + "\n", + "# Graphical (matplotlib) style name\n", + "style = \"ggplot\"\n", + "\n", + "# Color map to use, and what color to assign to \"bad\" values (necessary for log_scale)\n", + "color_map = plt.cm.viridis\n", + "color_map_bad = \"black\"\n", + "\n", + "# What function to use to calculate number of histogram bins of time\n", + "# one of \"sqrt\", \"sturges\", \"rice\", \"scott\" and \"fd\" or a number specifying the number of bins\n", + "hist_size = \"sturges\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data processing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:39.733575Z", + "start_time": "2019-03-17T19:16:39.728385Z" + } + }, + "outputs": [], + "source": [ + "# Setup plot style\n", + "\n", + "plt.style.use(style)\n", + "\n", + "cmap = deepcopy(color_map)\n", + "cmap.set_bad(color_map_bad)\n", + "\n", + "# Normalization, linear or log.\n", + "if log_scale:\n", + " norm = colors.LogNorm()\n", + "else:\n", + " norm = colors.Normalize()\n", + "\n", + "# Read the header line.\n", + "\n", + "with open(fname, \"r\") as f:\n", + " header = f.readline()\n", + "header_names = header.split(\";\")\n", + "if len(header_names) not in (4, 5):\n", + " print(\"Bad data?\")\n", + " exit(1)\n", + "\n", + "# Load the data\n", + "\n", + "hx = lambda x: int(x, 16)\n", + "if len(header_names) == 4:\n", + " data = np.genfromtxt(fname, delimiter=\";\", skip_header=1, converters={2: unhexlify, 3: hx},\n", + " dtype=np.dtype([(\"index\", \"u4\"), (\"gen_time\", \"u4\"), (\"pub\", \"O\"), (\"priv\", \"O\")]))\n", + "else:\n", + " data = np.genfromtxt(fname, delimiter=\";\", skip_header=1, converters={3: unhexlify, 4: hx},\n", + " dtype=np.dtype([(\"index\", \"u4\"), (\"gen_time\", \"u4\"), (\"export_time\", \"u4\"),\n", + " (\"pub\", \"O\"), (\"priv\", \"O\")]))\n", + "\n", + "# Skip first (outliers?)\n", + "\n", + "data = data[skip_first:]\n", + "\n", + "# Setup the data\n", + "\n", + "# Convert time data\n", + "orig_gen_unit = header_names[1].split(\"[\")[1][:-1]\n", + "gen_disp_unit = time_scale(data[\"gen_time\"], orig_gen_unit, gen_unit, scaling_factor)\n", + "if len(header_names) == 5:\n", + " orig_export_unit = header_names[2].split(\"[\")[1][:-1]\n", + " export_disp_unit = time_scale(data[\"export_time\"], orig_export_unit, export_unit, scaling_factor)\n", + "\n", + "# Trim gen times\n", + "quant_low_bound = trim_low if 0 <= trim_low <= 1 else 0.01\n", + "quant_high_bound = trim_high if 0 <= trim_high <= 1 else 0.95\n", + "quantiles_gen = mquantiles(data[\"gen_time\"], prob=(quant_low_bound, 0.25, 0.5, 0.75, quant_high_bound))\n", + "if trim:\n", + " low_bound = quantiles_gen[0] if 0 <= trim_low <= 1 else trim_low\n", + " high_bound = quantiles_gen[4] if 0 <= trim_high <= 1 else trim_high\n", + " data_trimmed = data[np.logical_and(data[\"gen_time\"] >= low_bound,\n", + " data[\"gen_time\"] <= high_bound)]\n", + " quantiles_gen_trim = mquantiles(data_trimmed[\"gen_time\"], prob=(quant_low_bound, 0.25, 0.5, 0.75, quant_high_bound))\n", + "else:\n", + " low_bound = None\n", + " high_bound = None\n", + " data_trimmed = data\n", + " quantiles_gen_trim = quantiles_gen\n", + "\n", + "description_gen = describe(data[\"gen_time\"])\n", + "description_gen_trim = describe(data_trimmed[\"gen_time\"])\n", + "\n", + "max_gen_time = description_gen.minmax[1]\n", + "min_gen_time = description_gen.minmax[0]\n", + "bit_size = len(bin(max(data[\"priv\"]))) - 2\n", + "byte_size = (bit_size + 7) // 8\n", + "bit_size = byte_size * 8\n", + "\n", + "if hist_size == \"sqrt\":\n", + " hist_size_func = lambda n, xmin, xmax, var, xlower, xupper: int(sqrt(n)) + 1\n", + "elif hist_size == \"sturges\":\n", + " hist_size_func = lambda n, xmin, xmax, var, xlower, xupper: int(log(n, 2)) + 1\n", + "elif hist_size == \"rice\":\n", + " hist_size_func = lambda n, xmin, xmax, var, xlower, xupper: int(2 * n**(1/3))\n", + "elif hist_size == \"scott\":\n", + " hist_size_func = lambda n, xmin, xmax, var, xlower, xupper: (xmax - xmin) // int((3.5 * sqrt(var)) / (n**(1/3)))\n", + "elif hist_size == \"fd\":\n", + " hist_size_func = lambda n, xmin, xmax, var, xlower, xupper: (xmax - xmin) // int(2 * (xupper - xlower) / (n**(1/3)))\n", + "else:\n", + " hist_size_func = lambda n, xmin, xmax, var, xlower, xupper: hist_size\n", + "\n", + "hist_size_gen_time = hist_size_func(description_gen.nobs, min_gen_time, max_gen_time, description_gen.variance, quantiles_gen[1], quantiles_gen[3])\n", + "hist_size_gen_time_trim = hist_size_func(description_gen_trim.nobs, description_gen_trim.minmax[0], description_gen_trim.minmax[1], description_gen_trim.variance, quantiles_gen_trim[1], quantiles_gen_trim[3])\n", + "\n", + "if hist_size_gen_time < 30:\n", + " hist_size_gen_time = max_gen_time - min_gen_time\n", + "if hist_size_gen_time_trim < 30:\n", + " hist_size_gen_time_trim = description_gen_trim.minmax[1] - description_gen_trim.minmax[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:43.343937Z", + "start_time": "2019-03-17T19:16:43.329900Z" + } + }, + "outputs": [], + "source": [ + "display(\"Raw\")\n", + "desc = [(\"N\", \"min, max\", \"mean\", \"variance\", \"skewness\", \"kurtosis\"),\n", + " description_gen]\n", + "display(HTML(tabulate.tabulate(desc, tablefmt=\"html\")))\n", + "display(\"Trimmed\")\n", + "desc = [(\"N\", \"min, max\", \"mean\", \"variance\", \"skewness\", \"kurtosis\"),\n", + " description_gen_trim]\n", + "display(HTML(tabulate.tabulate(desc, tablefmt=\"html\")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Selected quantiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:44.058425Z", + "start_time": "2019-03-17T19:16:44.043877Z" + } + }, + "outputs": [], + "source": [ + "tbl = [(quant_low_bound, \"0.25\", \"0.5\", \"0.75\", quant_high_bound),\n", + " list(map(lambda x: \"{} {}\".format(x, gen_disp_unit), quantiles_gen))]\n", + "display(HTML(tabulate.tabulate(tbl, tablefmt=\"html\")))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-19T13:41:01.468943Z", + "start_time": "2019-03-19T13:41:01.417360Z" + } + }, + "outputs": [], + "source": [ + "display(\"Bitsize:\", bit_size)\n", + "display(\"Histogram time bins: {}\".format(hist_size_gen_time))\n", + "display(\"Histogram time bins(trimmed): {}\".format(hist_size_gen_time_trim))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plots" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private key MSB vs time heatmap\n", + "The heatmap should show uncorrelated variables." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:45.995145Z", + "start_time": "2019-03-17T19:16:45.802741Z" + } + }, + "outputs": [], + "source": [ + "fig_private = plt.figure(figsize=(10.5, 8), dpi=90)\n", + "axe_private = fig_private.add_subplot(1, 1, 1, title=\"Private key MSB vs keygen time\")\n", + "priv_msb = np.array(list(map(lambda x: x >> (bit_size - 8), data_trimmed[\"priv\"])), dtype=np.dtype(\"u1\"))\n", + "max_msb = max(priv_msb)\n", + "min_msb = min(priv_msb)\n", + "heatmap, xedges, yedges = np.histogram2d(priv_msb, data_trimmed[\"gen_time\"],\n", + " bins=[max_msb - min_msb + 1, hist_size_gen_time_trim])\n", + "extent = [min_msb, max_msb, yedges[0], yedges[-1]]\n", + "im = axe_private.imshow(heatmap.T, extent=extent, aspect=\"auto\", cmap=cmap, origin=\"low\",\n", + " interpolation=\"nearest\", norm=norm)\n", + "axe_private.set_xlabel(\"private key MSB value\")\n", + "axe_private.set_ylabel(\"keygen time ({})\".format(gen_disp_unit))\n", + "fig_private.colorbar(im, ax=axe_private)\n", + "\n", + "fig_private.tight_layout()\n", + "del priv_msb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private key Hamming Weight vs time heatmap\n", + "The heatmap should show uncorrelated variables.\n", + "\n", + "Also contains a private key Hamming Weight histogram, which should be binomially distributed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:49.890330Z", + "start_time": "2019-03-17T19:16:47.357225Z" + } + }, + "outputs": [], + "source": [ + "fig_priv_hist = plt.figure(figsize=(10.5, 12), dpi=90)\n", + "gs = gridspec.GridSpec(2, 1, height_ratios=[2.5, 1])\n", + "axe_priv_hist = fig_priv_hist.add_subplot(gs[0], title=\"Private key Hamming weight vs keygen time\")\n", + "axe_priv_hist_hw = fig_priv_hist.add_subplot(gs[1], sharex=axe_priv_hist, title=\"Private key Hamming weight\")\n", + "priv_hw = np.array(list(map(hw, data_trimmed[\"priv\"])), dtype=np.dtype(\"u2\"))\n", + "h, xe, ye = np.histogram2d(priv_hw, data_trimmed[\"gen_time\"], bins=[max(priv_hw) - min(priv_hw), hist_size_gen_time_trim])\n", + "im = axe_priv_hist.imshow(h.T, origin=\"low\", cmap=cmap, aspect=\"auto\", extent=[xe[0], xe[-1], ye[0], ye[-1]], norm=norm)\n", + "axe_priv_hist.axvline(x=bit_size//2, alpha=0.7, linestyle=\"dotted\", color=\"white\", label=str(bit_size//2) + \" bits\")\n", + "axe_priv_hist.set_xlabel(\"private key Hamming weight\")\n", + "axe_priv_hist.set_ylabel(\"keygen time ({})\".format(gen_disp_unit))\n", + "axe_priv_hist.legend(loc=\"best\")\n", + "\n", + "plot_hist(axe_priv_hist_hw, priv_hw, \"private key Hamming weight\", log_scale, None)\n", + "\n", + "param = norm_dist.fit(priv_hw)\n", + "pdf_range = np.arange(min(priv_hw), max(priv_hw))\n", + "norm_pdf = norm_dist.pdf(pdf_range, *param[:-2], loc=param[-2], scale=param[-1]) * description_gen_trim.nobs\n", + "axe_priv_hist_hw.plot(pdf_range, norm_pdf, label=\"fitted normal distribution\")\n", + "axe_priv_hist_hw.legend(loc=\"best\")\n", + "\n", + "\n", + "display(HTML(\"<b>Private key Hamming weight fitted with normal distribution:</b>\"))\n", + "display(HTML(tabulate.tabulate([(\"Mean\", \"Variance\"), param], tablefmt=\"html\")))\n", + "\n", + "fig_priv_hist.tight_layout()\n", + "fig_priv_hist.colorbar(im, ax=[axe_priv_hist, axe_priv_hist_hw])\n", + "del priv_hw" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Key generation time histogram" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:52.605277Z", + "start_time": "2019-03-17T19:16:50.114281Z" + } + }, + "outputs": [], + "source": [ + "fig_kg_hist = plt.figure(figsize=(10.5, 8), dpi=90)\n", + "axe_hist_full = fig_kg_hist.add_subplot(2, 1, 1)\n", + "axe_hist_trim = fig_kg_hist.add_subplot(2, 1, 2)\n", + "plot_hist(axe_hist_full, data[\"gen_time\"], \"keygen time ({})\".format(gen_disp_unit), log_scale, hist_size_gen_time);\n", + "plot_hist(axe_hist_trim, data_trimmed[\"gen_time\"], \"keygen time ({})\".format(gen_disp_unit), log_scale, hist_size_gen_time_trim);\n", + "fig_kg_hist.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Key export time histogram\n", + "*Available only for ECTesterReader and keys generated on cards.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:52.610858Z", + "start_time": "2019-03-17T19:16:52.607191Z" + } + }, + "outputs": [], + "source": [ + "if \"export_time\" in data.dtype.names:\n", + " fig_exp_hist = plt.figure(figsize=(10.5, 8), dpi=90)\n", + " axe_hist_full = fig_exp_hist.add_subplot(2, 1, 1)\n", + " axe_hist_trim = fig_exp_hist.add_subplot(2, 1, 2)\n", + " plot_hist(axe_hist_full, data[\"export_time\"], \"export time ({})\".format(export_disp_unit), log_scale, hist_size_gen_time);\n", + " plot_hist(axe_hist_trim, data_trimmed[\"export_time\"], \"export time ({})\".format(export_disp_unit), log_scale, hist_size_gen_time_trim);\n", + " fig_exp_hist.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Moving averages of key generation time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:54.504830Z", + "start_time": "2019-03-17T19:16:54.409189Z" + } + }, + "outputs": [], + "source": [ + "fig_avg = plt.figure(figsize=(10.5, 7), dpi=90)\n", + "axe_avg = fig_avg.add_subplot(1, 1, 1, title=\"Moving average of key generation time\")\n", + "avg_100 = moving_average(data[\"gen_time\"], 100)\n", + "avg_1000 = moving_average(data[\"gen_time\"], 1000)\n", + "axe_avg.plot(avg_100, label=\"window = 100\")\n", + "axe_avg.plot(avg_1000, label=\"window = 1000\")\n", + "if low_bound is not None:\n", + " axe_avg.axhline(y=low_bound, alpha=0.7, linestyle=\"dotted\", color=\"green\", label=\"Low trim bound = {}\".format(low_bound))\n", + "if high_bound is not None:\n", + " axe_avg.axhline(y=high_bound, alpha=0.7, linestyle=\"dotted\", color=\"orange\", label=\"Hight trim bound = {}\".format(high_bound))\n", + "axe_avg.set_ylabel(\"keygen time ({})\".format(gen_disp_unit))\n", + "axe_avg.set_xlabel(\"index\")\n", + "axe_avg.legend(loc=\"best\")\n", + "\n", + "fig_avg.tight_layout()\n", + "del avg_100, avg_1000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private key MSB and LSB histograms\n", + "Expected to be uniform over [0, 255]." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:16:55.155285Z", + "start_time": "2019-03-17T19:16:54.508407Z" + } + }, + "outputs": [], + "source": [ + "fig_priv_hists = plt.figure(figsize=(10.5, 8), dpi=90)\n", + "priv_msb = np.array(list(map(lambda x: x >> (bit_size - 8), data[\"priv\"])), dtype=np.dtype(\"u1\"))\n", + "priv_lsb = np.array(list(map(lambda x: x & 0xff, data[\"priv\"])), dtype=np.dtype(\"u1\"))\n", + "axe_msb_s_hist = fig_priv_hists.add_subplot(2, 1, 1, title=\"Private key MSB\")\n", + "axe_lsb_s_hist = fig_priv_hists.add_subplot(2, 1, 2, title=\"Private key LSB\")\n", + "plot_hist(axe_msb_s_hist, priv_msb, \"private key MSB\", log_scale)\n", + "plot_hist(axe_lsb_s_hist, priv_lsb, \"private key LSB\", log_scale)\n", + "\n", + "fig_priv_hists.tight_layout()\n", + "del priv_msb, priv_lsb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Public key coordinate MSB and LSB histograms\n", + "Expected to be somewhat uniform over [0, 255]." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:17:06.443596Z", + "start_time": "2019-03-17T19:17:05.516616Z" + } + }, + "outputs": [], + "source": [ + "def _split(xy):\n", + " x = int.from_bytes(xy[1:byte_size + 1], byteorder=\"big\")\n", + " y = int.from_bytes(xy[1 + byte_size:], byteorder=\"big\")\n", + " return (x, y)\n", + "\n", + "pub_coords = np.array(list(map(_split, data[\"pub\"])), dtype=np.dtype(\"O\"))\n", + "xs = pub_coords[...,0]\n", + "ys = pub_coords[...,1]\n", + "fig_pub_hists = plt.figure(figsize=(10.5, 14), dpi=90)\n", + "\n", + "def _plot_coord(data, name, offset):\n", + " axe_msb_pub_hist = fig_pub_hists.add_subplot(4, 1, offset, title=\"{} coordinate MSB\".format(name))\n", + " axe_lsb_pub_hist = fig_pub_hists.add_subplot(4, 1, offset + 1, title=\"{} coordinate LSB\".format(name))\n", + " pub_msb = np.array(list(map(lambda x: x >> (bit_size - 8), data)))\n", + " pub_lsb = np.array(list(map(lambda x: x & 0xff, data)))\n", + " plot_hist(axe_msb_pub_hist, pub_msb, \"{} coordinate MSB\".format(name), log_scale)\n", + " plot_hist(axe_lsb_pub_hist, pub_lsb, \"{} coordinate LSB\".format(name), log_scale)\n", + " del pub_msb, pub_lsb\n", + "\n", + "_plot_coord(xs, \"X\", 1)\n", + "_plot_coord(ys, \"Y\", 3)\n", + "\n", + "fig_pub_hists.tight_layout()\n", + "\n", + "del pub_coords, xs, ys" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private key bit length vs time heatmap\n", + "Also contains private key bit length histogram, which is expected to be axis flipped geometric distribution with $p = \\frac{1}{2}$ peaking at the bit size of the order of the curve." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-17T19:25:51.126642Z", + "start_time": "2019-03-17T19:25:50.929170Z" + } + }, + "outputs": [], + "source": [ + "fig_bl = plt.figure(figsize=(10.5, 12), dpi=90)\n", + "gs = gridspec.GridSpec(2, 1, height_ratios=[2.5, 1])\n", + "axe_bl_heat = fig_bl.add_subplot(gs[0], title=\"Private key bit length vs keygen time\")\n", + "axe_bl_hist = fig_bl.add_subplot(gs[1], sharex=axe_bl_heat, title=\"Private key bit length\")\n", + "\n", + "bl_data = np.array(list(map(lambda x: x.bit_length(), data_trimmed[\"priv\"])), dtype=np.dtype(\"u2\"))\n", + "\n", + "h, xe, ye = np.histogram2d(bl_data, data_trimmed[\"gen_time\"], bins=[max(bl_data) - min(bl_data), hist_size_gen_time_trim])\n", + "im = axe_bl_heat.imshow(h.T, origin=\"low\", cmap=cmap, aspect=\"auto\", extent=[xe[0], xe[-1], ye[0], ye[-1]], norm=norm)\n", + "axe_bl_heat.set_xlabel(\"private key bit length\")\n", + "axe_bl_heat.set_ylabel(\"keygen time ({})\".format(gen_disp_unit))\n", + "\n", + "plot_hist(axe_bl_hist, bl_data, \"Private key bit length\", log_scale, align=\"right\")\n", + "\n", + "fig_priv_hist.tight_layout()\n", + "fig_priv_hist.colorbar(im, ax=[axe_bl_heat, axe_bl_hist])\n", + "\n", + "del bl_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Private key bit length histogram given time\n", + "Interactively shows the histogram of private key bit length given a selected time range centered around `center` of width `width`. Ideally, the means of these conditional distributions are equal, while the variances can vary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_bl_time = plt.figure(figsize=(10.5, 5), dpi=90)\n", + "axe_bl_time = fig_bl_time.add_subplot(111)\n", + "axe_bl_time.set_autoscalex_on(False)\n", + "def f(center, width):\n", + " lower_bnd = center - width/2\n", + " upper_bnd = center + width/2\n", + " values = data_trimmed[np.logical_and(data_trimmed[\"gen_time\"] <= upper_bnd,\n", + " data_trimmed[\"gen_time\"] >= lower_bnd)]\n", + " axe_bl_time.clear()\n", + " axe_bl_time.set_title(\"Private key bit length, given keygen time $\\in ({}, {})$ {}\".format(int(lower_bnd), int(upper_bnd), gen_disp_unit))\n", + " bl_data = np.array(list(map(lambda x: x.bit_length(), values[\"priv\"])), dtype=np.dtype(\"u2\"))\n", + " plot_hist(axe_bl_time, bl_data, \"private key bit length\", bins=11, range=(bit_size-10, bit_size+1), align=\"left\")\n", + " axe_bl_time.set_xlim((bit_size-10, bit_size))\n", + " fig_bl_time.tight_layout()\n", + "\n", + "center_w = widgets.IntSlider(min=min(data_trimmed[\"gen_time\"]),\n", + " max=max(data_trimmed[\"gen_time\"]),\n", + " step=1,\n", + " value=description_gen_trim.mean,\n", + " continuous_update=False,\n", + " description=\"center {}\".format(gen_disp_unit))\n", + "width_w = widgets.IntSlider(min=1, max=100, continuous_update=False,\n", + " description=\"width {}\".format(gen_disp_unit))\n", + "w = interactive(f, center=center_w,\n", + " width=width_w)\n", + "display(w)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Validation\n", + "Perform some tests on the produced data and compare to expected results.\n", + "\n", + "This requires some information about the used curve, enter it below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-18T18:27:02.748493Z", + "start_time": "2019-03-18T18:27:01.294850Z" + } + }, + "outputs": [], + "source": [ + "p_str = input(\"The prime specifying the finite field:\")\n", + "p = int(p_str, 16) if p_str.startswith(\"0x\") else int(p_str)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2019-03-18T18:27:09.351619Z", + "start_time": "2019-03-18T18:27:08.674272Z" + } + }, + "outputs": [], + "source": [ + "r_str = input(\"The order of the curve:\")\n", + "r = int(r_str, 16) if r_str.startswith(\"0x\") else int(r_str)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All of the following tests should pass (e.g. be true), given a large enough sample:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "max_priv = max(data[\"priv\"])\n", + "\n", + "print(\"Private keys are smaller than order:\\t\\t\\t\" + str(max_priv < r))\n", + "print(\"Private keys are larger than prime(if order > prime):\\t\" + str(r <= p or max_priv > p))\n", + "print(\"Private keys reach full bit length of order:\\t\\t\" + str(max_priv.bit_length() == r.bit_length()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Private key bit length (min, max):\" + str(min(data[\"priv\"]).bit_length()) + \", \" + str(max(data[\"priv\"]).bit_length()))" + ] + } + ], + "metadata": { + "@webio": { + "lastCommId": "62b9fbe51a4c472094fee49d62cdb7ec", + "lastKernelId": "a920ac21-57cb-406f-8aef-476c8b0e7df5" + }, + "hide_input": false, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analysis/utils.py b/analysis/utils.py new file mode 100644 index 0000000..014c4c6 --- /dev/null +++ b/analysis/utils.py @@ -0,0 +1,111 @@ +import hashlib +import numpy as np +from matplotlib import ticker +from math import sqrt, log +import ec +from asn1crypto.core import Sequence + +def hw(i): + res = 0 + while i: + res += 1 + i &= i - 1 + return res + + +def moving_average(a, n) : + ret = np.cumsum(a, dtype=float) + ret[n:] = ret[n:] - ret[:-n] + return ret[n - 1:] / n + + +def time_scale(data, orig_unit, target_unit, scaling_factor): + if orig_unit == "instr": + return orig_unit + units = { + "milli": ("ms", 1000000), + "micro": (r"$\mu s$", 1000), + "nano": ("ns", 1) + } + upper = units[orig_unit][1] * scaling_factor + lower = units[target_unit][1] + if upper > lower: + data *= upper // lower + elif lower > upper: + np.floor_divide(data, lower // upper, data) + return (r"$\frac{1}{" + str(scaling_factor) + "}$" if scaling_factor != 1 else "") + units[target_unit][0] + +def recompute_nonces(data, curve_name, hash_algo): + try: + curve = ec.get_curve(curve_name) + except: + curve = ec.load_curve(curve_name) + verified = False + for elem in data: + if elem["nonce"] is not None: + continue + if elem["index"] % (len(data)//10) == 0: + print(".", end="") + if hash_algo is None: + hm = int.from_bytes(elem["data"], byteorder="big") % curve.group.n + else: + h = hashlib.new(hash_algo, elem["data"]) + hm = int(h.hexdigest(), 16) + if h.digest_size * 8 > curve.group.n.bit_length(): + hm >> h.digest_size * 8 - curve.group.n.bit_length() + hm = hm % curve.group.n + r, s = Sequence.load(elem["signature"]).native.values() + r = ec.Mod(r, curve.group.n) + s = ec.Mod(s, curve.group.n) + rx = r * elem["priv"] + hmrx = hm + rx + nonce = s.inverse() * hmrx + if not verified: + res = int(nonce) * curve.g + if int(res.x) % curve.group.n != int(r): + print("Nonce recomputation couldnt verify!") + raise ValueError + else: + print("Nonce recomputation works") + verified = True + elem["nonce"] = int(nonce) + +def hist_size_func(choice): + if choice == "sqrt": + return lambda n, xmin, xmax, var, xlower, xupper: int(sqrt(n)) + 1 + elif choice == "sturges": + return lambda n, xmin, xmax, var, xlower, xupper: int(log(n, 2)) + 1 + elif choice == "rice": + return lambda n, xmin, xmax, var, xlower, xupper: int(2 * n**(1/3)) + elif choice == "scott": + return lambda n, xmin, xmax, var, xlower, xupper: (xmax - xmin) // int((3.5 * sqrt(var)) / (n**(1/3))) + elif choice == "fd": + return lambda n, xmin, xmax, var, xlower, xupper: (xmax - xmin) // int(2 * (xupper - xlower) / (n**(1/3))) + else: + return lambda n, xmin, xmax, var, xlower, xupper: hist_size + + +def plot_hist(axes, data, xlabel=None, log=False, avg=True, median=True, bins=None, **kwargs): + time_max = max(data) + time_min = min(data) + time_avg = np.average(data) + time_median = np.median(data) + if bins is None: + bins = time_max - time_min + 1 + hist = axes.hist(data, bins=bins, log=log, **kwargs) + if avg: + axes.axvline(x=time_avg, alpha=0.7, linestyle="dotted", color="blue", label="avg = {}".format(time_avg)) + if median: + axes.axvline(x=time_median, alpha=0.7, linestyle="dotted", color="green", label="median = {}".format(time_median)) + axes.set_ylabel("count" + ("\n(log)" if log else "")) + axes.set_xlabel("time" if xlabel is None else xlabel) + axes.xaxis.set_major_locator(ticker.MaxNLocator()) + if avg or median: + axes.legend(loc="best") + return hist + + +def miller_correction(entropy, samples, bins): + return entropy + (bins - 1)/(2*samples) + + |
