From: Luke Kenneth Casson Leighton Date: Tue, 17 Sep 2019 07:41:14 +0000 (+0100) Subject: add vector CORDIC X-Git-Tag: convert-csv-opcode-to-binary~4043 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=49985d6de0dcf3413d91f6544d56b618b2cf9b65;p=libreriscv.git add vector CORDIC --- diff --git a/simple_v_extension/vector_ops.mdwn b/simple_v_extension/vector_ops.mdwn index edcd88ce7..02ba94b2a 100644 --- a/simple_v_extension/vector_ops.mdwn +++ b/simple_v_extension/vector_ops.mdwn @@ -8,6 +8,50 @@ In this extension, the subvector itself is typically the unit, although some ope Examples which can require SUBVL include cross product and may in future involve complex numbers. +## CORDIC + +CORDIC is an extremely general-purpose algorithm useful for a huge number +of diverse purposes. In its full form it does however require quite a +few parameters, one of which is a vector, making it awkward to include in +a standard "scalar" ISA. Additionally the coordinates can be set to circular, +linear or hyperbolic, producing three different modes, and the algorithm +may also be run in either "vector" mode or "rotation" mode. See [[discussion]] + +vx, vy = CORDIC(vx, vy, coordinate\_mode, beta) + + int i = 0; + int iterations = 0; // Number of times to run the algorithm + float arctanTable[iterations]; // in Radians + float K = 0.6073; // K + float v_x,v_y; // Vector v; x and y components + + for(i=0; i < iterations; i++) { + arctanTable[i] = atan(pow(2,-i)); + } + + float vnew_x; // To store the new value of x; + for(i = 0; i < iterations; i++) { + // If beta is negative, we need to do a counter-clockwise rotation: + if( beta < 0) { + vnew_x = v_x + (v_y*pow(2,-i)); + v_y -= (v_x*pow(2,-i)); + beta += arctanTable[i]; + } + // If beta is positive, we need to do a clockwise rotation: + else { + vnew_x = v_x - (v_y*pow(2,-i)); + v_y += (v_x*pow(2,-i)); + beta -= arctanTable[i]; + } + v_x = vnew_x; + } + v_x *= K; + v_y *= K; + +Links: + +* + ## Vector cross product Result is the cross product of x and y, i.e., the resulting components are, in order: @@ -18,10 +62,36 @@ Result is the cross product of x and y, i.e., the resulting components are, in o All the operands must be vectors of 3 components of a floating-point type. +Pseudocode: + + vec3 a, b; // elements in order a.x, a.y, a.z + // compute a cross b: + vec3 t1 = a.yzx; // produce vector [a.y, a.z, a.x] + vec3 t2 = b.zxy; + vec3 t3 = a.zxy; + vec3 t4 = b.yzx; + vec3 p = t3 * t4; + vec3 cross = t1 * t2 - p; + ## Vector dot product -Computes the dot product of two vectors. Internal accuracy must be greater than the -input vectors and the result. +Computes the dot product of two vectors. Internal accuracy must be +greater than the input vectors and the result. + +Pseudocode in python: + + from operator import mul + sum(map(mul, A, B)) + +Pseudocode in c: + + double dot_product(float v[], float u[], int n) + { + double result = 0.0; + for (int i = 0; i < n; i++) + result += v[i] * u[i]; + return result; + } ## Vector length @@ -31,7 +101,10 @@ The scalar length of a vector: ## Vector distance -The scalar distance between two vectors. Subtracts one vector from the other and returns length +The scalar distance between two vectors. Subtracts one vector from the +other and returns length: + + length(v0 - v1) ## Vector LERP @@ -56,6 +129,9 @@ Pseudocode: ## Vector SLERP +Not recommended as it is not commonly used and has several trigonometric +functions. + Pseudocode: @@ -100,6 +176,100 @@ Pseudocode: return (s0 * v0) + (s1 * v1); } +However this algorithm does not involve transcendentals except in +the computation of the tables: + + function v = cordic(beta,n) + % This function computes v = [cos(beta), sin(beta)] (beta in radians) + % using n iterations. Increasing n will increase the precision. + + if beta < -pi/2 || beta > pi/2 + if beta < 0 + v = cordic(beta + pi, n); + else + v = cordic(beta - pi, n); + end + v = -v; % flip the sign for second or third quadrant + return + end + + % Initialization of tables of constants used by CORDIC + % need a table of arctangents of negative powers of two, in radians: + % angles = atan(2.^-(0:27)); + angles = [ ... + 0.78539816339745 0.46364760900081 + 0.24497866312686 0.12435499454676 ... + 0.06241880999596 0.03123983343027 + 0.01562372862048 0.00781234106010 ... + 0.00390623013197 0.00195312251648 + 0.00097656218956 0.00048828121119 ... + 0.00024414062015 0.00012207031189 + 0.00006103515617 0.00003051757812 ... + 0.00001525878906 0.00000762939453 + 0.00000381469727 0.00000190734863 ... + 0.00000095367432 0.00000047683716 + 0.00000023841858 0.00000011920929 ... + 0.00000005960464 0.00000002980232 + 0.00000001490116 0.00000000745058 ]; + % and a table of products of reciprocal lengths of vectors [1, 2^-2j]: + % Kvalues = cumprod(1./abs(1 + 1j*2.^(-(0:23)))) + Kvalues = [ ... + 0.70710678118655 0.63245553203368 + 0.61357199107790 0.60883391251775 ... + 0.60764825625617 0.60735177014130 + 0.60727764409353 0.60725911229889 ... + 0.60725447933256 0.60725332108988 + 0.60725303152913 0.60725295913894 ... + 0.60725294104140 0.60725293651701 + 0.60725293538591 0.60725293510314 ... + 0.60725293503245 0.60725293501477 + 0.60725293501035 0.60725293500925 ... + 0.60725293500897 0.60725293500890 + 0.60725293500889 0.60725293500888 ]; + Kn = Kvalues(min(n, length(Kvalues))); + + % Initialize loop variables: + v = [1;0]; % start with 2-vector cosine and sine of zero + poweroftwo = 1; + angle = angles(1); + + % Iterations + for j = 0:n-1; + if beta < 0 + sigma = -1; + else + sigma = 1; + end + factor = sigma * poweroftwo; + % Note the matrix multiplication can be done using scaling by + % powers of two and addition subtraction + R = [1, -factor; factor, 1]; + v = R * v; % 2-by-2 matrix multiply + beta = beta - sigma * angle; % update the remaining angle + poweroftwo = poweroftwo / 2; + % update the angle from table, or eventually by just dividing by two + if j+2 > length(angles) + angle = angle / 2; + else + angle = angles(j+2); + end + end + + % Adjust length of output vector to be [cos(beta), sin(beta)]: + v = v * Kn; + return + + endfunction + +2x2 matrix multiply can be done with shifts and adds: + + x = v[0] - sigma * (v[1] * 2^(-j)); + y = sigma * (v[0] * 2^(-j)) + v[1]; + v = [x; y]; + +The technique is outlined in a paper as being applicable to 3D: + + # Expensive 3-operand OP32 operations 3-operand operations are extremely expensive in terms of OP32 encoding space. A potential idea is to embed 3 RVC register formats across two out of three 5-bit fields rs1/rs2/rd diff --git a/simple_v_extension/vector_ops/discussion.mdwn b/simple_v_extension/vector_ops/discussion.mdwn new file mode 100644 index 000000000..968dccbd9 --- /dev/null +++ b/simple_v_extension/vector_ops/discussion.mdwn @@ -0,0 +1,126 @@ +# CORDIC Implementations + +From + + circular = 1 + linear = 0 + hyperbolic = -1 + + def ROM_lookup(iteration, coord_mode): + if (coord_mode == circular): + return math.degrees(math.atan(2**(-1*iteration))) + elif (coord_mode == linear): + return 2**(-1*iteration) + elif (coord_mode == hyperbolic): + return (math.atanh(2**(-1*iteration))) + + def rotation_mode(x, y, z, coord_mode, iterations): + a = 0.607252935; # = 1/K + + x_val_list = [] + y_val_list = [] + z_val_list = [] + iterations_list = [] + + i = 0; # Keeps count on number of iterations + + current_x = x # Value of X on ith iteration + current_y = y # Value of Y on ith iteration + current_z = z # Value of Z on ith iteration + + di = 0 + + if (coord_mode == hyperbolic): + i = 1 + else: + i = 0 + + flag = 0 + + if (iterations > 0): + while (i < iterations): + if (current_z < 0): + di = -1 + else: + di = +1 + next_z = current_z - di * ROM_lookup(i, coord_mode) + next_x = current_x - coord_mode * di * current_y * (2**(-1*i)) + next_y = current_y + di * current_x * 2**(-1*i) + + current_x = next_x + current_y = next_y + current_z = next_z + + x_val_list.append(current_x) + y_val_list.append(current_y) + z_val_list.append(current_z) + + iterations_list.append(i) + + if (coord_mode == hyperbolic): + if ((i != 4) & (i != 13) & (i!=40)): + i = i+1 + elif (flag == 0): + flag = 1 + elif (flag == 1): + flag = 0 + i = i+1 + else: + i = i+1 + return { 'x':x_val_list, 'y':y_val_list, 'z':z_val_list, + 'iteration':iterations_list, } + + def vector_mode(x, y, z, coord_mode, iterations): + a = 1.2075; # = 1/K + + x_val_list = [] + y_val_list = [] + z_val_list = [] + iterations_list = [] + + i = 0; # Keeps count on number of iterations + + current_x = x # Value of X on ith iteration + current_y = y # Value of Y on ith iteration + current_z = z # Value of Z on ith iteration + + di = 0 + + # This is neccesary since result for i=0 doesn't exists for hyperbolic + # co-ordinate system. + if (coord_mode == hyperbolic): + i = 1 + else: + i = 0 + + flag = 0 + + if (iterations > 0): + while (i < iterations): + di = -1*math.copysign(1, current_y);#*current_x); + next_x = current_x - coord_mode * di * current_y * (2**(-1*i)) + next_y = current_y + di * current_x * 2**(-1*i) + next_z = current_z - di * ROM_lookup(i, coord_mode) + + current_x = next_x + current_y = next_y + current_z = next_z + + x_val_list.append(current_x) + y_val_list.append(current_y) + z_val_list.append(current_z) + + iterations_list.append(i) + + if (coord_mode == hyperbolic): + if ((i != 4) & (i != 13) & (i!=40)): + i = i+1 + elif (flag == 0): + flag = 1 + elif (flag == 1): + flag = 0 + i = i+1 + else: + i = i+1 + return { 'x':x_val_list, 'y':y_val_list, 'z':z_val_list, + 'iteration':iterations_list }