1 /* from hacker's delight, originsl by hannah suarez (hcs0) */
2 // Computes the m+n-halfword product of n halfwords x m halfwords, unsigned.
3 // Max line length is 57, to fit in hacker.book.
5 #include <stdlib.h> //To define "exit", req'd by XLC.
7 // w[0], u[0], and v[0] contain the LEAST significant halfwords.
8 // (The halfwords are in little-endian order).
9 // This is Knuth's Algorithm M from [Knuth Vol. 2 Third edition (1998)]
10 // section 4.3.1. Picture is:
11 // u[m-1] ... u[1] u[0]
12 // x v[n-1] ... v[1] v[0]
13 // --------------------
14 // w[m+n-1] ............ w[1] w[0]
16 void mulmnu(unsigned short w
[], unsigned short u
[], unsigned short v
[], int m
,
23 for (i
= 0; i
< m
; i
++)
26 for (j
= 0; j
< n
; j
++)
29 unsigned short phi
[2000];
30 unsigned short plo
[2000];
31 for (i
= 0; i
< m
; i
++)
33 unsigned product
= (unsigned)u
[i
] * v
[j
] + w
[i
+ j
];
34 phi
[i
] = product
>> 16;
37 for (i
= 0; i
< m
; i
++)
39 t
= (((unsigned)phi
[i
] << 16) | plo
[i
]) + k
;
40 w
[i
+ j
] = t
; // (I.e., t & 0xFFFF).
50 void check(unsigned short result
[], unsigned short u
[], unsigned short v
[],
51 int m
, int n
, unsigned short correct
[])
55 for (i
= 0; i
< m
+ n
; i
++)
57 if (correct
[i
] != result
[i
])
60 printf("Error, m = %d, n = %d, u = ", m
, n
);
61 for (j
= 0; j
< m
; j
++)
62 printf(" %04x", u
[j
]);
64 for (j
= 0; j
< n
; j
++)
65 printf(" %04x", v
[j
]);
66 printf("\nShould get:");
67 for (j
= 0; j
< n
+ m
; j
++)
68 printf(" %04x", correct
[j
]);
70 for (j
= 0; j
< n
+ m
; j
++)
71 printf(" %04x", result
[j
]);
83 unsigned short u
[4], v
[4], correct
[8];
86 {.m
=1, .n
=1, .u
={7}, .v
={3}, .correct
={21, 0}},
87 {.m
=1, .n
=1, .u
={2}, .v
={0xFFFF}, .correct
={0xFFFE, 0x0001}}, // 2*FFFF = 0001_FFFE.
88 {.m
=1, .n
=1, .u
={0xFFFF}, .v
={0xFFFF}, .correct
={1, 0xFFFE}},
89 {.m
=1, .n
=2, .u
={7}, .v
={5, 6}, .correct
={35, 42, 0}},
90 {.m
=1, .n
=2, .u
={65000}, .v
={63000, 64000}, .correct
={0xBDC0, 0x8414, 0xF7F5}},
91 {.m
=1, .n
=3, .u
={65535}, .v
={31000, 32000, 33000}, .correct
={0x86E8, 0xFC17, 0xFC17, 0x80E7}},
92 {.m
=2, .n
=3, .u
={400, 300}, .v
={500, 100, 200}, .correct
={0x0D40, 0xE633, 0xADB2, 0xEA61, 0}},
93 {.m
=2, .n
=3, .u
={400, 65535}, .v
={500, 100, 65534}, .correct
={0x0D40, 0x9A4F, 0xFE70, 0x01F5, 0xFFFD}},
94 {.m
=4, .n
=4, .u
={65535, 65535, 65535, 65535}, .v
={65535, 65535, 65535, 65535},
95 .correct
={1, 0, 0, 0, 65534, 65535, 65535, 65535}},
96 {.m
=2, .n
=2, .u
={0xFF00, 0xFF00}, .v
={0xFF00, 0xFF00}, .correct
={0, 0xfe01, 0xfc02, 0xfe02}},
99 const int ncases
= sizeof(test
) / sizeof(test
[0]);
100 unsigned short result
[10];
103 for (int i
= 0; i
< ncases
; i
++)
105 int m
= test
[i
].m
, n
= test
[i
].n
;
106 unsigned short *u
= test
[i
].u
;
107 unsigned short *v
= test
[i
].v
;
108 unsigned short *correct
= test
[i
].correct
;
109 mulmnu(result
, u
, v
, m
, n
);
110 check(result
, u
, v
, m
, n
, correct
);
111 mulmnu(result
, v
, u
, n
, m
); // Interchange operands.
112 check(result
, v
, u
, n
, m
, correct
);
116 printf("Passed all %d cases.\n", ncases
);