From: Aaron Sawdey Date: Mon, 30 Jan 2017 23:24:24 +0000 (+0000) Subject: re PR target/79170 (memcmp builtin expansion sequence can overflow) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3095f651230f042648ac9c5d6008e8ea9d37567d;p=gcc.git re PR target/79170 (memcmp builtin expansion sequence can overflow) 2017-01-27 Aaron Sawdey PR target/79170 * gcc.dg/memcmp-1.c: Improved to catch failures seen in PR 79170. 2017-01-27 Aaron Sawdey PR target/79170 * config/rs6000/altivec.md (*setb_internal): Rename to setb_signed. (setb_unsigned) New pattern for setb with CCUNS. * config/rs6000/rs6000.c (expand_block_compare): Use a different subfc./subfe sequence to avoid overflow problems. Generate a shorter sequence with cmpld/setb for power9. * config/rs6000/rs6000.md (subf3_carry_dot2): Add a new pattern for generating subfc. instruction. (cmpstrsi): Add TARGET_POPCNTD predicate as the generate sequence now uses this instruction. From-SVN: r245041 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7a9d9b1c097..64ef77decda 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2017-01-30 Aaron Sawdey + + PR target/79170 + * config/rs6000/altivec.md (*setb_internal): Rename to setb_signed. + (setb_unsigned) New pattern for setb with CCUNS. + * config/rs6000/rs6000.c (expand_block_compare): Use a different + subfc./subfe sequence to avoid overflow problems. Generate a + shorter sequence with cmpld/setb for power9. + * config/rs6000/rs6000.md (subf3_carry_dot2): Add a new pattern + for generating subfc. instruction. + (cmpstrsi): Add TARGET_POPCNTD predicate as the generate sequence + now uses this instruction. + 2017-01-30 Ian Lance Taylor PR debug/79289 diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 8af7ac5ea7a..3a659b0b86c 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -3838,7 +3838,7 @@ ;; Otherwise, set operand 0 to 0. Note that the result stored into ;; register operand 0 is non-zero iff either the LT or GT bits are on ;; within condition register operand 1. -(define_insn "*setb_internal" +(define_insn "setb_signed" [(set (match_operand:SI 0 "gpc_reg_operand" "=r") (if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y") (const_int 0)) @@ -3851,6 +3851,19 @@ "setb %0,%1" [(set_attr "type" "logical")]) +(define_insn "setb_unsigned" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (ltu (match_operand:CCUNS 1 "cc_reg_operand" "y") + (const_int 0)) + (const_int -1) + (if_then_else (gtu (match_dup 1) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" + "setb %0,%1" + [(set_attr "type" "logical")]) + ;; Test byte within two ranges. ;; ;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 124f562185d..5ea59a439f5 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -17292,7 +17292,7 @@ rs6000_init_builtins (void) TYPE_NAME (V16QI_type_node) = tdecl; tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node); - TYPE_NAME ( bool_V16QI_type_node) = tdecl; + TYPE_NAME (bool_V16QI_type_node) = tdecl; tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node); TYPE_NAME (unsigned_V8HI_type_node) = tdecl; @@ -19458,24 +19458,31 @@ expand_block_compare (rtx operands[]) rtx src1 = orig_src1; rtx src2 = orig_src2; - /* If this is not a fixed size compare, just call memcmp */ + /* This case is complicated to handle because the subtract + with carry instructions do not generate the 64-bit + carry and so we must emit code to calculate it ourselves. + We choose not to implement this yet. */ + if (TARGET_32BIT && TARGET_POWERPC64) + return false; + + /* If this is not a fixed size compare, just call memcmp. */ if (!CONST_INT_P (bytes_rtx)) return false; - /* This must be a fixed size alignment */ + /* This must be a fixed size alignment. */ if (!CONST_INT_P (align_rtx)) return false; unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT; - /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff */ + /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */ if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1)) || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2))) return false; gcc_assert (GET_MODE (target) == SImode); - /* Anything to move? */ + /* Anything to move? */ unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx); if (bytes == 0) return true; @@ -19490,6 +19497,13 @@ expand_block_compare (rtx operands[]) rtx tmp_reg_src1 = gen_reg_rtx (word_mode); rtx tmp_reg_src2 = gen_reg_rtx (word_mode); + /* P7/P8 code uses cond for subfc. but P9 uses + it for cmpld which needs CCUNSmode. */ + rtx cond; + if (TARGET_P9_MISC) + cond = gen_reg_rtx (CCUNSmode); + else + cond = gen_reg_rtx (CCmode); /* If we have an LE target without ldbrx and word_mode is DImode, then we must avoid using word_mode. */ @@ -19512,27 +19526,35 @@ expand_block_compare (rtx operands[]) rtx convert_label = NULL; rtx final_label = NULL; - /* Example of generated code for 11 bytes aligned 1 byte: - .L10: - ldbrx 10,6,9 - ldbrx 9,7,9 - subf. 9,9,10 - bne 0,.L8 - addi 9,4,7 - lwbrx 10,0,9 - addi 9,5,7 - lwbrx 9,0,9 + /* Example of generated code for 18 bytes aligned 1 byte. + Compiled with -fno-reorder-blocks for clarity. + ldbrx 10,31,8 + ldbrx 9,7,8 + subfc. 9,9,10 + bne 0,.L6487 + addi 9,12,8 + addi 5,11,8 + ldbrx 10,0,9 + ldbrx 9,0,5 + subfc. 9,9,10 + bne 0,.L6487 + addi 9,12,16 + lhbrx 10,0,9 + addi 9,11,16 + lhbrx 9,0,9 subf 9,9,10 - b .L9 - .L8: # convert_label - cntlzd 9,9 - addi 9,9,-1 - xori 9,9,0x3f - .L9: # final_label - - We start off with DImode and have a compare/branch to something - with a smaller mode then we will need a block with the DI->SI conversion - that may or may not be executed. */ + b .L6488 + .p2align 4,,15 + .L6487: #convert_label + popcntd 9,9 + subfe 10,10,10 + or 9,9,10 + .L6488: #final_label + extsw 10,9 + + We start off with DImode for two blocks that jump to the DI->SI conversion + if the difference is found there, then a final block of HImode that skips + the DI->SI conversion. */ while (bytes > 0) { @@ -19600,26 +19622,18 @@ expand_block_compare (rtx operands[]) } } - /* We previously did a block that need 64->32 conversion but - the current block does not, so a label is needed to jump - to the end. */ - if (generate_6432_conversion && !final_label - && GET_MODE_SIZE (GET_MODE (target)) >= load_mode_size) - final_label = gen_label_rtx (); - - /* Do we need a 64->32 conversion block? */ int remain = bytes - cmp_bytes; - if (GET_MODE_SIZE (GET_MODE (target)) < GET_MODE_SIZE (load_mode)) - { - generate_6432_conversion = true; - if (remain > 0 && !convert_label) - convert_label = gen_label_rtx (); - } - - if (GET_MODE_SIZE (GET_MODE (target)) >= GET_MODE_SIZE (load_mode)) + if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode)) { /* Target is larger than load size so we don't need to reduce result size. */ + + /* We previously did a block that need 64->32 conversion but + the current block does not, so a label is needed to jump + to the end. */ + if (generate_6432_conversion && !final_label) + final_label = gen_label_rtx (); + if (remain > 0) { /* This is not the last block, branch to the end if the result @@ -19627,11 +19641,12 @@ expand_block_compare (rtx operands[]) if (!final_label) final_label = gen_label_rtx (); rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); - rtx cond = gen_reg_rtx (CCmode); rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2); - rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond); - emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2))); - rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx); + rtx cr = gen_reg_rtx (CCmode); + rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr); + emit_insn (gen_movsi (target, + gen_lowpart (SImode, tmp_reg_src2))); + rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx); rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, fin_ref, pc_rtx); rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); @@ -19662,7 +19677,11 @@ expand_block_compare (rtx operands[]) } else { + /* Do we need a 64->32 conversion block? We need the 64->32 + conversion even if target size == load_mode size because + the subtract generates one extra bit. */ generate_6432_conversion = true; + if (remain > 0) { if (!convert_label) @@ -19670,9 +19689,22 @@ expand_block_compare (rtx operands[]) /* Compare to zero and branch to convert_label if not zero. */ rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label); - rtx cond = gen_reg_rtx (CCmode); - rtx tmp = gen_rtx_MINUS (DImode, tmp_reg_src1, tmp_reg_src2); - rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond); + if (TARGET_P9_MISC) + { + /* Generate a compare, and convert with a setb later. */ + rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1, + tmp_reg_src2); + emit_insn (gen_rtx_SET (cond, cmp)); + } + else + /* Generate a subfc. and use the longer + sequence for conversion. */ + if (TARGET_64BIT) + emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2, + tmp_reg_src1, cond)); + else + emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2, + tmp_reg_src1, cond)); rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx); rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, cvt_ref, pc_rtx); @@ -19682,10 +19714,21 @@ expand_block_compare (rtx operands[]) } else { - /* Just do the subtract. Since this is the last block the - convert code will be generated immediately following. */ - emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1, - tmp_reg_src2)); + /* Just do the subtract/compare. Since this is the last block + the convert code will be generated immediately following. */ + if (TARGET_P9_MISC) + { + rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1, + tmp_reg_src2); + emit_insn (gen_rtx_SET (cond, cmp)); + } + else + if (TARGET_64BIT) + emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2, + tmp_reg_src1)); + else + emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2, + tmp_reg_src1)); } } @@ -19699,12 +19742,46 @@ expand_block_compare (rtx operands[]) emit_label (convert_label); /* We need to produce DI result from sub, then convert to target SI - while maintaining <0 / ==0 / >0 properties. - Segher's sequence: cntlzd 3,3 ; addi 3,3,-1 ; xori 3,3,63 */ - emit_insn (gen_clzdi2 (tmp_reg_src2, tmp_reg_src2)); - emit_insn (gen_adddi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (-1))); - emit_insn (gen_xordi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (63))); - emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2))); + while maintaining <0 / ==0 / >0 properties. This sequence works: + subfc L,A,B + subfe H,H,H + popcntd L,L + rldimi L,H,6,0 + + This is an alternate one Segher cooked up if somebody + wants to expand this for something that doesn't have popcntd: + subfc L,a,b + subfe H,x,x + addic t,L,-1 + subfe v,t,L + or z,v,H + + And finally, p9 can just do this: + cmpld A,B + setb r */ + + if (TARGET_P9_MISC) + { + emit_insn (gen_setb_unsigned (target, cond)); + } + else + { + if (TARGET_64BIT) + { + rtx tmp_reg_ca = gen_reg_rtx (DImode); + emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca)); + emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2)); + emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca)); + emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2))); + } + else + { + rtx tmp_reg_ca = gen_reg_rtx (SImode); + emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca)); + emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2)); + emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca)); + } + } } if (final_label) @@ -21246,7 +21323,7 @@ register_to_reg_type (rtx reg, bool *is_altivec) regno = true_regnum (reg); if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) return PSEUDO_REG_TYPE; - } + } gcc_assert (regno >= 0); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 3f292211463..61759949725 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -2068,6 +2068,35 @@ "subfic %0,%1,%2" [(set_attr "type" "add")]) +(define_insn_and_split "subf3_carry_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") + (compare:CC (minus:P (match_operand:P 2 "gpc_reg_operand" "r,r") + (match_operand:P 1 "gpc_reg_operand" "r,r")) + (const_int 0))) + (set (match_operand:P 0 "gpc_reg_operand" "=r,r") + (minus:P (match_dup 2) + (match_dup 1))) + (set (reg:P CA_REGNO) + (leu:P (match_dup 1) + (match_dup 2)))] + "mode == Pmode" + "@ + subfc. %0,%1,%2 + #" + "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" + [(parallel [(set (match_dup 0) + (minus:P (match_dup 2) + (match_dup 1))) + (set (reg:P CA_REGNO) + (leu:P (match_dup 1) + (match_dup 2)))]) + (set (match_dup 3) + (compare:CC (match_dup 0) + (const_int 0)))] + "" + [(set_attr "type" "add") + (set_attr "dot" "yes") + (set_attr "length" "4,8")]) (define_insn "subf3_carry" [(set (match_operand:P 0 "gpc_reg_operand" "=r") @@ -9146,11 +9175,11 @@ (match_operand:BLK 2))) (use (match_operand:SI 3)) (use (match_operand:SI 4))])] - "" + "TARGET_POPCNTD" { if (expand_block_compare (operands)) DONE; - else + else FAIL; }) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 73dfce2ba03..67800b1104f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-01-30 Aaron Sawdey + + PR target/79170 + * gcc.dg/memcmp-1.c: Improved to catch failures seen in PR 79170. + 2017-01-30 Martin Sebor PR testsuite/79293 diff --git a/gcc/testsuite/gcc.dg/memcmp-1.c b/gcc/testsuite/gcc.dg/memcmp-1.c index dae13e06f88..b4fd780fdd0 100644 --- a/gcc/testsuite/gcc.dg/memcmp-1.c +++ b/gcc/testsuite/gcc.dg/memcmp-1.c @@ -1,58 +1,124 @@ -/* Test memcmp builtin expansion for compilation and proper execution. */ +/* Test memcmp/strncmp builtin expansion for compilation and proper execution. */ /* { dg-do run } */ /* { dg-options "-O2" } */ /* { dg-require-effective-target ptr32plus } */ #include -#include #include +#include +#include + +int lib_memcmp(const void *a, const void *b, size_t n) asm("memcmp"); +int lib_strncmp(const char *a, const char *b, size_t n) asm("strncmp"); + +#ifndef NRAND +#define NRAND 10000 +#endif +#define MAX_SZ 200 -#define RUN_TEST(SZ, ALIGN) test_memcmp_ ## SZ ## _ ## ALIGN () +static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, int), + void (test_strncmp)(const char *, const char *, int), + size_t sz, int align) +{ + char buf1[MAX_SZ*2+10],buf2[MAX_SZ*2+10]; + size_t test_sz = (sz10)?(test_sz-10):0); diff_pos < test_sz+10; diff_pos++) + for(zero_pos = ((test_sz>10)?(test_sz-10):0); zero_pos < test_sz+10; zero_pos++) + { + memset(buf1, 'A', 2*test_sz); + memset(buf2, 'A', 2*test_sz); + buf2[diff_pos] = 'B'; + buf1[zero_pos] = 0; + buf2[zero_pos] = 0; + e = lib_memcmp(buf1,buf2,sz); + (*test_memcmp)(buf1,buf2,e); + (*test_memcmp)(buf2,buf1,-e); + (*test_memcmp)(buf2,buf2,0); + e = lib_strncmp(buf1,buf2,sz); + (*test_strncmp)(buf1,buf2,e); + (*test_strncmp)(buf2,buf1,-e); + (*test_strncmp)(buf2,buf2,0); + /* differing length: */ + buf2[diff_pos] = 0; + e = lib_memcmp(buf1,buf2,sz); + (*test_memcmp)(buf1,buf2,e); + e = lib_strncmp(buf1,buf2,sz); + (*test_strncmp)(buf1,buf2,e); + memset(buf2+diff_pos,'B',sizeof(buf2)-diff_pos); + buf2[zero_pos] = 0; + e = lib_memcmp(buf1,buf2,sz); + (*test_memcmp)(buf1,buf2,e); + (*test_memcmp)(buf2,buf1,-e); + e = lib_strncmp(buf1,buf2,sz); + (*test_strncmp)(buf1,buf2,e); + (*test_strncmp)(buf2,buf1,-e); + } +} -#define DEF_TEST(SZ, ALIGN) \ -static void test_memcmp_ ## SZ ## _ ## ALIGN (void) { \ - char one[3 * (SZ > 10 ? SZ : 10)]; \ - char two[3 * (SZ > 10 ? SZ : 10)]; \ - int i,j; \ - for (i = 0 ; i < SZ ; i++) \ - { \ - int r1; \ - char *a = one + (i & 1) * ALIGN; \ - char *b = two + (i & 1) * ALIGN; \ - memset (a, '-', SZ); \ - memset (b, '-', SZ); \ - a[i] = '1'; \ - b[i] = '2'; \ - a[SZ] = 0; \ - b[SZ] = 0; \ - if (!((r1 = memcmp (b, a, SZ)) > 0)) \ - { \ - abort (); \ - } \ - if (!((r1 = memcmp (a, b, SZ)) < 0)) \ - { \ - abort (); \ - } \ - b[i] = '1'; \ - if (!((r1 = memcmp (a, b, SZ)) == 0)) \ - { \ - abort (); \ - } \ - for(j = i; j < SZ ; j++) \ - { \ - a[j] = '1'; \ - b[j] = '2'; \ - } \ - if (!((r1 = memcmp (b, a, SZ)) > 0)) \ - { \ - abort (); \ - } \ - if (!((r1 = memcmp (a, b, SZ)) < 0)) \ - { \ - abort (); \ - } \ - } \ -} +#define RUN_TEST(SZ, ALIGN) test_driver_memcmp (test_memcmp_ ## SZ ## _ ## ALIGN, test_strncmp_ ## SZ ## _ ## ALIGN, SZ, ALIGN); + +#define DEF_TEST(SZ, ALIGN) \ + static void test_memcmp_ ## SZ ## _ ## ALIGN (const char *str1, const char *str2, int expect) \ +{ \ + char three[8192] __attribute__ ((aligned (4096))); \ + char four[8192] __attribute__ ((aligned (4096))); \ + char *a, *b; \ + int i,j,r; \ + for (j = 0; j < 2; j++) \ + { \ + for (i = 0; i < 2; i++) \ + { \ + a = three+i*ALIGN+j*(4096-2*i*ALIGN); \ + b = four+i*ALIGN+j*(4096-2*i*ALIGN); \ + memcpy(a,str1,SZ); \ + memcpy(b,str2,SZ); \ + r = memcmp(a,b,SZ); \ + if ( r < 0 && !(expect < 0) ) abort(); \ + if ( r > 0 && !(expect > 0) ) abort(); \ + if ( r == 0 && !(expect == 0) ) abort(); \ + } \ + } \ +} \ +static void test_strncmp_ ## SZ ## _ ## ALIGN (const char *str1, const char *str2, int expect) \ +{ \ + char three[8192] __attribute__ ((aligned (4096))); \ + char four[8192] __attribute__ ((aligned (4096))); \ + char *a, *b; \ + int i,j,r; \ + for (j = 0; j < 2; j++) \ + { \ + for (i = 0; i < 2; i++) \ + { \ + a = three+i*ALIGN+j*(4096-2*i*ALIGN); \ + b = four+i*ALIGN+j*(4096-2*i*ALIGN); \ + strcpy(a,str1); \ + strcpy(b,str2); \ + r = strncmp(a,b,SZ); \ + if ( r < 0 && !(expect < 0) ) abort(); \ + if ( r > 0 && !(expect > 0) ) abort(); \ + if ( r == 0 && !(expect == 0) ) abort(); \ + } \ + } \ +} #ifdef TEST_ALL DEF_TEST(1,1) @@ -300,305 +366,302 @@ DEF_TEST(49,2) DEF_TEST(49,4) DEF_TEST(49,8) DEF_TEST(49,16) +DEF_TEST(100,1) +DEF_TEST(100,2) +DEF_TEST(100,4) +DEF_TEST(100,8) +DEF_TEST(100,16) #else DEF_TEST(3,1) DEF_TEST(4,1) -DEF_TEST(4,2) -DEF_TEST(4,4) DEF_TEST(5,1) +DEF_TEST(5,8) DEF_TEST(6,1) +DEF_TEST(6,4) +DEF_TEST(6,8) DEF_TEST(7,1) +DEF_TEST(7,2) +DEF_TEST(7,4) +DEF_TEST(7,8) DEF_TEST(8,1) -DEF_TEST(8,2) -DEF_TEST(8,4) -DEF_TEST(8,8) DEF_TEST(9,1) DEF_TEST(16,1) -DEF_TEST(16,2) -DEF_TEST(16,4) -DEF_TEST(16,8) -DEF_TEST(16,16) DEF_TEST(32,1) -DEF_TEST(32,2) -DEF_TEST(32,4) -DEF_TEST(32,8) -DEF_TEST(32,16) +DEF_TEST(100,1) +DEF_TEST(100,8) #endif int main(int argc, char **argv) { - #ifdef TEST_ALL - RUN_TEST(1,1); - RUN_TEST(1,2); - RUN_TEST(1,4); - RUN_TEST(1,8); - RUN_TEST(1,16); - RUN_TEST(2,1); - RUN_TEST(2,2); - RUN_TEST(2,4); - RUN_TEST(2,8); - RUN_TEST(2,16); - RUN_TEST(3,1); - RUN_TEST(3,2); - RUN_TEST(3,4); - RUN_TEST(3,8); - RUN_TEST(3,16); - RUN_TEST(4,1); - RUN_TEST(4,2); - RUN_TEST(4,4); - RUN_TEST(4,8); - RUN_TEST(4,16); - RUN_TEST(5,1); - RUN_TEST(5,2); - RUN_TEST(5,4); - RUN_TEST(5,8); - RUN_TEST(5,16); - RUN_TEST(6,1); - RUN_TEST(6,2); - RUN_TEST(6,4); - RUN_TEST(6,8); - RUN_TEST(6,16); - RUN_TEST(7,1); - RUN_TEST(7,2); - RUN_TEST(7,4); - RUN_TEST(7,8); - RUN_TEST(7,16); - RUN_TEST(8,1); - RUN_TEST(8,2); - RUN_TEST(8,4); - RUN_TEST(8,8); - RUN_TEST(8,16); - RUN_TEST(9,1); - RUN_TEST(9,2); - RUN_TEST(9,4); - RUN_TEST(9,8); - RUN_TEST(9,16); - RUN_TEST(10,1); - RUN_TEST(10,2); - RUN_TEST(10,4); - RUN_TEST(10,8); - RUN_TEST(10,16); - RUN_TEST(11,1); - RUN_TEST(11,2); - RUN_TEST(11,4); - RUN_TEST(11,8); - RUN_TEST(11,16); - RUN_TEST(12,1); - RUN_TEST(12,2); - RUN_TEST(12,4); - RUN_TEST(12,8); - RUN_TEST(12,16); - RUN_TEST(13,1); - RUN_TEST(13,2); - RUN_TEST(13,4); - RUN_TEST(13,8); - RUN_TEST(13,16); - RUN_TEST(14,1); - RUN_TEST(14,2); - RUN_TEST(14,4); - RUN_TEST(14,8); - RUN_TEST(14,16); - RUN_TEST(15,1); - RUN_TEST(15,2); - RUN_TEST(15,4); - RUN_TEST(15,8); - RUN_TEST(15,16); - RUN_TEST(16,1); - RUN_TEST(16,2); - RUN_TEST(16,4); - RUN_TEST(16,8); - RUN_TEST(16,16); - RUN_TEST(17,1); - RUN_TEST(17,2); - RUN_TEST(17,4); - RUN_TEST(17,8); - RUN_TEST(17,16); - RUN_TEST(18,1); - RUN_TEST(18,2); - RUN_TEST(18,4); - RUN_TEST(18,8); - RUN_TEST(18,16); - RUN_TEST(19,1); - RUN_TEST(19,2); - RUN_TEST(19,4); - RUN_TEST(19,8); - RUN_TEST(19,16); - RUN_TEST(20,1); - RUN_TEST(20,2); - RUN_TEST(20,4); - RUN_TEST(20,8); - RUN_TEST(20,16); - RUN_TEST(21,1); - RUN_TEST(21,2); - RUN_TEST(21,4); - RUN_TEST(21,8); - RUN_TEST(21,16); - RUN_TEST(22,1); - RUN_TEST(22,2); - RUN_TEST(22,4); - RUN_TEST(22,8); - RUN_TEST(22,16); - RUN_TEST(23,1); - RUN_TEST(23,2); - RUN_TEST(23,4); - RUN_TEST(23,8); - RUN_TEST(23,16); - RUN_TEST(24,1); - RUN_TEST(24,2); - RUN_TEST(24,4); - RUN_TEST(24,8); - RUN_TEST(24,16); - RUN_TEST(25,1); - RUN_TEST(25,2); - RUN_TEST(25,4); - RUN_TEST(25,8); - RUN_TEST(25,16); - RUN_TEST(26,1); - RUN_TEST(26,2); - RUN_TEST(26,4); - RUN_TEST(26,8); - RUN_TEST(26,16); - RUN_TEST(27,1); - RUN_TEST(27,2); - RUN_TEST(27,4); - RUN_TEST(27,8); - RUN_TEST(27,16); - RUN_TEST(28,1); - RUN_TEST(28,2); - RUN_TEST(28,4); - RUN_TEST(28,8); - RUN_TEST(28,16); - RUN_TEST(29,1); - RUN_TEST(29,2); - RUN_TEST(29,4); - RUN_TEST(29,8); - RUN_TEST(29,16); - RUN_TEST(30,1); - RUN_TEST(30,2); - RUN_TEST(30,4); - RUN_TEST(30,8); - RUN_TEST(30,16); - RUN_TEST(31,1); - RUN_TEST(31,2); - RUN_TEST(31,4); - RUN_TEST(31,8); - RUN_TEST(31,16); - RUN_TEST(32,1); - RUN_TEST(32,2); - RUN_TEST(32,4); - RUN_TEST(32,8); - RUN_TEST(32,16); - RUN_TEST(33,1); - RUN_TEST(33,2); - RUN_TEST(33,4); - RUN_TEST(33,8); - RUN_TEST(33,16); - RUN_TEST(34,1); - RUN_TEST(34,2); - RUN_TEST(34,4); - RUN_TEST(34,8); - RUN_TEST(34,16); - RUN_TEST(35,1); - RUN_TEST(35,2); - RUN_TEST(35,4); - RUN_TEST(35,8); - RUN_TEST(35,16); - RUN_TEST(36,1); - RUN_TEST(36,2); - RUN_TEST(36,4); - RUN_TEST(36,8); - RUN_TEST(36,16); - RUN_TEST(37,1); - RUN_TEST(37,2); - RUN_TEST(37,4); - RUN_TEST(37,8); - RUN_TEST(37,16); - RUN_TEST(38,1); - RUN_TEST(38,2); - RUN_TEST(38,4); - RUN_TEST(38,8); - RUN_TEST(38,16); - RUN_TEST(39,1); - RUN_TEST(39,2); - RUN_TEST(39,4); - RUN_TEST(39,8); - RUN_TEST(39,16); - RUN_TEST(40,1); - RUN_TEST(40,2); - RUN_TEST(40,4); - RUN_TEST(40,8); - RUN_TEST(40,16); - RUN_TEST(41,1); - RUN_TEST(41,2); - RUN_TEST(41,4); - RUN_TEST(41,8); - RUN_TEST(41,16); - RUN_TEST(42,1); - RUN_TEST(42,2); - RUN_TEST(42,4); - RUN_TEST(42,8); - RUN_TEST(42,16); - RUN_TEST(43,1); - RUN_TEST(43,2); - RUN_TEST(43,4); - RUN_TEST(43,8); - RUN_TEST(43,16); - RUN_TEST(44,1); - RUN_TEST(44,2); - RUN_TEST(44,4); - RUN_TEST(44,8); - RUN_TEST(44,16); - RUN_TEST(45,1); - RUN_TEST(45,2); - RUN_TEST(45,4); - RUN_TEST(45,8); - RUN_TEST(45,16); - RUN_TEST(46,1); - RUN_TEST(46,2); - RUN_TEST(46,4); - RUN_TEST(46,8); - RUN_TEST(46,16); - RUN_TEST(47,1); - RUN_TEST(47,2); - RUN_TEST(47,4); - RUN_TEST(47,8); - RUN_TEST(47,16); - RUN_TEST(48,1); - RUN_TEST(48,2); - RUN_TEST(48,4); - RUN_TEST(48,8); - RUN_TEST(48,16); - RUN_TEST(49,1); - RUN_TEST(49,2); - RUN_TEST(49,4); - RUN_TEST(49,8); - RUN_TEST(49,16); + RUN_TEST(1,1) + RUN_TEST(1,2) + RUN_TEST(1,4) + RUN_TEST(1,8) + RUN_TEST(1,16) + RUN_TEST(2,1) + RUN_TEST(2,2) + RUN_TEST(2,4) + RUN_TEST(2,8) + RUN_TEST(2,16) + RUN_TEST(3,1) + RUN_TEST(3,2) + RUN_TEST(3,4) + RUN_TEST(3,8) + RUN_TEST(3,16) + RUN_TEST(4,1) + RUN_TEST(4,2) + RUN_TEST(4,4) + RUN_TEST(4,8) + RUN_TEST(4,16) + RUN_TEST(5,1) + RUN_TEST(5,2) + RUN_TEST(5,4) + RUN_TEST(5,8) + RUN_TEST(5,16) + RUN_TEST(6,1) + RUN_TEST(6,2) + RUN_TEST(6,4) + RUN_TEST(6,8) + RUN_TEST(6,16) + RUN_TEST(7,1) + RUN_TEST(7,2) + RUN_TEST(7,4) + RUN_TEST(7,8) + RUN_TEST(7,16) + RUN_TEST(8,1) + RUN_TEST(8,2) + RUN_TEST(8,4) + RUN_TEST(8,8) + RUN_TEST(8,16) + RUN_TEST(9,1) + RUN_TEST(9,2) + RUN_TEST(9,4) + RUN_TEST(9,8) + RUN_TEST(9,16) + RUN_TEST(10,1) + RUN_TEST(10,2) + RUN_TEST(10,4) + RUN_TEST(10,8) + RUN_TEST(10,16) + RUN_TEST(11,1) + RUN_TEST(11,2) + RUN_TEST(11,4) + RUN_TEST(11,8) + RUN_TEST(11,16) + RUN_TEST(12,1) + RUN_TEST(12,2) + RUN_TEST(12,4) + RUN_TEST(12,8) + RUN_TEST(12,16) + RUN_TEST(13,1) + RUN_TEST(13,2) + RUN_TEST(13,4) + RUN_TEST(13,8) + RUN_TEST(13,16) + RUN_TEST(14,1) + RUN_TEST(14,2) + RUN_TEST(14,4) + RUN_TEST(14,8) + RUN_TEST(14,16) + RUN_TEST(15,1) + RUN_TEST(15,2) + RUN_TEST(15,4) + RUN_TEST(15,8) + RUN_TEST(15,16) + RUN_TEST(16,1) + RUN_TEST(16,2) + RUN_TEST(16,4) + RUN_TEST(16,8) + RUN_TEST(16,16) + RUN_TEST(17,1) + RUN_TEST(17,2) + RUN_TEST(17,4) + RUN_TEST(17,8) + RUN_TEST(17,16) + RUN_TEST(18,1) + RUN_TEST(18,2) + RUN_TEST(18,4) + RUN_TEST(18,8) + RUN_TEST(18,16) + RUN_TEST(19,1) + RUN_TEST(19,2) + RUN_TEST(19,4) + RUN_TEST(19,8) + RUN_TEST(19,16) + RUN_TEST(20,1) + RUN_TEST(20,2) + RUN_TEST(20,4) + RUN_TEST(20,8) + RUN_TEST(20,16) + RUN_TEST(21,1) + RUN_TEST(21,2) + RUN_TEST(21,4) + RUN_TEST(21,8) + RUN_TEST(21,16) + RUN_TEST(22,1) + RUN_TEST(22,2) + RUN_TEST(22,4) + RUN_TEST(22,8) + RUN_TEST(22,16) + RUN_TEST(23,1) + RUN_TEST(23,2) + RUN_TEST(23,4) + RUN_TEST(23,8) + RUN_TEST(23,16) + RUN_TEST(24,1) + RUN_TEST(24,2) + RUN_TEST(24,4) + RUN_TEST(24,8) + RUN_TEST(24,16) + RUN_TEST(25,1) + RUN_TEST(25,2) + RUN_TEST(25,4) + RUN_TEST(25,8) + RUN_TEST(25,16) + RUN_TEST(26,1) + RUN_TEST(26,2) + RUN_TEST(26,4) + RUN_TEST(26,8) + RUN_TEST(26,16) + RUN_TEST(27,1) + RUN_TEST(27,2) + RUN_TEST(27,4) + RUN_TEST(27,8) + RUN_TEST(27,16) + RUN_TEST(28,1) + RUN_TEST(28,2) + RUN_TEST(28,4) + RUN_TEST(28,8) + RUN_TEST(28,16) + RUN_TEST(29,1) + RUN_TEST(29,2) + RUN_TEST(29,4) + RUN_TEST(29,8) + RUN_TEST(29,16) + RUN_TEST(30,1) + RUN_TEST(30,2) + RUN_TEST(30,4) + RUN_TEST(30,8) + RUN_TEST(30,16) + RUN_TEST(31,1) + RUN_TEST(31,2) + RUN_TEST(31,4) + RUN_TEST(31,8) + RUN_TEST(31,16) + RUN_TEST(32,1) + RUN_TEST(32,2) + RUN_TEST(32,4) + RUN_TEST(32,8) + RUN_TEST(32,16) + RUN_TEST(33,1) + RUN_TEST(33,2) + RUN_TEST(33,4) + RUN_TEST(33,8) + RUN_TEST(33,16) + RUN_TEST(34,1) + RUN_TEST(34,2) + RUN_TEST(34,4) + RUN_TEST(34,8) + RUN_TEST(34,16) + RUN_TEST(35,1) + RUN_TEST(35,2) + RUN_TEST(35,4) + RUN_TEST(35,8) + RUN_TEST(35,16) + RUN_TEST(36,1) + RUN_TEST(36,2) + RUN_TEST(36,4) + RUN_TEST(36,8) + RUN_TEST(36,16) + RUN_TEST(37,1) + RUN_TEST(37,2) + RUN_TEST(37,4) + RUN_TEST(37,8) + RUN_TEST(37,16) + RUN_TEST(38,1) + RUN_TEST(38,2) + RUN_TEST(38,4) + RUN_TEST(38,8) + RUN_TEST(38,16) + RUN_TEST(39,1) + RUN_TEST(39,2) + RUN_TEST(39,4) + RUN_TEST(39,8) + RUN_TEST(39,16) + RUN_TEST(40,1) + RUN_TEST(40,2) + RUN_TEST(40,4) + RUN_TEST(40,8) + RUN_TEST(40,16) + RUN_TEST(41,1) + RUN_TEST(41,2) + RUN_TEST(41,4) + RUN_TEST(41,8) + RUN_TEST(41,16) + RUN_TEST(42,1) + RUN_TEST(42,2) + RUN_TEST(42,4) + RUN_TEST(42,8) + RUN_TEST(42,16) + RUN_TEST(43,1) + RUN_TEST(43,2) + RUN_TEST(43,4) + RUN_TEST(43,8) + RUN_TEST(43,16) + RUN_TEST(44,1) + RUN_TEST(44,2) + RUN_TEST(44,4) + RUN_TEST(44,8) + RUN_TEST(44,16) + RUN_TEST(45,1) + RUN_TEST(45,2) + RUN_TEST(45,4) + RUN_TEST(45,8) + RUN_TEST(45,16) + RUN_TEST(46,1) + RUN_TEST(46,2) + RUN_TEST(46,4) + RUN_TEST(46,8) + RUN_TEST(46,16) + RUN_TEST(47,1) + RUN_TEST(47,2) + RUN_TEST(47,4) + RUN_TEST(47,8) + RUN_TEST(47,16) + RUN_TEST(48,1) + RUN_TEST(48,2) + RUN_TEST(48,4) + RUN_TEST(48,8) + RUN_TEST(48,16) + RUN_TEST(49,1) + RUN_TEST(49,2) + RUN_TEST(49,4) + RUN_TEST(49,8) + RUN_TEST(49,16) + RUN_TEST(100,1) + RUN_TEST(100,2) + RUN_TEST(100,4) + RUN_TEST(100,8) + RUN_TEST(100,16) #else - RUN_TEST(3,1); - RUN_TEST(4,1); - RUN_TEST(4,2); - RUN_TEST(4,4); - RUN_TEST(5,1); - RUN_TEST(6,1); - RUN_TEST(7,1); - RUN_TEST(8,1); - RUN_TEST(8,2); - RUN_TEST(8,4); - RUN_TEST(8,8); - RUN_TEST(9,1); - RUN_TEST(16,1); - RUN_TEST(16,2); - RUN_TEST(16,4); - RUN_TEST(16,8); - RUN_TEST(16,16); - RUN_TEST(32,1); - RUN_TEST(32,2); - RUN_TEST(32,4); - RUN_TEST(32,8); - RUN_TEST(32,16); + RUN_TEST(3,1) + RUN_TEST(4,1) + RUN_TEST(5,1) + RUN_TEST(5,8) + RUN_TEST(6,1) + RUN_TEST(6,4) + RUN_TEST(6,8) + RUN_TEST(7,1) + RUN_TEST(7,2) + RUN_TEST(7,4) + RUN_TEST(7,8) + RUN_TEST(8,1) + RUN_TEST(9,1) + RUN_TEST(16,1) + RUN_TEST(32,1) + RUN_TEST(100,1) + RUN_TEST(100,8) #endif - - return 0; }