From: Sandipan Das Date: Thu, 7 Jun 2018 05:24:51 +0000 (+0530) Subject: arch-power: Fix fixed-point arithmetic multiply and divide instructions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fa1a4f6352f673c4f2d8d1785bd7be2d1fb592bc;p=gem5.git arch-power: Fix fixed-point arithmetic multiply and divide instructions This fixes the following arithmetic instructions: * Multiply Low Immediate (mulli) * Multiply Low Word (mullw[o][.]) * Multiply High Word (mulhw[.]) * Multiply High Word Unsigned (mulhwu[.]) * Divide Word (divw[o][.]) * Divide Word Unsigned (divwu[o][.]) This also fixes disassembly generation for all of the above. Change-Id: I46fd3751b86a7436a962f8b93f26d8343f215fed Signed-off-by: Sandipan Das --- diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa index 765408a67..456c6be01 100644 --- a/src/arch/power/isa/decoder.isa +++ b/src/arch/power/isa/decoder.isa @@ -216,9 +216,8 @@ decode PO default Unknown::unknown() { true); 7: mulli({{ - int32_t src = Ra_sw; - int64_t prod = src * simm; - Rt = (uint32_t)prod; + int64_t res = Ra_sd * simm; + Rt = res; }}); } @@ -564,65 +563,47 @@ decode PO default Unknown::unknown() { // with destination register Rt. format IntArithOp { 75: mulhw({{ - int64_t prod = Ra_sd * Rb_sd; - Rt = prod >> 32; + uint64_t res = (int64_t)Ra_sw * Rb_sw; + res = res >> 32; + Rt = res; }}); + 11: mulhwu({{ - uint64_t prod = Ra_ud * Rb_ud; - Rt = prod >> 32; + uint64_t res = (uint64_t)Ra_uw * Rb_uw; + res = res >> 32; + Rt = res; }}); - 235: mullw({{ int64_t prod = Ra_sd * Rb_sd; Rt = prod; }}); - 747: mullwo({{ - int64_t src1 = Ra_sd; - int64_t src2 = Rb; - int64_t prod = src1 * src2; - Rt = prod; + + 235: mullw({{ + int64_t res = (int64_t)Ra_sw * Rb_sw; + if (res != (int32_t)res) { + setOV = true; + } + Rt = res; }}, true); 491: divw({{ int32_t src1 = Ra_sw; int32_t src2 = Rb_sw; - if ((src1 != 0x80000000 || src2 != 0xffffffff) - && src2 != 0) { - Rt = src1 / src2; - } else { - Rt = 0; - } - }}); - - 1003: divwo({{ - int32_t src1 = Ra_sw; - int32_t src2 = Rb_sw; - if ((src1 != 0x80000000 || src2 != 0xffffffff) - && src2 != 0) { - Rt = src1 / src2; + if ((src1 != INT32_MIN || src2 != -1) && src2 != 0) { + Rt = (uint32_t)(src1 / src2); } else { Rt = 0; - divSetOV = true; + setOV = true; } }}, true); 459: divwu({{ - uint32_t src1 = Ra_sw; - uint32_t src2 = Rb_sw; + uint32_t src1 = Ra_uw; + uint32_t src2 = Rb_uw; if (src2 != 0) { Rt = src1 / src2; } else { Rt = 0; + setOV = true; } - }}); - - 971: divwuo({{ - uint32_t src1 = Ra_sw; - uint32_t src2 = Rb_sw; - if (src2 != 0) { - Rt = src1 / src2; - } else { - Rt = 0; - divSetOV = true; - } }}, true); } diff --git a/src/arch/power/isa/formats/integer.isa b/src/arch/power/isa/formats/integer.isa index eac6db29b..a21deab56 100644 --- a/src/arch/power/isa/formats/integer.isa +++ b/src/arch/power/isa/formats/integer.isa @@ -106,17 +106,24 @@ computeOVCode = ''' } ''' -computeDivOVCode = ''' - if (divSetOV) { +setCACode = ''' + if (setCA) { + xer.ca = 1; + xer.ca32 = 1; + } else { + xer.ca = 0; + xer.ca32 = 0; + } +''' + +setOVCode = ''' + if (setOV) { xer.ov = 1; + xer.ov32 = 1; xer.so = 1; } else { - if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) { - xer.ov = 1; - xer.so = 1; - } else { - xer.ov = 0; - } + xer.ov = 0; + xer.ov32 = 0; } ''' @@ -319,10 +326,14 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0, // Instructions that use source registers Ra and Rb, with the result // placed into Rt. Basically multiply and divide instructions. The -// carry bit is never set, but overflow can be calculated. Division -// explicitly sets the overflow bit in certain situations and this is -// dealt with using the 'divSetOV' boolean in decoder.isa. We generate -// two versions of each instruction to deal with the Rc bit. +// carry bit is never set, but overflow can be calculated. In certain +// situations, the overflow bits have to be set and this is dealt with +// using the 'setOV' boolean in decoder.isa. +// +// In case overflow is to be calculated, we generate four versions of +// each instruction to deal with different combinations of having the +// OE bit set or unset and the Rc bit set or unset too. Otherwise, we +// generate two versions of each instruction to deal with the Rc bit. def format IntArithOp(code, computeOV = 0, inst_flags = []) {{ # The result is always in Rt, but the source values vary @@ -330,28 +341,54 @@ def format IntArithOp(code, computeOV = 0, inst_flags = []) {{ # Deal with setting the overflow flag if computeOV: - code = 'bool divSetOV = false;\n' + code - code += computeDivOVCode % dict + setXERCode - - # Setup the 2 code versions and add code to access XER if necessary - code_rc1 = readXERCode + code + computeCR0Code % dict - if computeOV: - code = readXERCode + code - - # Generate the classes - (header_output, decoder_output, decode_block, exec_output) = \ - GenAluOp(name, Name, 'IntOp', code, inst_flags, - CheckRcDecode, BasicConstructor) - - # Generate the second class - (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \ - GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags, - CheckRcDecode, IntRcConstructor) - - # Finally, add to the other outputs - header_output += header_output_rc1 - decoder_output += decoder_output_rc1 - exec_output += exec_output_rc1 + # Setup the 4 code versions and add code to access XER if necessary + code = 'bool setOV M5_VAR_USED = false;\n' + code + code_rc1 = readXERCode + code + computeCR0Code % dict + code_oe1 = readXERCode + code + setOVCode + setXERCode + code_rc1_oe1 = readXERCode + code + setOVCode + setXERCode + code_rc1_oe1 += computeCR0Code % dict + + # Generate the classes + (header_output, decoder_output, decode_block, exec_output) = \ + GenAluOp(name, Name, 'IntArithOp', code, inst_flags, + CheckRcOeDecode, BasicConstructor) + (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \ + GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags, + CheckRcOeDecode, IntRcConstructor) + (header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \ + GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags, + CheckRcOeDecode, IntOeConstructor) + (header_output_rc1_oe1, decoder_output_rc1_oe1, _, + exec_output_rc1_oe1) = \ + GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1, + inst_flags, CheckRcOeDecode, IntRcOeConstructor) + + # Finally, add to the other outputs + header_output += \ + header_output_rc1 + header_output_oe1 + header_output_rc1_oe1 + decoder_output += \ + decoder_output_rc1 + decoder_output_oe1 + decoder_output_rc1_oe1 + exec_output += \ + exec_output_rc1 + exec_output_oe1 + exec_output_rc1_oe1 + + else: + # Setup the 2 code versions and add code to access XER if necessary + code_rc1 = readXERCode + code + computeCR0Code % dict + + # Generate the first class + (header_output, decoder_output, decode_block, exec_output) = \ + GenAluOp(name, Name, 'IntArithOp', code, inst_flags, + CheckRcDecode, BasicConstructor) + + # Generate the second class + (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \ + GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags, + CheckRcDecode, IntRcConstructor) + + # Finally, add to the other outputs + header_output += header_output_rc1 + decoder_output += decoder_output_rc1 + exec_output += exec_output_rc1 }};