src/intel/compiler/brw_fs_lower_conversions.cpp

   1 /*
   2  * Copyright © 2015 Connor Abbott
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "brw_fs.h"
  25 #include "brw_cfg.h"
  26 #include "brw_fs_builder.h"
  27
  28 using namespace brw;
  29
  30 static bool
  31 supports_type_conversion(const fs_inst *inst) {
  32    switch (inst->opcode) {
  33    case BRW_OPCODE_MOV:
  34    case SHADER_OPCODE_MOV_INDIRECT:
  35       return true;
  36    case BRW_OPCODE_SEL:
  37       return inst->dst.type == get_exec_type(inst);
  38    default:
  39       /* FIXME: We assume the opcodes don't explicitly mentioned
  40        * before just work fine with arbitrary conversions.
  41        */
  42       return true;
  43    }
  44 }
  45
  46 bool
  47 fs_visitor::lower_conversions()
  48 {
  49    bool progress = false;
  50
  51    foreach_block_and_inst(block, fs_inst, inst, cfg) {
  52       const fs_builder ibld(this, block, inst);
  53       fs_reg dst = inst->dst;
  54       bool saturate = inst->saturate;
  55
  56       if (supports_type_conversion(inst)) {
  57          if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
  58             /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
  59              * Single Precision Float":
  60              *
  61              *    The upper Dword of every Qword will be written with undefined
  62              *    value when converting DF to F.
  63              *
  64              * So we need to allocate a temporary that's two registers, and then do
  65              * a strided MOV to get the lower DWord of every Qword that has the
  66              * result.
  67              */
  68             fs_reg temp = ibld.vgrf(get_exec_type(inst));
  69             fs_reg strided_temp = subscript(temp, dst.type, 0);
  70
  71             assert(inst->size_written == inst->dst.component_size(inst->exec_size));
  72             inst->dst = strided_temp;
  73             inst->saturate = false;
  74             /* As it is an strided destination, we write n-times more being n the
  75              * size ratio between source and destination types. Update
  76              * size_written accordingly.
  77              */
  78             inst->size_written = inst->dst.component_size(inst->exec_size);
  79             ibld.at(block, inst->next).MOV(dst, strided_temp)->saturate = saturate;
  80
  81             progress = true;
  82          }
  83       } else {
  84          fs_reg temp0 = ibld.vgrf(get_exec_type(inst));
  85
  86          assert(inst->size_written == inst->dst.component_size(inst->exec_size));
  87          inst->dst = temp0;
  88          /* As it is an strided destination, we write n-times more being n the
  89           * size ratio between source and destination types. Update
  90           * size_written accordingly.
  91           */
  92          inst->size_written = inst->dst.component_size(inst->exec_size);
  93          inst->saturate = false;
  94          /* Now, do the conversion to original destination's type. In next iteration,
  95           * we will lower it if it is a d2f conversion.
  96           */
  97          ibld.at(block, inst->next).MOV(dst, temp0)->saturate = saturate;
  98
  99          progress = true;
 100       }
 101    }
 102
 103    if (progress)
 104       invalidate_live_intervals();
 105
 106    return progress;
 107 }