i965/nir/vec4: Prepare source and destination registers for ALU operations
[mesa.git] / src / mesa / drivers / dri / i965 / brw_shader.cpp
index 6e6a2580d1dd16810a90163757e6047a30f63b93..7c5095ddce35bf05d8f497539ce693aed4b83d9c 100644 (file)
@@ -113,17 +113,23 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
         (i == MESA_SHADER_FRAGMENT);
       compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
       compiler->glsl_compiler_options[i].LowerClipDistance = true;
+
+      /* !ARB_gpu_shader5 */
+      if (devinfo->gen < 7)
+         compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
    }
 
    compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
    compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
 
-   if (compiler->scalar_vs) {
-      /* If we're using the scalar backend for vertex shaders, we need to
-       * configure these accordingly.
-       */
-      compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
-      compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
+   if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", false)) {
+      if (compiler->scalar_vs) {
+         /* If we're using the scalar backend for vertex shaders, we need to
+          * configure these accordingly.
+          */
+         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
+         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
+      }
       compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
 
       compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options;
@@ -229,7 +235,8 @@ brw_lower_packing_builtins(struct brw_context *brw,
 }
 
 static void
-process_glsl_ir(struct brw_context *brw,
+process_glsl_ir(gl_shader_stage stage,
+                struct brw_context *brw,
                 struct gl_shader_program *shader_prog,
                 struct gl_shader *shader)
 {
@@ -255,7 +262,9 @@ process_glsl_ir(struct brw_context *brw,
                       EXP_TO_EXP2 |
                       LOG_TO_LOG2 |
                       bitfield_insert |
-                      LDEXP_TO_ARITH);
+                      LDEXP_TO_ARITH |
+                      CARRY_TO_ARITH |
+                      BORROW_TO_ARITH);
 
    /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
     * if-statements need to be flattened.
@@ -275,15 +284,17 @@ process_glsl_ir(struct brw_context *brw,
    lower_quadop_vector(shader->ir, false);
 
    bool lowered_variable_indexing =
-      lower_variable_index_to_cond_assign(shader->ir,
+      lower_variable_index_to_cond_assign((gl_shader_stage)stage,
+                                          shader->ir,
                                           options->EmitNoIndirectInput,
                                           options->EmitNoIndirectOutput,
                                           options->EmitNoIndirectTemp,
                                           options->EmitNoIndirectUniform);
 
    if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
-      perf_debug("Unsupported form of variable indexing in FS; falling "
-                 "back to very inefficient code generation\n");
+      perf_debug("Unsupported form of variable indexing in %s; falling "
+                 "back to very inefficient code generation\n",
+                 _mesa_shader_stage_to_abbrev(shader->Stage));
    }
 
    lower_ubo_reference(shader, shader->ir);
@@ -308,7 +319,7 @@ process_glsl_ir(struct brw_context *brw,
    } while (progress);
 
    if (options->NirOptions != NULL)
-      lower_output_reads(shader->ir);
+      lower_output_reads(stage, shader->ir);
 
    validate_ir_tree(shader->ir);
 
@@ -352,7 +363,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 
       _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);
 
-      process_glsl_ir(brw, shProg, shader);
+      process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader);
 
       /* Make a pass over the IR to add state references for any built-in
        * uniforms that are used.  This has to be done now (during linking).
@@ -387,8 +398,10 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 
       brw_add_texrect_params(prog);
 
-      if (options->NirOptions)
-         prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);
+      if (options->NirOptions) {
+         prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
+                                    is_scalar_shader_stage(brw, stage));
+      }
 
       _mesa_reference_program(ctx, &prog, NULL);
    }
@@ -422,6 +435,7 @@ brw_type_for_base_type(const struct glsl_type *type)
       return BRW_REGISTER_TYPE_F;
    case GLSL_TYPE_INT:
    case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_SUBROUTINE:
       return BRW_REGISTER_TYPE_D;
    case GLSL_TYPE_UINT:
       return BRW_REGISTER_TYPE_UD;
@@ -527,6 +541,8 @@ brw_instruction_name(enum opcode op)
       return opcode_descs[op].name;
    case FS_OPCODE_FB_WRITE:
       return "fb_write";
+   case FS_OPCODE_FB_WRITE_LOGICAL:
+      return "fb_write_logical";
    case FS_OPCODE_BLORP_FB_WRITE:
       return "blorp_fb_write";
    case FS_OPCODE_REP_FB_WRITE:
@@ -555,43 +571,80 @@ brw_instruction_name(enum opcode op)
 
    case SHADER_OPCODE_TEX:
       return "tex";
+   case SHADER_OPCODE_TEX_LOGICAL:
+      return "tex_logical";
    case SHADER_OPCODE_TXD:
       return "txd";
+   case SHADER_OPCODE_TXD_LOGICAL:
+      return "txd_logical";
    case SHADER_OPCODE_TXF:
       return "txf";
+   case SHADER_OPCODE_TXF_LOGICAL:
+      return "txf_logical";
    case SHADER_OPCODE_TXL:
       return "txl";
+   case SHADER_OPCODE_TXL_LOGICAL:
+      return "txl_logical";
    case SHADER_OPCODE_TXS:
       return "txs";
+   case SHADER_OPCODE_TXS_LOGICAL:
+      return "txs_logical";
    case FS_OPCODE_TXB:
       return "txb";
+   case FS_OPCODE_TXB_LOGICAL:
+      return "txb_logical";
    case SHADER_OPCODE_TXF_CMS:
       return "txf_cms";
+   case SHADER_OPCODE_TXF_CMS_LOGICAL:
+      return "txf_cms_logical";
    case SHADER_OPCODE_TXF_UMS:
       return "txf_ums";
+   case SHADER_OPCODE_TXF_UMS_LOGICAL:
+      return "txf_ums_logical";
    case SHADER_OPCODE_TXF_MCS:
       return "txf_mcs";
+   case SHADER_OPCODE_TXF_MCS_LOGICAL:
+      return "txf_mcs_logical";
    case SHADER_OPCODE_LOD:
       return "lod";
+   case SHADER_OPCODE_LOD_LOGICAL:
+      return "lod_logical";
    case SHADER_OPCODE_TG4:
       return "tg4";
+   case SHADER_OPCODE_TG4_LOGICAL:
+      return "tg4_logical";
    case SHADER_OPCODE_TG4_OFFSET:
       return "tg4_offset";
+   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
+      return "tg4_offset_logical";
+
    case SHADER_OPCODE_SHADER_TIME_ADD:
       return "shader_time_add";
 
    case SHADER_OPCODE_UNTYPED_ATOMIC:
       return "untyped_atomic";
+   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+      return "untyped_atomic_logical";
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
       return "untyped_surface_read";
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+      return "untyped_surface_read_logical";
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
       return "untyped_surface_write";
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+      return "untyped_surface_write_logical";
    case SHADER_OPCODE_TYPED_ATOMIC:
       return "typed_atomic";
+   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+      return "typed_atomic_logical";
    case SHADER_OPCODE_TYPED_SURFACE_READ:
       return "typed_surface_read";
+   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+      return "typed_surface_read_logical";
    case SHADER_OPCODE_TYPED_SURFACE_WRITE:
       return "typed_surface_write";
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+      return "typed_surface_write_logical";
    case SHADER_OPCODE_MEMORY_FENCE:
       return "memory_fence";
 
@@ -652,8 +705,6 @@ brw_instruction_name(enum opcode op)
    case FS_OPCODE_DISCARD_JUMP:
       return "discard_jump";
 
-   case FS_OPCODE_SET_OMASK:
-      return "set_omask";
    case FS_OPCODE_SET_SAMPLE_ID:
       return "set_sample_id";
    case FS_OPCODE_SET_SIMD4X2_OFFSET:
@@ -846,15 +897,16 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
    return false;
 }
 
-backend_shader::backend_shader(struct brw_context *brw,
+backend_shader::backend_shader(const struct brw_compiler *compiler,
+                               void *log_data,
                                void *mem_ctx,
                                struct gl_shader_program *shader_prog,
                                struct gl_program *prog,
                                struct brw_stage_prog_data *stage_prog_data,
                                gl_shader_stage stage)
-   : brw(brw),
-     devinfo(brw->intelScreen->devinfo),
-     ctx(&brw->ctx),
+   : compiler(compiler),
+     log_data(log_data),
+     devinfo(compiler->devinfo),
      shader(shader_prog ?
         (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
      shader_prog(shader_prog),
@@ -1145,10 +1197,14 @@ backend_instruction::has_side_effects() const
 {
    switch (opcode) {
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
    case SHADER_OPCODE_TYPED_ATOMIC:
+   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
    case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
    case SHADER_OPCODE_MEMORY_FENCE:
    case SHADER_OPCODE_URB_WRITE_SIMD8:
    case FS_OPCODE_FB_WRITE: