src/intel/compiler/brw_vec4.h

   1 /*
   2  * Copyright © 2011 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #ifndef BRW_VEC4_H
  25 #define BRW_VEC4_H
  26
  27 #include "brw_shader.h"
  28
  29 #ifdef __cplusplus
  30 #include "brw_ir_vec4.h"
  31 #include "brw_ir_performance.h"
  32 #include "brw_vec4_builder.h"
  33 #include "brw_vec4_live_variables.h"
  34 #endif
  35
  36 #include "compiler/glsl/ir.h"
  37 #include "compiler/nir/nir.h"
  38
  39
  40 #ifdef __cplusplus
  41 extern "C" {
  42 #endif
  43
  44 const unsigned * /* NOTE(review): presumably the generated code words -- confirm at the definition */
  45 brw_vec4_generate_assembly(const struct brw_compiler *compiler, /* sits inside the extern "C" guard: C linkage in C++ builds */
  46                            void *log_data, /* opaque pointer; its use is not visible in this header */
  47                            void *mem_ctx, /* opaque pointer; presumably an allocation context -- confirm */
  48                            const nir_shader *nir, /* pointee is const */
  49                            struct brw_vue_prog_data *prog_data, /* pointee is non-const */
  50                            const struct cfg_t *cfg, /* pointee is const */
  51                            struct brw_compile_stats *stats); /* pointee is non-const; TODO confirm whether NULL is accepted */
  52
  53 #ifdef __cplusplus
  54 } /* extern "C" */
  55
  56 namespace brw {
  57 /**
  58  * The vec4 shader front-end (abstract: see the pure-virtual hooks below).
  59  *
  60  * Built from a nir_shader; the nir_emit_*() methods take NIR constructs
  61  * and the emit() family returns vec4_instruction pointers.
  62  */
  63 class vec4_visitor : public backend_shader
  64 {
  65 public:
  66    vec4_visitor(const struct brw_compiler *compiler,
  67                 void *log_data,
  68                 const struct brw_sampler_prog_key_data *key,
  69                 struct brw_vue_prog_data *prog_data,
  70                 const nir_shader *shader,
  71                 void *mem_ctx,
  72                 bool no_spills,
  73                 int shader_time_index);
  74
  75    dst_reg dst_null_f() /* null register, typed as brw_null_reg() returns it */
  76    {
  77       return dst_reg(brw_null_reg());
  78    }
  79
  80    dst_reg dst_null_df() /* null register retyped to BRW_REGISTER_TYPE_DF */
  81    {
  82       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
  83    }
  84
  85    dst_reg dst_null_d() /* null register retyped to BRW_REGISTER_TYPE_D */
  86    {
  87       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  88    }
  89
  90    dst_reg dst_null_ud() /* null register retyped to BRW_REGISTER_TYPE_UD */
  91    {
  92       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
  93    }
  94
  95    const struct brw_sampler_prog_key_data * const key_tex; /* const pointer to const data */
  96    struct brw_vue_prog_data * const prog_data; /* const pointer; the pointee is mutable */
  97    char *fail_msg; /* NOTE(review): presumably filled in by fail() -- confirm */
  98    bool failed; /* NOTE(review): presumably set by fail() -- confirm */
  99
 100    /**
 101     * GLSL IR currently being processed, which is associated with our
 102     * driver IR instructions for debugging purposes.
 103     */
 104    const void *base_ir;
 105    const char *current_annotation;
 106
 107    int first_non_payload_grf;
 108    unsigned int max_grf;
 109    BRW_ANALYSIS(live_analysis, brw::vec4_live_variables,
 110                 backend_shader *) live_analysis;
 111    BRW_ANALYSIS(performance_analysis, brw::performance,
 112                 vec4_visitor *) performance_analysis;
 113
 114    bool need_all_constants_in_pull_buffer;
 115
 116    /* Regs for vertex results.  Generated at ir_variable visiting time
 117     * for the ir->location's used.
 118     */
 119    dst_reg output_reg[VARYING_SLOT_TESS_MAX][4]; /* NOTE(review): presumably [varying slot][component] -- confirm */
 120    unsigned output_num_components[VARYING_SLOT_TESS_MAX][4]; /* same dimensions as output_reg */
 121    const char *output_reg_annotation[VARYING_SLOT_TESS_MAX]; /* one string per first-dimension index of output_reg */
 122    int uniforms;
 123
 124    src_reg shader_start_time;
 125
 126    bool run();
 127    void fail(const char *msg, ...); /* variadic; msg is presumably a printf-style format -- confirm */
 128
 129    int setup_uniforms(int payload_reg);
 130
 131    bool reg_allocate_trivial();
 132    bool reg_allocate();
 133    void evaluate_spill_costs(float *spill_costs, bool *no_spill);
 134    int choose_spill_reg(struct ra_graph *g);
 135    void spill_reg(unsigned spill_reg);
 136    void move_grf_array_access_to_scratch();
 137    void move_uniform_array_access_to_pull_constants();
 138    void move_push_constants_to_pull_constants();
 139    void split_uniform_registers();
 140    void pack_uniform_registers();
 141    virtual void invalidate_analysis(brw::analysis_dependency_class c); /* virtual, so subclasses may override it */
 142    void split_virtual_grfs();
 143    bool opt_vector_float();
 144    bool opt_reduce_swizzle();
 145    bool dead_code_eliminate();
 146    bool opt_cmod_propagation();
 147    bool opt_copy_propagation(bool do_constant_prop = true); /* do_constant_prop defaults to true */
 148    bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
 149    bool opt_cse();
 150    bool opt_algebraic();
 151    bool opt_register_coalesce();
 152    bool eliminate_find_live_channel();
 153    bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
 154    void opt_set_dependency_control();
 155    void opt_schedule_instructions();
 156    void convert_to_hw_regs();
 157    void fixup_3src_null_dest();
 158
 159    bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
 160    bool lower_simd_width();
 161    bool scalarize_df();
 162    bool lower_64bit_mad_to_mul_add();
 163    void apply_logical_swizzle(struct brw_reg *hw_reg,
 164                               vec4_instruction *inst, int arg);
 165
 166    vec4_instruction *emit(vec4_instruction *inst); /* emit() overloads: an existing instruction, or an opcode with optional dst and up to three sources */
 167
 168    vec4_instruction *emit(enum opcode opcode);
 169    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
 170    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
 171                           const src_reg &src0);
 172    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
 173                           const src_reg &src0, const src_reg &src1);
 174    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
 175                           const src_reg &src0, const src_reg &src1,
 176                           const src_reg &src2);
 177
 178    vec4_instruction *emit_before(bblock_t *block,
 179                                  vec4_instruction *inst,
 180                                  vec4_instruction *new_inst);
 181
 182 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
 183 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
 184 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
 185    EMIT1(MOV) /* EMIT1/2/3(op) declare a member op(dst, 1/2/3 sources) returning vec4_instruction * */
 186    EMIT1(NOT)
 187    EMIT1(RNDD)
 188    EMIT1(RNDE)
 189    EMIT1(RNDZ)
 190    EMIT1(FRC)
 191    EMIT1(F32TO16)
 192    EMIT1(F16TO32)
 193    EMIT2(ADD)
 194    EMIT2(MUL)
 195    EMIT2(MACH)
 196    EMIT2(MAC)
 197    EMIT2(AND)
 198    EMIT2(OR)
 199    EMIT2(XOR)
 200    EMIT2(DP3)
 201    EMIT2(DP4)
 202    EMIT2(DPH)
 203    EMIT2(SHL)
 204    EMIT2(SHR)
 205    EMIT2(ASR)
 206    vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1, /* declared by hand: by-value operands plus a conditional mod */
 207                          enum brw_conditional_mod condition);
 208    vec4_instruction *IF(src_reg src0, src_reg src1,
 209                         enum brw_conditional_mod condition);
 210    vec4_instruction *IF(enum brw_predicate predicate);
 211    EMIT1(SCRATCH_READ)
 212    EMIT2(SCRATCH_WRITE)
 213    EMIT3(LRP)
 214    EMIT1(BFREV)
 215    EMIT3(BFE)
 216    EMIT2(BFI1)
 217    EMIT3(BFI2)
 218    EMIT1(FBH)
 219    EMIT1(FBL)
 220    EMIT1(CBIT)
 221    EMIT3(MAD)
 222    EMIT2(ADDC)
 223    EMIT2(SUBB)
 224    EMIT1(DIM)
 225
 226 #undef EMIT1
 227 #undef EMIT2
 228 #undef EMIT3
 229
 230    vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
 231                                  src_reg src0, src_reg src1);
 232
 233    /**
 234     * Copy any live channel from \p src to the first channel of the
 235     * result.
 236     */
 237    src_reg emit_uniformize(const src_reg &src);
 238
 239    /** Fix all float operands of a 3-source instruction. */
 240    void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
 241
 242    src_reg fix_3src_operand(const src_reg &src);
 243    src_reg resolve_source_modifiers(const src_reg &src);
 244
 245    vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
 246                                const src_reg &src1 = src_reg()); /* src1 defaults to a default-constructed src_reg */
 247
 248    src_reg fix_math_operand(const src_reg &src);
 249
 250    void emit_pack_half_2x16(dst_reg dst, src_reg src0);
 251    void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
 252    void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
 253    void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
 254    void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
 255    void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
 256
 257    void emit_texture(ir_texture_opcode op,
 258                      dst_reg dest,
 259                      const glsl_type *dest_type,
 260                      src_reg coordinate,
 261                      int coord_components,
 262                      src_reg shadow_comparator,
 263                      src_reg lod, src_reg lod2,
 264                      src_reg sample_index,
 265                      uint32_t constant_offset,
 266                      src_reg offset_value,
 267                      src_reg mcs,
 268                      uint32_t surface, src_reg surface_reg,
 269                      src_reg sampler_reg);
 270
 271    src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
 272                           src_reg surface);
 273    void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
 274
 275    void emit_ndc_computation();
 276    void emit_psiz_and_flags(dst_reg reg);
 277    vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
 278    virtual void emit_urb_slot(dst_reg reg, int varying);
 279
 280    void emit_shader_time_begin();
 281    void emit_shader_time_end();
 282    void emit_shader_time_write(int shader_time_subindex, src_reg value);
 283
 284    src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
 285                               src_reg *reladdr, int reg_offset);
 286    void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
 287                           dst_reg dst,
 288                           src_reg orig_src,
 289                           int base_offset);
 290    void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
 291                            int base_offset);
 292    void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
 293                                 dst_reg dst,
 294                                 src_reg orig_src,
 295                                 int base_offset,
 296                                 src_reg indirect);
 297    void emit_pull_constant_load_reg(dst_reg dst,
 298                                     src_reg surf_index,
 299                                     src_reg offset,
 300                                     bblock_t *before_block,
 301                                     vec4_instruction *before_inst);
 302    src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
 303                                 vec4_instruction *inst, src_reg src);
 304
 305    void resolve_ud_negate(src_reg *reg);
 306
 307    bool lower_minmax();
 308
 309    src_reg get_timestamp();
 310
 311    void dump_instruction(const backend_instruction *inst) const;
 312    void dump_instruction(const backend_instruction *inst, FILE *file) const;
 313
 314    bool is_high_sampler(src_reg sampler);
 315
 316    bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);
 317
 318    void emit_conversion_from_double(dst_reg dst, src_reg src);
 319    void emit_conversion_to_double(dst_reg dst, src_reg src);
 320
 321    vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
 322                                         bool for_write,
 323                                         bblock_t *block = NULL, /* block and ref both default to NULL */
 324                                         vec4_instruction *ref = NULL);
 325
 326    virtual void emit_nir_code(); /* every member from here to nir_emit_ssbo_atomic() is virtual except get_nir_ssbo_intrinsic_index() */
 327    virtual void nir_setup_uniforms();
 328    virtual void nir_emit_impl(nir_function_impl *impl);
 329    virtual void nir_emit_cf_list(exec_list *list);
 330    virtual void nir_emit_if(nir_if *if_stmt);
 331    virtual void nir_emit_loop(nir_loop *loop);
 332    virtual void nir_emit_block(nir_block *block);
 333    virtual void nir_emit_instr(nir_instr *instr);
 334    virtual void nir_emit_load_const(nir_load_const_instr *instr);
 335    src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
 336    virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
 337    virtual void nir_emit_alu(nir_alu_instr *instr);
 338    virtual void nir_emit_jump(nir_jump_instr *instr);
 339    virtual void nir_emit_texture(nir_tex_instr *instr);
 340    virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
 341    virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
 342
 343    dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
 344    dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
 345    dst_reg get_nir_dest(const nir_dest &dest);
 346    src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
 347                        unsigned num_components = 4); /* num_components defaults to 4 in all three get_nir_src() overloads */
 348    src_reg get_nir_src(const nir_src &src, nir_alu_type type,
 349                        unsigned num_components = 4);
 350    src_reg get_nir_src(const nir_src &src,
 351                        unsigned num_components = 4);
 352    src_reg get_nir_src_imm(const nir_src &src);
 353    src_reg get_indirect_offset(nir_intrinsic_instr *instr);
 354
 355    dst_reg *nir_locals;
 356    dst_reg *nir_ssa_values;
 357
 358 protected: /* the "= 0" members below are pure virtual: concrete subclasses must implement them */
 359    void emit_vertex();
 360    void setup_payload_interference(struct ra_graph *g, int first_payload_node,
 361                                    int reg_node_count);
 362    virtual void setup_payload() = 0;
 363    virtual void emit_prolog() = 0;
 364    virtual void emit_thread_end() = 0;
 365    virtual void emit_urb_write_header(int mrf) = 0;
 366    virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
 367    virtual void gs_emit_vertex(int stream_id); /* virtual but not pure, like gs_end_primitive() */
 368    virtual void gs_end_primitive();
 369
 370 private:
 371    /**
 372     * If true, then register allocation should fail instead of spilling.
 373     */
 374    const bool no_spills;
 375
 376    int shader_time_index;
 377
 378    unsigned last_scratch; /**< measured in 32-byte (register size) units */
 379 };
 380
 381 } /* namespace brw */
 382 #endif /* __cplusplus */
 383
 384 #endif /* BRW_VEC4_H */