src/intel/compiler/brw_vec4.h

   1 /*
   2  * Copyright © 2011 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #ifndef BRW_VEC4_H
  25 #define BRW_VEC4_H
  26
  27 #include "brw_shader.h"
  28
  29 #ifdef __cplusplus
  30 #include "brw_ir_vec4.h"
  31 #include "brw_vec4_builder.h"
  32 #include "brw_vec4_live_variables.h"
  33 #endif
  34
  35 #include "compiler/glsl/ir.h"
  36 #include "compiler/nir/nir.h"
  37
  38
  39 #ifdef __cplusplus
  40 extern "C" {
  41 #endif
  42
  /**
   * Generate assembly for a vec4 program.
   *
   * Declared with C linkage when this header is compiled as C++ (it sits
   * inside the extern "C" block).
   *
   * \param compiler   compiler instance the program is built for
   * \param log_data   opaque pointer; presumably forwarded to logging
   *                   callbacks -- confirm against the definition
   * \param mem_ctx    opaque allocation context; presumably owns the
   *                   returned buffer -- confirm against the definition
   * \param nir        shader the assembly is generated for (read-only)
   * \param prog_data  VUE program data; non-const, so it may be updated
   * \param cfg        control-flow graph to generate code from (read-only)
   * \param stats      compile statistics; non-const, so presumably an
   *                   output -- whether NULL is accepted is not visible here
   *
   * \return pointer to const unsigned data; presumably the generated
   *         instruction words -- size and ownership are not visible in
   *         this header.
   */
  43 const unsigned *
  44 brw_vec4_generate_assembly(const struct brw_compiler *compiler,
  45                            void *log_data,
  46                            void *mem_ctx,
  47                            const nir_shader *nir,
  48                            struct brw_vue_prog_data *prog_data,
  49                            const struct cfg_t *cfg,
  50                            struct brw_compile_stats *stats);
  51
  52 #ifdef __cplusplus
  53 } /* extern "C" */
  54
  55 namespace brw {
  56 /**
  57  * The vertex shader front-end.
  58  *
  59  * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
  60  * fixed-function) into VS IR.
      *
      * NOTE(review): the sentence above looks stale.  The constructor below
      * takes a nir_shader and the class declares nir_emit_*() entry points,
      * so the input visible in this header is NIR, and the emit helpers
      * return vec4_instruction pointers -- confirm and reword.
      *
      * The class derives from backend_shader and is abstract:
      * setup_payload(), emit_prolog(), emit_thread_end(),
      * emit_urb_write_header() and emit_urb_write_opcode() are pure virtual,
      * so a concrete subclass has to provide them.
  61  */
  62 class vec4_visitor : public backend_shader
  63 {
  64 public:
        /**
         * Construct a visitor for \p shader.
         *
         * The role of each argument is defined by the constructor body, which
         * is not part of this header.  \p no_spills and \p shader_time_index
         * share their names with the private members declared at the end of
         * the class; \p key and \p prog_data presumably initialise the const
         * pointers key_tex and prog_data -- confirm against the definition.
         */
  65    vec4_visitor(const struct brw_compiler *compiler,
  66                 void *log_data,
  67                 const struct brw_sampler_prog_key_data *key,
  68                 struct brw_vue_prog_data *prog_data,
  69                 const nir_shader *shader,
  70                 void *mem_ctx,
  71                 bool no_spills,
  72                 int shader_time_index);
  73
        /*
         * Null destination registers, one helper per register type.
         * dst_null_f() wraps brw_null_reg() as-is (no retype); the other
         * three retype it to DF, D and UD respectively.
         */
  74    dst_reg dst_null_f()
  75    {
  76       return dst_reg(brw_null_reg());
  77    }
  78
  79    dst_reg dst_null_df()
  80    {
  81       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
  82    }
  83
  84    dst_reg dst_null_d()
  85    {
  86       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  87    }
  88
  89    dst_reg dst_null_ud()
  90    {
  91       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
  92    }
  93
        /*
         * Both pointers are themselves const, so they cannot be reseated after
         * construction.  The key is read-only through key_tex; the program
         * data stays writable through prog_data.
         */
  94    const struct brw_sampler_prog_key_data * const key_tex;
  95    struct brw_vue_prog_data * const prog_data;
        /* Failure state; presumably written by fail() below -- confirm. */
  96    char *fail_msg;
  97    bool failed;
  98
  99    /**
 100     * GLSL IR currently being processed, which is associated with our
 101     * driver IR instructions for debugging purposes.
 102     */
 103    const void *base_ir;
 104    const char *current_annotation;
 105
 106    int first_non_payload_grf;
 107    unsigned int max_grf;
        /*
         * Analysis member declared through the BRW_ANALYSIS macro for
         * brw::vec4_live_variables; opt_cse_local() below takes a reference to
         * that same analysis type.
         */
 108    BRW_ANALYSIS(live_analysis, brw::vec4_live_variables,
 109                 backend_shader *) live_analysis;
 110
 111    bool need_all_constants_in_pull_buffer;
 112
 113    /* Regs for vertex results.  Generated at ir_variable visiting time
 114     * for the ir->location's used.
 115     */
        /*
         * output_reg and output_num_components are indexed
         * [varying slot][0..3]; the second index is presumably the component
         * within the slot -- confirm.  output_reg_annotation has one entry
         * per slot.
         */
 116    dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
 117    unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
 118    const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
 119    int uniforms;
 120
 121    src_reg shader_start_time;
 122
        /*
         * Top-level entry points.  run() presumably returns false on failure
         * (see the failed flag above) -- confirm.  fail() is variadic;
         * \p msg is presumably a printf-style format string -- confirm.
         */
 123    bool run();
 124    void fail(const char *msg, ...);
 125
 126    int setup_uniforms(int payload_reg);
 127
        /*
         * Register allocation, spilling, uniform handling and optimisation
         * passes (grouped here by name only).  The bool-returning passes
         * presumably report whether they changed the program -- confirm
         * against the definitions.
         */
 128    bool reg_allocate_trivial();
 129    bool reg_allocate();
 130    void evaluate_spill_costs(float *spill_costs, bool *no_spill);
 131    int choose_spill_reg(struct ra_graph *g);
 132    void spill_reg(unsigned spill_reg);
 133    void move_grf_array_access_to_scratch();
 134    void move_uniform_array_access_to_pull_constants();
 135    void move_push_constants_to_pull_constants();
 136    void split_uniform_registers();
 137    void pack_uniform_registers();
 138    virtual void invalidate_analysis(brw::analysis_dependency_class c);
 139    void split_virtual_grfs();
 140    bool opt_vector_float();
 141    bool opt_reduce_swizzle();
 142    bool dead_code_eliminate();
 143    bool opt_cmod_propagation();
 144    bool opt_copy_propagation(bool do_constant_prop = true);
 145    bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
 146    bool opt_cse();
 147    bool opt_algebraic();
 148    bool opt_register_coalesce();
 149    bool eliminate_find_live_channel();
 150    bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
 151    void opt_set_dependency_control();
 152    void opt_schedule_instructions();
 153    void convert_to_hw_regs();
 154    void fixup_3src_null_dest();
 155
 156    bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
 157    bool lower_simd_width();
 158    bool scalarize_df();
 159    bool lower_64bit_mad_to_mul_add();
 160    void apply_logical_swizzle(struct brw_reg *hw_reg,
 161                               vec4_instruction *inst, int arg);
 162
        /*
         * Instruction emission.  The first overload takes an already-built
         * instruction; the others take an opcode plus an optional destination
         * and up to three sources.  All return a vec4_instruction pointer.
         */
 163    vec4_instruction *emit(vec4_instruction *inst);
 164
 165    vec4_instruction *emit(enum opcode opcode);
 166    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
 167    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
 168                           const src_reg &src0);
 169    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
 170                           const src_reg &src0, const src_reg &src1);
 171    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
 172                           const src_reg &src0, const src_reg &src1,
 173                           const src_reg &src2);
 174
        /* Takes a block, an existing instruction and a new instruction; the
         * name suggests new_inst is placed before inst -- confirm. */
 175    vec4_instruction *emit_before(bblock_t *block,
 176                                  vec4_instruction *inst,
 177                                  vec4_instruction *new_inst);
 178
        /*
         * EMIT1/EMIT2/EMIT3 each declare a member function named after the
         * macro argument that takes a destination and one, two or three
         * sources and returns a vec4_instruction pointer.  The expansions
         * already end in ';', which is why the uses below carry none.  The
         * macros are #undef'd right after the list, so they do not leak to
         * files that include this header.
         *
         * CMP and IF are declared by hand because their signatures differ
         * (a conditional mod or a predicate instead of plain dst/src lists).
         */
 179 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
 180 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
 181 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
 182    EMIT1(MOV)
 183    EMIT1(NOT)
 184    EMIT1(RNDD)
 185    EMIT1(RNDE)
 186    EMIT1(RNDZ)
 187    EMIT1(FRC)
 188    EMIT1(F32TO16)
 189    EMIT1(F16TO32)
 190    EMIT2(ADD)
 191    EMIT2(MUL)
 192    EMIT2(MACH)
 193    EMIT2(MAC)
 194    EMIT2(AND)
 195    EMIT2(OR)
 196    EMIT2(XOR)
 197    EMIT2(DP3)
 198    EMIT2(DP4)
 199    EMIT2(DPH)
 200    EMIT2(SHL)
 201    EMIT2(SHR)
 202    EMIT2(ASR)
 203    vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
 204                          enum brw_conditional_mod condition);
 205    vec4_instruction *IF(src_reg src0, src_reg src1,
 206                         enum brw_conditional_mod condition);
 207    vec4_instruction *IF(enum brw_predicate predicate);
 208    EMIT1(SCRATCH_READ)
 209    EMIT2(SCRATCH_WRITE)
 210    EMIT3(LRP)
 211    EMIT1(BFREV)
 212    EMIT3(BFE)
 213    EMIT2(BFI1)
 214    EMIT3(BFI2)
 215    EMIT1(FBH)
 216    EMIT1(FBL)
 217    EMIT1(CBIT)
 218    EMIT3(MAD)
 219    EMIT2(ADDC)
 220    EMIT2(SUBB)
 221    EMIT1(DIM)
 222
 223 #undef EMIT1
 224 #undef EMIT2
 225 #undef EMIT3
 226
 227    vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
 228                                  src_reg src0, src_reg src1);
 229
 230    /**
 231     * Copy any live channel from \p src to the first channel of the
 232     * result.
 233     */
 234    src_reg emit_uniformize(const src_reg &src);
 235
 236    /** Fix all float operands of a 3-source instruction. */
 237    void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
 238
 239    src_reg fix_3src_operand(const src_reg &src);
 240    src_reg resolve_source_modifiers(const src_reg &src);
 241
        /* \p src1 defaults to a default-constructed src_reg, so the same entry
         * point can be called with a single source operand. */
 242    vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
 243                                const src_reg &src1 = src_reg());
 244
 245    src_reg fix_math_operand(const src_reg &src);
 246
        /* Pack/unpack helpers; each takes a destination and one source. */
 247    void emit_pack_half_2x16(dst_reg dst, src_reg src0);
 248    void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
 249    void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
 250    void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
 251    void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
 252    void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
 253
        /*
         * Texturing.  emit_texture() takes both a constant \p surface index
         * and a \p surface_reg; how the two interact is decided by the
         * definition, which is not visible here.
         */
 254    void emit_texture(ir_texture_opcode op,
 255                      dst_reg dest,
 256                      const glsl_type *dest_type,
 257                      src_reg coordinate,
 258                      int coord_components,
 259                      src_reg shadow_comparator,
 260                      src_reg lod, src_reg lod2,
 261                      src_reg sample_index,
 262                      uint32_t constant_offset,
 263                      src_reg offset_value,
 264                      src_reg mcs,
 265                      uint32_t surface, src_reg surface_reg,
 266                      src_reg sampler_reg);
 267
 268    src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
 269                           src_reg surface);
 270    void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
 271
        /* Output emission.  emit_urb_slot() is virtual, i.e. overridable by
         * subclasses. */
 272    void emit_ndc_computation();
 273    void emit_psiz_and_flags(dst_reg reg);
 274    vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
 275    virtual void emit_urb_slot(dst_reg reg, int varying);
 276
 277    void emit_shader_time_begin();
 278    void emit_shader_time_end();
 279    void emit_shader_time_write(int shader_time_subindex, src_reg value);
 280
        /*
         * Scratch and pull-constant helpers.  Most take a (block, inst) pair;
         * presumably the new code is inserted relative to \p inst within
         * \p block -- confirm against the definitions.
         */
 281    src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
 282                               src_reg *reladdr, int reg_offset);
 283    void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
 284                           dst_reg dst,
 285                           src_reg orig_src,
 286                           int base_offset);
 287    void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
 288                            int base_offset);
 289    void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
 290                                 dst_reg dst,
 291                                 src_reg orig_src,
 292                                 int base_offset,
 293                                 src_reg indirect);
 294    void emit_pull_constant_load_reg(dst_reg dst,
 295                                     src_reg surf_index,
 296                                     src_reg offset,
 297                                     bblock_t *before_block,
 298                                     vec4_instruction *before_inst);
 299    src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
 300                                 vec4_instruction *inst, src_reg src);
 301
 302    void resolve_ud_negate(src_reg *reg);
 303
 304    bool lower_minmax();
 305
 306    src_reg get_timestamp();
 307
        /* Both dump overloads are const; the second also takes the FILE to
         * write to. */
 308    void dump_instruction(const backend_instruction *inst) const;
 309    void dump_instruction(const backend_instruction *inst, FILE *file) const;
 310
 311    bool is_high_sampler(src_reg sampler);
 312
 313    bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);
 314
 315    void emit_conversion_from_double(dst_reg dst, src_reg src);
 316    void emit_conversion_to_double(dst_reg dst, src_reg src);
 317
        /* \p block and \p ref default to NULL; what a NULL pair selects is
         * decided by the definition -- confirm there. */
 318    vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
 319                                         bool for_write,
 320                                         bblock_t *block = NULL,
 321                                         vec4_instruction *ref = NULL);
 322
        /*
         * NIR entry points, one per NIR construct (impl, cf list, if, loop,
         * block, instruction kinds).  All but get_nir_ssbo_intrinsic_index()
         * are virtual, i.e. overridable by subclasses.
         */
 323    virtual void emit_nir_code();
 324    virtual void nir_setup_uniforms();
 325    virtual void nir_emit_impl(nir_function_impl *impl);
 326    virtual void nir_emit_cf_list(exec_list *list);
 327    virtual void nir_emit_if(nir_if *if_stmt);
 328    virtual void nir_emit_loop(nir_loop *loop);
 329    virtual void nir_emit_block(nir_block *block);
 330    virtual void nir_emit_instr(nir_instr *instr);
 331    virtual void nir_emit_load_const(nir_load_const_instr *instr);
 332    src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
 333    virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
 334    virtual void nir_emit_alu(nir_alu_instr *instr);
 335    virtual void nir_emit_jump(nir_jump_instr *instr);
 336    virtual void nir_emit_texture(nir_tex_instr *instr);
 337    virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
 338    virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
 339
        /*
         * Map NIR destinations/sources to backend registers.  Overloads accept
         * a hardware register type, a NIR ALU type, or no type at all;
         * \p num_components defaults to 4 on the get_nir_src() overloads.
         */
 340    dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
 341    dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
 342    dst_reg get_nir_dest(const nir_dest &dest);
 343    src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
 344                        unsigned num_components = 4);
 345    src_reg get_nir_src(const nir_src &src, nir_alu_type type,
 346                        unsigned num_components = 4);
 347    src_reg get_nir_src(const nir_src &src,
 348                        unsigned num_components = 4);
 349    src_reg get_nir_src_imm(const nir_src &src);
 350    src_reg get_indirect_offset(nir_intrinsic_instr *instr);
 351
        /* Raw dst_reg pointers; presumably arrays indexed by NIR register and
         * SSA value index.  Size and ownership are not visible here. */
 352    dst_reg *nir_locals;
 353    dst_reg *nir_ssa_values;
 354
 355 protected:
 356    void emit_vertex();
 357    void setup_payload_interference(struct ra_graph *g, int first_payload_node,
 358                                    int reg_node_count);
        /* Pure virtual hooks: every concrete subclass must implement these. */
 359    virtual void setup_payload() = 0;
 360    virtual void emit_prolog() = 0;
 361    virtual void emit_thread_end() = 0;
 362    virtual void emit_urb_write_header(int mrf) = 0;
 363    virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
        /* Virtual but not pure: subclasses may override them, and the base
         * class is expected to supply definitions outside this header. */
 364    virtual void gs_emit_vertex(int stream_id);
 365    virtual void gs_end_primitive();
 366
 367 private:
 368    /**
 369     * If true, then register allocation should fail instead of spilling.
 370     */
 371    const bool no_spills;
 372
 373    int shader_time_index;
 374
 375    unsigned last_scratch; /**< measured in 32-byte (register size) units */
 376 };
 377
 378 } /* namespace brw */
 379 #endif /* __cplusplus */
 380
 381 #endif /* BRW_VEC4_H */