i965/blorp: Simplify depth buffer state setup a bit
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.h
index b75ebfedb3d85cb0f032cbe6f2717831acff1d96..8b1ea798d908d0f592ecee84b660ef97a50cb28e 100644 (file)
@@ -30,7 +30,6 @@
 #include "brw_shader.h"
 #include "brw_ir_fs.h"
 #include "brw_fs_builder.h"
-#include "glsl/ir.h"
 #include "compiler/nir/nir.h"
 
 struct bblock_t;
@@ -45,25 +44,9 @@ namespace brw {
 struct brw_gs_compile;
 
 static inline fs_reg
-offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
+offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
 {
-   switch (reg.file) {
-   case BAD_FILE:
-      break;
-   case ARF:
-   case FIXED_GRF:
-   case MRF:
-   case VGRF:
-   case ATTR:
-      return byte_offset(reg,
-                         delta * reg.component_size(bld.dispatch_width()));
-   case UNIFORM:
-      reg.reg_offset += delta;
-      break;
-   case IMM:
-      assert(delta == 0);
-   }
-   return reg;
+   return offset(reg, bld.dispatch_width(), delta);
 }
 
 /**
@@ -108,15 +91,16 @@ public:
                                    uint32_t const_offset);
    void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
 
-   bool run_fs(bool do_rep_send);
+   bool run_fs(bool allow_spilling, bool do_rep_send);
    bool run_vs(gl_clip_plane *clip_planes);
+   bool run_tcs_single_patch();
    bool run_tes();
    bool run_gs();
    bool run_cs();
    void optimize();
-   void allocate_registers();
-   void setup_payload_gen4();
-   void setup_payload_gen6();
+   void allocate_registers(bool allow_spilling);
+   void setup_fs_payload_gen4();
+   void setup_fs_payload_gen6();
    void setup_vs_payload();
    void setup_gs_payload();
    void setup_cs_payload();
@@ -126,9 +110,10 @@ public:
    void assign_urb_setup();
    void convert_attr_sources_to_hw_regs(fs_inst *inst);
    void assign_vs_urb_setup();
+   void assign_tcs_single_patch_urb_setup();
    void assign_tes_urb_setup();
    void assign_gs_urb_setup();
-   bool assign_regs(bool allow_spilling);
+   bool assign_regs(bool allow_spilling, bool spill_all);
    void assign_regs_trivial();
    void calculate_payload_ranges(int payload_node_count,
                                  int *payload_last_use_ip);
@@ -139,7 +124,7 @@ public:
    void split_virtual_grfs();
    bool compact_virtual_grfs();
    void assign_constant_locations();
-   void demote_pull_constants();
+   void lower_constant_loads();
    void invalidate_live_intervals();
    void calculate_live_intervals();
    void calculate_register_pressure();
@@ -153,6 +138,7 @@ public:
    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
    bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
                                  exec_list *acp);
+   bool opt_drop_redundant_mov_to_flags();
    bool opt_register_renaming();
    bool register_coalesce();
    bool compute_to_mrf();
@@ -170,45 +156,28 @@ public:
                                                      fs_inst *inst);
    void vfail(const char *msg, va_list args);
    void fail(const char *msg, ...);
-   void no16(const char *msg);
+   void limit_dispatch_width(unsigned n, const char *msg);
    void lower_uniform_pull_constant_loads();
    bool lower_load_payload();
+   bool lower_pack();
+   bool lower_d2x();
    bool lower_logical_sends();
    bool lower_integer_multiplication();
+   bool lower_minmax();
    bool lower_simd_width();
    bool opt_combine_constants();
 
    void emit_dummy_fs();
    void emit_repclear_shader();
-   fs_reg *emit_fragcoord_interpolation(bool pixel_center_integer,
-                                        bool origin_upper_left);
-   fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
-                         glsl_interp_qualifier interpolation_mode,
-                         bool is_centroid, bool is_sample);
+   void emit_fragcoord_interpolation(fs_reg wpos);
    fs_reg *emit_frontfacing_interpolation();
    fs_reg *emit_samplepos_setup();
    fs_reg *emit_sampleid_setup();
-   void emit_general_interpolation(fs_reg *attr, const char *name,
-                                   const glsl_type *type,
-                                   glsl_interp_qualifier interpolation_mode,
-                                   int *location, bool mod_centroid,
-                                   bool mod_sample);
+   fs_reg *emit_samplemaskin_setup();
    fs_reg *emit_vs_system_value(int location);
    void emit_interpolation_setup_gen4();
    void emit_interpolation_setup_gen6();
    void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
-   void emit_texture(ir_texture_opcode op,
-                     const glsl_type *dest_type,
-                     fs_reg coordinate, int components,
-                     fs_reg shadow_c,
-                     fs_reg lod, fs_reg dpdy, int grad_components,
-                     fs_reg sample_index,
-                     fs_reg offset,
-                     fs_reg mcs,
-                     int gather_component,
-                     bool is_cube_array,
-                     uint32_t sampler,
-                     fs_reg sampler_reg);
    fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
                          const fs_reg &sampler);
    void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
@@ -219,13 +188,8 @@ public:
    bool opt_saturate_propagation();
    bool opt_cmod_propagation();
    bool opt_zero_samples();
-   void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
-                     uint32_t spill_offset, int count);
-   void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,
-                   uint32_t spill_offset, int count);
 
    void emit_nir_code();
-   void nir_setup_inputs();
    void nir_setup_single_output_varying(fs_reg *reg, const glsl_type *type,
                                         unsigned *location);
    void nir_setup_outputs();
@@ -240,10 +204,10 @@ public:
    void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
    void nir_emit_load_const(const brw::fs_builder &bld,
                             nir_load_const_instr *instr);
-   void nir_emit_undef(const brw::fs_builder &bld,
-                       nir_ssa_undef_instr *instr);
    void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
                               nir_intrinsic_instr *instr);
+   void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
+                               nir_intrinsic_instr *instr);
    void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
                               nir_intrinsic_instr *instr);
    void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
@@ -262,13 +226,16 @@ public:
                          nir_tex_instr *instr);
    void nir_emit_jump(const brw::fs_builder &bld,
                       nir_jump_instr *instr);
-   fs_reg get_nir_src(nir_src src);
-   fs_reg get_nir_dest(nir_dest dest);
+   fs_reg get_nir_src(const nir_src &src);
+   fs_reg get_nir_src_imm(const nir_src &src);
+   fs_reg get_nir_dest(const nir_dest &dest);
    fs_reg get_nir_image_deref(const nir_deref_var *deref);
    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
                      unsigned wr_mask);
 
+   bool optimize_extract_to_float(nir_alu_instr *instr,
+                                  const fs_reg &result);
    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
                                      const fs_reg &result);
 
@@ -287,9 +254,8 @@ public:
    void emit_gs_thread_end();
    void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
                            unsigned base_offset, const nir_src &offset_src,
-                           unsigned num_components);
+                           unsigned num_components, unsigned first_component);
    void emit_cs_terminate();
-   fs_reg *emit_cs_local_invocation_id_setup();
    fs_reg *emit_cs_work_group_id_setup();
 
    void emit_barrier();
@@ -321,8 +287,6 @@ public:
 
    const struct brw_vue_map *input_vue_map;
 
-   int *param_size;
-
    int *virtual_grf_start;
    int *virtual_grf_end;
    brw::fs_live_variables *live_intervals;
@@ -351,7 +315,6 @@ public:
    fs_reg frag_stencil;
    fs_reg sample_mask;
    fs_reg outputs[VARYING_SLOT_MAX];
-   unsigned output_components[VARYING_SLOT_MAX];
    fs_reg dual_src_output;
    bool do_dual_src;
    int first_non_payload_grf;
@@ -366,11 +329,6 @@ public:
 
    bool failed;
    char *fail_msg;
-   bool simd16_unsupported;
-   char *no16_msg;
-
-   /* Result of last visit() method. Still used by emit_texture() */
-   fs_reg result;
 
    /** Register numbers for thread payload fields. */
    struct thread_payload {
@@ -380,7 +338,7 @@ public:
       uint8_t dest_depth_reg;
       uint8_t sample_pos_reg;
       uint8_t sample_mask_in_reg;
-      uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
+      uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT];
       uint8_t local_invocation_id_reg;
 
       /** The number of thread payload registers the hardware will supply. */
@@ -394,16 +352,19 @@ public:
    fs_reg pixel_y;
    fs_reg wpos_w;
    fs_reg pixel_w;
-   fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
+   fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
    fs_reg shader_start_time;
    fs_reg userplane[MAX_CLIP_PLANES];
    fs_reg final_gs_vertex_count;
    fs_reg control_data_bits;
+   fs_reg invocation_id;
 
    unsigned grf_used;
    bool spilled_any_registers;
 
-   const unsigned dispatch_width; /**< 8 or 16 */
+   const unsigned dispatch_width; /**< 8, 16 or 32 */
+   unsigned min_dispatch_width;
+   unsigned max_dispatch_width;
 
    int shader_time_index;
 
@@ -441,30 +402,17 @@ private:
    void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
-   void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst,
-                                     struct brw_reg src);
    void generate_barrier(fs_inst *inst, struct brw_reg src);
-   void generate_blorp_fb_write(fs_inst *inst);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+                     struct brw_reg surface_index,
                      struct brw_reg sampler_index);
    void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
                                  struct brw_reg src,
                                  struct brw_reg surf_index);
-   void generate_math_gen6(fs_inst *inst,
-                           struct brw_reg dst,
-                           struct brw_reg src0,
-                           struct brw_reg src1);
-   void generate_math_gen4(fs_inst *inst,
-                          struct brw_reg dst,
-                          struct brw_reg src);
-   void generate_math_g45(fs_inst *inst,
-                         struct brw_reg dst,
-                         struct brw_reg src);
    void generate_ddx(enum opcode op, struct brw_reg dst, struct brw_reg src);
-   void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src,
-                     bool negate_value);
+   void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src);
    void generate_scratch_write(fs_inst *inst, struct brw_reg src);
    void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
    void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
@@ -475,9 +423,9 @@ private:
                                                  struct brw_reg dst,
                                                  struct brw_reg surf_index,
                                                  struct brw_reg offset);
-   void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst,
-                                            struct brw_reg index,
-                                            struct brw_reg offset);
+   void generate_varying_pull_constant_load_gen4(fs_inst *inst,
+                                                 struct brw_reg dst,
+                                                 struct brw_reg index);
    void generate_varying_pull_constant_load_gen7(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
@@ -529,7 +477,7 @@ private:
    const void * const key;
    struct brw_stage_prog_data * const prog_data;
 
-   unsigned dispatch_width; /**< 8 or 16 */
+   unsigned dispatch_width; /**< 8, 16 or 32 */
 
    exec_list discard_halt_patches;
    unsigned promoted_constants;
@@ -542,3 +490,18 @@ private:
 
 bool brw_do_channel_expressions(struct exec_list *instructions);
 bool brw_do_vector_splitting(struct exec_list *instructions);
+
+void shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder &bld,
+                                             const fs_reg &dst,
+                                             const fs_reg &src,
+                                             uint32_t components);
+
+void shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld,
+                                        const fs_reg &dst,
+                                        const fs_reg &src,
+                                        uint32_t components);
+fs_reg setup_imm_df(const brw::fs_builder &bld,
+                    double v);
+
+enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
+                                               nir_intrinsic_op op);