i965/blorp: Simplify depth buffer state setup a bit
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.h
index 2d408b2f36335bdfab63b55b815e78cac91bb005..8b1ea798d908d0f592ecee84b660ef97a50cb28e 100644 (file)
 #include "brw_shader.h"
 #include "brw_ir_fs.h"
 #include "brw_fs_builder.h"
-
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_optimize.h"
-#include "util/register_allocate.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-#include "intel_asm_annotation.h"
-}
-#include "glsl/nir/glsl_types.h"
-#include "glsl/ir.h"
-#include "glsl/nir/nir.h"
-#include "program/sampler.h"
+#include "compiler/nir/nir.h"
 
 struct bblock_t;
 namespace {
@@ -65,25 +44,9 @@ namespace brw {
 struct brw_gs_compile;
 
 static inline fs_reg
-offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
+offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
 {
-   switch (reg.file) {
-   case BAD_FILE:
-      break;
-   case ARF:
-   case FIXED_GRF:
-   case MRF:
-   case VGRF:
-   case ATTR:
-      return byte_offset(reg,
-                         delta * reg.component_size(bld.dispatch_width()));
-   case UNIFORM:
-      reg.reg_offset += delta;
-      break;
-   case IMM:
-      assert(delta == 0);
-   }
-   return reg;
+   return offset(reg, bld.dispatch_width(), delta);
 }
 
 /**
@@ -101,7 +64,8 @@ public:
               struct gl_program *prog,
               const nir_shader *shader,
               unsigned dispatch_width,
-              int shader_time_index);
+              int shader_time_index,
+              const struct brw_vue_map *input_vue_map = NULL);
    fs_visitor(const struct brw_compiler *compiler, void *log_data,
               void *mem_ctx,
               struct brw_gs_compile *gs_compile,
@@ -127,14 +91,16 @@ public:
                                    uint32_t const_offset);
    void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
 
-   bool run_fs(bool do_rep_send);
+   bool run_fs(bool allow_spilling, bool do_rep_send);
    bool run_vs(gl_clip_plane *clip_planes);
+   bool run_tcs_single_patch();
+   bool run_tes();
    bool run_gs();
    bool run_cs();
    void optimize();
-   void allocate_registers();
-   void setup_payload_gen4();
-   void setup_payload_gen6();
+   void allocate_registers(bool allow_spilling);
+   void setup_fs_payload_gen4();
+   void setup_fs_payload_gen6();
    void setup_vs_payload();
    void setup_gs_payload();
    void setup_cs_payload();
@@ -144,8 +110,10 @@ public:
    void assign_urb_setup();
    void convert_attr_sources_to_hw_regs(fs_inst *inst);
    void assign_vs_urb_setup();
+   void assign_tcs_single_patch_urb_setup();
+   void assign_tes_urb_setup();
    void assign_gs_urb_setup();
-   bool assign_regs(bool allow_spilling);
+   bool assign_regs(bool allow_spilling, bool spill_all);
    void assign_regs_trivial();
    void calculate_payload_ranges(int payload_node_count,
                                  int *payload_last_use_ip);
@@ -156,7 +124,7 @@ public:
    void split_virtual_grfs();
    bool compact_virtual_grfs();
    void assign_constant_locations();
-   void demote_pull_constants();
+   void lower_constant_loads();
    void invalidate_live_intervals();
    void calculate_live_intervals();
    void calculate_register_pressure();
@@ -170,6 +138,7 @@ public:
    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
    bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
                                  exec_list *acp);
+   bool opt_drop_redundant_mov_to_flags();
    bool opt_register_renaming();
    bool register_coalesce();
    bool compute_to_mrf();
@@ -187,45 +156,28 @@ public:
                                                      fs_inst *inst);
    void vfail(const char *msg, va_list args);
    void fail(const char *msg, ...);
-   void no16(const char *msg);
+   void limit_dispatch_width(unsigned n, const char *msg);
    void lower_uniform_pull_constant_loads();
    bool lower_load_payload();
+   bool lower_pack();
+   bool lower_d2x();
    bool lower_logical_sends();
    bool lower_integer_multiplication();
+   bool lower_minmax();
    bool lower_simd_width();
    bool opt_combine_constants();
 
    void emit_dummy_fs();
    void emit_repclear_shader();
-   fs_reg *emit_fragcoord_interpolation(bool pixel_center_integer,
-                                        bool origin_upper_left);
-   fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
-                         glsl_interp_qualifier interpolation_mode,
-                         bool is_centroid, bool is_sample);
+   void emit_fragcoord_interpolation(fs_reg wpos);
    fs_reg *emit_frontfacing_interpolation();
    fs_reg *emit_samplepos_setup();
    fs_reg *emit_sampleid_setup();
-   void emit_general_interpolation(fs_reg attr, const char *name,
-                                   const glsl_type *type,
-                                   glsl_interp_qualifier interpolation_mode,
-                                   int location, bool mod_centroid,
-                                   bool mod_sample);
+   fs_reg *emit_samplemaskin_setup();
    fs_reg *emit_vs_system_value(int location);
    void emit_interpolation_setup_gen4();
    void emit_interpolation_setup_gen6();
    void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
-   void emit_texture(ir_texture_opcode op,
-                     const glsl_type *dest_type,
-                     fs_reg coordinate, int components,
-                     fs_reg shadow_c,
-                     fs_reg lod, fs_reg dpdy, int grad_components,
-                     fs_reg sample_index,
-                     fs_reg offset,
-                     fs_reg mcs,
-                     int gather_component,
-                     bool is_cube_array,
-                     uint32_t sampler,
-                     fs_reg sampler_reg);
    fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
                          const fs_reg &sampler);
    void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
@@ -236,13 +188,10 @@ public:
    bool opt_saturate_propagation();
    bool opt_cmod_propagation();
    bool opt_zero_samples();
-   void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
-                     uint32_t spill_offset, int count);
-   void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,
-                   uint32_t spill_offset, int count);
 
    void emit_nir_code();
-   void nir_setup_inputs();
+   void nir_setup_single_output_varying(fs_reg *reg, const glsl_type *type,
+                                        unsigned *location);
    void nir_setup_outputs();
    void nir_setup_uniforms();
    void nir_emit_system_values();
@@ -255,10 +204,10 @@ public:
    void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
    void nir_emit_load_const(const brw::fs_builder &bld,
                             nir_load_const_instr *instr);
-   void nir_emit_undef(const brw::fs_builder &bld,
-                       nir_ssa_undef_instr *instr);
    void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
                               nir_intrinsic_instr *instr);
+   void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
+                               nir_intrinsic_instr *instr);
    void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
                               nir_intrinsic_instr *instr);
    void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
@@ -267,18 +216,26 @@ public:
                               nir_intrinsic_instr *instr);
    void nir_emit_intrinsic(const brw::fs_builder &bld,
                            nir_intrinsic_instr *instr);
+   void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
+                               nir_intrinsic_instr *instr);
    void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
                              int op, nir_intrinsic_instr *instr);
+   void nir_emit_shared_atomic(const brw::fs_builder &bld,
+                               int op, nir_intrinsic_instr *instr);
    void nir_emit_texture(const brw::fs_builder &bld,
                          nir_tex_instr *instr);
    void nir_emit_jump(const brw::fs_builder &bld,
                       nir_jump_instr *instr);
-   fs_reg get_nir_src(nir_src src);
-   fs_reg get_nir_dest(nir_dest dest);
+   fs_reg get_nir_src(const nir_src &src);
+   fs_reg get_nir_src_imm(const nir_src &src);
+   fs_reg get_nir_dest(const nir_dest &dest);
    fs_reg get_nir_image_deref(const nir_deref_var *deref);
+   fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
                      unsigned wr_mask);
 
+   bool optimize_extract_to_float(nir_alu_instr *instr,
+                                  const fs_reg &result);
    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
                                      const fs_reg &result);
 
@@ -296,10 +253,9 @@ public:
                        unsigned stream_id);
    void emit_gs_thread_end();
    void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
-                           const fs_reg &indirect_offset, unsigned imm_offset,
-                           unsigned num_components);
+                           unsigned base_offset, const nir_src &offset_src,
+                           unsigned num_components, unsigned first_component);
    void emit_cs_terminate();
-   fs_reg *emit_cs_local_invocation_id_setup();
    fs_reg *emit_cs_work_group_id_setup();
 
    void emit_barrier();
@@ -329,7 +285,7 @@ public:
    struct brw_stage_prog_data *prog_data;
    struct gl_program *prog;
 
-   int *param_size;
+   const struct brw_vue_map *input_vue_map;
 
    int *virtual_grf_start;
    int *virtual_grf_end;
@@ -359,7 +315,6 @@ public:
    fs_reg frag_stencil;
    fs_reg sample_mask;
    fs_reg outputs[VARYING_SLOT_MAX];
-   unsigned output_components[VARYING_SLOT_MAX];
    fs_reg dual_src_output;
    bool do_dual_src;
    int first_non_payload_grf;
@@ -374,11 +329,6 @@ public:
 
    bool failed;
    char *fail_msg;
-   bool simd16_unsupported;
-   char *no16_msg;
-
-   /* Result of last visit() method. Still used by emit_texture() */
-   fs_reg result;
 
    /** Register numbers for thread payload fields. */
    struct thread_payload {
@@ -388,7 +338,7 @@ public:
       uint8_t dest_depth_reg;
       uint8_t sample_pos_reg;
       uint8_t sample_mask_in_reg;
-      uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
+      uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT];
       uint8_t local_invocation_id_reg;
 
       /** The number of thread payload registers the hardware will supply. */
@@ -402,16 +352,19 @@ public:
    fs_reg pixel_y;
    fs_reg wpos_w;
    fs_reg pixel_w;
-   fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
+   fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
    fs_reg shader_start_time;
    fs_reg userplane[MAX_CLIP_PLANES];
    fs_reg final_gs_vertex_count;
    fs_reg control_data_bits;
+   fs_reg invocation_id;
 
    unsigned grf_used;
    bool spilled_any_registers;
 
-   const unsigned dispatch_width; /**< 8 or 16 */
+   const unsigned dispatch_width; /**< 8, 16 or 32 */
+   unsigned min_dispatch_width;
+   unsigned max_dispatch_width;
 
    int shader_time_index;
 
@@ -433,7 +386,7 @@ public:
                 struct brw_stage_prog_data *prog_data,
                 unsigned promoted_constants,
                 bool runtime_check_aads_emit,
-                const char *stage_abbrev);
+                gl_shader_stage stage);
    ~fs_generator();
 
    void enable_debug(const char *shader_name);
@@ -449,30 +402,17 @@ private:
    void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
-   void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst,
-                                     struct brw_reg src);
    void generate_barrier(fs_inst *inst, struct brw_reg src);
-   void generate_blorp_fb_write(fs_inst *inst);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+                     struct brw_reg surface_index,
                      struct brw_reg sampler_index);
    void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
                                  struct brw_reg src,
                                  struct brw_reg surf_index);
-   void generate_math_gen6(fs_inst *inst,
-                           struct brw_reg dst,
-                           struct brw_reg src0,
-                           struct brw_reg src1);
-   void generate_math_gen4(fs_inst *inst,
-                          struct brw_reg dst,
-                          struct brw_reg src);
-   void generate_math_g45(fs_inst *inst,
-                         struct brw_reg dst,
-                         struct brw_reg src);
    void generate_ddx(enum opcode op, struct brw_reg dst, struct brw_reg src);
-   void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src,
-                     bool negate_value);
+   void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src);
    void generate_scratch_write(fs_inst *inst, struct brw_reg src);
    void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
    void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
@@ -483,9 +423,9 @@ private:
                                                  struct brw_reg dst,
                                                  struct brw_reg surf_index,
                                                  struct brw_reg offset);
-   void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst,
-                                            struct brw_reg index,
-                                            struct brw_reg offset);
+   void generate_varying_pull_constant_load_gen4(fs_inst *inst,
+                                                 struct brw_reg dst,
+                                                 struct brw_reg index);
    void generate_varying_pull_constant_load_gen7(fs_inst *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
@@ -537,16 +477,31 @@ private:
    const void * const key;
    struct brw_stage_prog_data * const prog_data;
 
-   unsigned dispatch_width; /**< 8 or 16 */
+   unsigned dispatch_width; /**< 8, 16 or 32 */
 
    exec_list discard_halt_patches;
    unsigned promoted_constants;
    bool runtime_check_aads_emit;
    bool debug_flag;
    const char *shader_name;
-   const char *stage_abbrev;
+   gl_shader_stage stage;
    void *mem_ctx;
 };
 
 bool brw_do_channel_expressions(struct exec_list *instructions);
 bool brw_do_vector_splitting(struct exec_list *instructions);
+
+void shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder &bld,
+                                             const fs_reg &dst,
+                                             const fs_reg &src,
+                                             uint32_t components);
+
+void shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld,
+                                        const fs_reg &dst,
+                                        const fs_reg &src,
+                                        uint32_t components);
+fs_reg setup_imm_df(const brw::fs_builder &bld,
+                    double v);
+
+enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
+                                               nir_intrinsic_op op);