X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4.h;h=4a5b57775c30abea3bb29aaacbf96267b7b49176;hb=a92e5f7cf63d496ad7830b5cea4bbab287c25b8e;hp=87401351d45910ff434c50f510dc7924b5be6f01;hpb=28fe02ce6e6fa6061cf69af9b292ee6553591473;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 87401351d45..4a5b57775c3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -28,15 +28,64 @@ #include "brw_shader.h" #include "main/compiler.h" #include "program/hash_table.h" +#include "brw_program.h" +#ifdef __cplusplus extern "C" { -#include "brw_vs.h" +#endif + #include "brw_context.h" #include "brw_eu.h" -}; + +#ifdef __cplusplus +}; /* extern "C" */ +#include "gen8_generator.h" +#endif #include "glsl/ir.h" + +struct brw_vec4_compile { + GLuint last_scratch; /**< measured in 32-byte (register size) units */ +}; + + +struct brw_vec4_prog_key { + GLuint program_string_id; + + /** + * True if at least one clip flag is enabled, regardless of whether the + * shader uses clip planes or gl_ClipDistance. + */ + GLuint userclip_active:1; + + /** + * How many user clipping planes are being uploaded to the vertex shader as + * push constants. + */ + GLuint nr_userclip_plane_consts:4; + + GLuint clamp_vertex_color:1; + + struct brw_sampler_prog_key_data tex; +}; + + +#ifdef __cplusplus +extern "C" { +#endif + +void +brw_vec4_setup_prog_key_for_precompile(struct gl_context *ctx, + struct brw_vec4_prog_key *key, + GLuint id, struct gl_program *prog); +bool brw_vec4_prog_data_compare(const struct brw_vec4_prog_data *a, + const struct brw_vec4_prog_data *b); +void brw_vec4_prog_data_free(const struct brw_vec4_prog_data *prog_data); + +#ifdef __cplusplus +} /* extern "C" */ + namespace brw { class dst_reg; @@ -44,21 +93,10 @@ class dst_reg; unsigned swizzle_for_size(int size); -enum register_file { - ARF = BRW_ARCHITECTURE_REGISTER_FILE, - GRF = BRW_GENERAL_REGISTER_FILE, - MRF = BRW_MESSAGE_REGISTER_FILE, - IMM = BRW_IMMEDIATE_VALUE, - HW_REG, /* a struct brw_reg */ - ATTR, - UNIFORM, /* prog_data->params[hw_reg] */ - BAD_FILE -}; - class reg { public: - /** Register file: ARF, GRF, MRF, IMM. */ + /** Register file: GRF, MRF, IMM. */ enum register_file file; /** virtual register number. 0 = fixed hw reg */ int reg; @@ -79,17 +117,7 @@ public: class src_reg : public reg { public: - /* Callers of this ralloc-based new need not call delete. It's - * easier to just ralloc_free 'ctx' (or any of its ancestors). */ - static void* operator new(size_t size, void *ctx) - { - void *node; - - node = ralloc_size(ctx, size); - assert(node != NULL); - - return node; - } + DECLARE_RALLOC_CXX_OPERATORS(src_reg) void init(); @@ -117,17 +145,7 @@ public: class dst_reg : public reg { public: - /* Callers of this ralloc-based new need not call delete. It's - * easier to just ralloc_free 'ctx' (or any of its ancestors). */ - static void* operator new(size_t size, void *ctx) - { - void *node; - - node = ralloc_size(ctx, size); - assert(node != NULL); - - return node; - } + DECLARE_RALLOC_CXX_OPERATORS(dst_reg) void init(); @@ -144,19 +162,12 @@ public: src_reg *reladdr; }; +dst_reg +with_writemask(dst_reg const &r, int mask); + class vec4_instruction : public backend_instruction { public: - /* Callers of this ralloc-based new need not call delete. It's - * easier to just ralloc_free 'ctx' (or any of its ancestors). */ - static void* operator new(size_t size, void *ctx) - { - void *node; - - node = rzalloc_size(ctx, size); - assert(node != NULL); - - return node; - } + DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction) vec4_instruction(vec4_visitor *v, enum opcode opcode, dst_reg dst = dst_reg(), @@ -165,7 +176,7 @@ public: src_reg src2 = src_reg()); struct brw_reg get_dst(void); - struct brw_reg get_src(int i); + struct brw_reg get_src(const struct brw_vec4_prog_data *prog_data, int i); dst_reg dst; src_reg src[3]; @@ -181,7 +192,7 @@ public: int target; /**< MRT target. */ bool shadow_compare; - bool eot; + enum brw_urb_write_flags urb_write_flags; bool header_present; int mlen; /**< SEND message length */ int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ @@ -193,11 +204,14 @@ public: const void *ir; const char *annotation; - bool is_tex(); - bool is_math(); bool is_send_from_grf(); bool can_reswizzle_dst(int dst_writemask, int swizzle, int swizzle_mask); void reswizzle_dst(int dst_writemask, int swizzle); + + bool depends_on_flags() + { + return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2; + } }; /** @@ -216,7 +230,12 @@ public: struct brw_vec4_prog_data *prog_data, struct gl_shader_program *shader_prog, struct brw_shader *shader, - void *mem_ctx); + void *mem_ctx, + bool debug_flag, + bool no_spills, + shader_time_shader_type st_base, + shader_time_shader_type st_written, + shader_time_shader_type st_reset); ~vec4_visitor(); dst_reg dst_null_f() @@ -229,7 +248,11 @@ public: return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); } - struct gl_program *prog; + dst_reg dst_null_ud() + { + return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + } + struct brw_vec4_compile *c; const struct brw_vec4_prog_key *key; struct brw_vec4_prog_data *prog_data; @@ -250,8 +273,8 @@ public: int virtual_grf_array_size; int first_non_payload_grf; unsigned int max_grf; - int *virtual_grf_def; - int *virtual_grf_use; + int *virtual_grf_start; + int *virtual_grf_end; dst_reg userplane[MAX_CLIP_PLANES]; /** @@ -295,6 +318,8 @@ public: virtual void visit(ir_discard *); virtual void visit(ir_texture *); virtual void visit(ir_if *); + virtual void visit(ir_emit_vertex *); + virtual void visit(ir_end_primitive *); /*@}*/ src_reg result; @@ -320,7 +345,6 @@ public: void setup_uniform_values(ir_variable *ir); void setup_builtin_uniform_values(ir_variable *ir); int setup_uniforms(int payload_reg); - void setup_payload(); bool reg_allocate_trivial(); bool reg_allocate(); void evaluate_spill_costs(float *spill_costs, bool *no_spill); @@ -332,6 +356,7 @@ public: void split_uniform_registers(); void pack_uniform_registers(); void calculate_live_intervals(); + void invalidate_live_intervals(); void split_virtual_grfs(); bool dead_code_eliminate(); bool virtual_grf_interferes(int a, int b); @@ -339,6 +364,7 @@ public: bool opt_algebraic(); bool opt_register_coalesce(); void opt_set_dependency_control(); + void opt_schedule_instructions(); bool can_do_source_mods(vec4_instruction *inst); @@ -346,6 +372,8 @@ public: vec4_instruction *emit(enum opcode opcode); + vec4_instruction *emit(enum opcode opcode, dst_reg dst); + vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); vec4_instruction *emit(enum opcode opcode, dst_reg dst, @@ -385,6 +413,17 @@ public: vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index); vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index); vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index); + vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x); + vec4_instruction *BFREV(dst_reg dst, src_reg value); + vec4_instruction *BFE(dst_reg dst, src_reg bits, src_reg offset, src_reg value); + vec4_instruction *BFI1(dst_reg dst, src_reg bits, src_reg offset); + vec4_instruction *BFI2(dst_reg dst, src_reg bfi1_dst, src_reg insert, src_reg base); + vec4_instruction *FBH(dst_reg dst, src_reg value); + vec4_instruction *FBL(dst_reg dst, src_reg value); + vec4_instruction *CBIT(dst_reg dst, src_reg value); + vec4_instruction *MAD(dst_reg dst, src_reg c, src_reg b, src_reg a); + vec4_instruction *ADDC(dst_reg dst, src_reg src0, src_reg src1); + vec4_instruction *SUBB(dst_reg dst, src_reg src0, src_reg src1); int implied_mrf_writes(vec4_instruction *inst); @@ -394,8 +433,7 @@ public: vec4_instruction *pre_rhs_inst, vec4_instruction *last_rhs_inst); - bool try_copy_propagation(struct intel_context *intel, - vec4_instruction *inst, int arg, + bool try_copy_propagation(vec4_instruction *inst, int arg, src_reg *values[4]); /** Walks an exec_list of ir_instruction and sends it through this visitor. */ @@ -429,6 +467,8 @@ public: void emit_scs(ir_instruction *ir, enum prog_opcode op, dst_reg dst, const src_reg &src); + src_reg fix_3src_operand(src_reg src); + void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src); void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src); void emit_math(enum opcode opcode, dst_reg dst, src_reg src); @@ -440,11 +480,13 @@ public: void emit_pack_half_2x16(dst_reg dst, src_reg src0); void emit_unpack_half_2x16(dst_reg dst, src_reg src0); + uint32_t gather_channel(ir_texture *ir, int sampler); + src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, int sampler); void swizzle_result(ir_texture *ir, src_reg orig_val, int sampler); void emit_ndc_computation(); void emit_psiz_and_flags(struct brw_reg reg); - void emit_clip_distances(struct brw_reg reg, int offset); + void emit_clip_distances(dst_reg reg, int offset); void emit_generic_urb_slot(dst_reg reg, int varying); void emit_urb_slot(int mrf, int varying); @@ -453,6 +495,13 @@ public: void emit_shader_time_write(enum shader_time_shader_type type, src_reg value); + void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, + dst_reg dst, src_reg offset, src_reg src0, + src_reg src1); + + void emit_untyped_surface_read(unsigned surf_index, dst_reg dst, + src_reg offset); + src_reg get_scratch_offset(vec4_instruction *inst, src_reg *reladdr, int reg_offset); src_reg get_pull_constant_offset(vec4_instruction *inst, @@ -469,56 +518,46 @@ public: int base_offset); bool try_emit_sat(ir_expression *ir); + bool try_emit_mad(ir_expression *ir, int mul_arg); void resolve_ud_negate(src_reg *reg); src_reg get_timestamp(); bool process_move_condition(ir_rvalue *ir); - void dump_instruction(vec4_instruction *inst); - void dump_instructions(); + void dump_instruction(backend_instruction *inst); + + void visit_atomic_counter_intrinsic(ir_call *ir); protected: void emit_vertex(); + void lower_attributes_to_hw_regs(const int *attribute_map, + bool interleaved); + void setup_payload_interference(struct ra_graph *g, int first_payload_node, + int reg_node_count); virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0; - virtual int setup_attributes(int payload_reg) = 0; + virtual void setup_payload() = 0; virtual void emit_prolog() = 0; virtual void emit_program_code() = 0; virtual void emit_thread_end() = 0; virtual void emit_urb_write_header(int mrf) = 0; virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0; -}; - -class vec4_vs_visitor : public vec4_visitor -{ -public: - vec4_vs_visitor(struct brw_context *brw, - struct brw_vs_compile *vs_compile, - struct brw_vs_prog_data *vs_prog_data, - struct gl_shader_program *prog, - struct brw_shader *shader, - void *mem_ctx); + virtual int compute_array_stride(ir_dereference_array *ir); -protected: - virtual dst_reg *make_reg_for_system_value(ir_variable *ir); - virtual int setup_attributes(int payload_reg); - virtual void emit_prolog(); - virtual void emit_program_code(); - virtual void emit_thread_end(); - virtual void emit_urb_write_header(int mrf); - virtual vec4_instruction *emit_urb_write_opcode(bool complete); + const bool debug_flag; private: - void setup_vp_regs(); - dst_reg get_vp_dst_reg(const prog_dst_register &dst); - src_reg get_vp_src_reg(const prog_src_register &src); - - struct brw_vs_compile * const vs_compile; - struct brw_vs_prog_data * const vs_prog_data; - src_reg *vp_temp_regs; - src_reg vp_addr_reg; + /** + * If true, then register allocation should fail instead of spilling. + */ + const bool no_spills; + + const shader_time_shader_type st_base; + const shader_time_shader_type st_written; + const shader_time_shader_type st_reset; }; + /** * The vertex shader code generator. * @@ -530,7 +569,9 @@ public: vec4_generator(struct brw_context *brw, struct gl_shader_program *shader_prog, struct gl_program *prog, - void *mem_ctx); + struct brw_vec4_prog_data *prog_data, + void *mem_ctx, + bool debug_flag); ~vec4_generator(); const unsigned *generate_assembly(exec_list *insts, unsigned *asm_size); @@ -564,7 +605,17 @@ private: struct brw_reg dst, struct brw_reg src); - void generate_urb_write(vec4_instruction *inst); + void generate_vs_urb_write(vec4_instruction *inst); + void generate_gs_urb_write(vec4_instruction *inst); + void generate_gs_thread_end(vec4_instruction *inst); + void generate_gs_set_write_offset(struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_gs_set_vertex_count(struct brw_reg dst, + struct brw_reg src); + void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src); + void generate_gs_prepare_channel_masks(struct brw_reg dst); + void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); void generate_scratch_write(vec4_instruction *inst, @@ -582,10 +633,21 @@ private: struct brw_reg dst, struct brw_reg surf_index, struct brw_reg offset); + void generate_unpack_flags(vec4_instruction *inst, + struct brw_reg dst); + + void generate_untyped_atomic(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg atomic_op, + struct brw_reg surf_index); + + void generate_untyped_surface_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index); + + void mark_surface_used(unsigned surf_index); struct brw_context *brw; - struct intel_context *intel; - struct gl_context *ctx; struct brw_compile *p; @@ -593,9 +655,73 @@ private: struct gl_shader *shader; const struct gl_program *prog; + struct brw_vec4_prog_data *prog_data; + void *mem_ctx; + const bool debug_flag; }; +/** + * The vertex shader code generator. + * + * Translates VS IR to actual i965 assembly code. + */ +class gen8_vec4_generator : public gen8_generator +{ +public: + gen8_vec4_generator(struct brw_context *brw, + struct gl_shader_program *shader_prog, + struct gl_program *prog, + struct brw_vec4_prog_data *prog_data, + void *mem_ctx, + bool debug_flag); + ~gen8_vec4_generator(); + + const unsigned *generate_assembly(exec_list *insts, unsigned *asm_size); + +private: + void generate_code(exec_list *instructions); + void generate_vec4_instruction(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg *src); + + void generate_tex(vec4_instruction *inst, + struct brw_reg dst); + + void generate_urb_write(vec4_instruction *ir, bool copy_g0); + void generate_gs_thread_end(vec4_instruction *ir); + void generate_gs_set_write_offset(struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_gs_set_vertex_count(struct brw_reg dst, + struct brw_reg src); + void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src); + void generate_gs_prepare_channel_masks(struct brw_reg dst); + void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src); + + void generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index); + void generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index); + void generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index); + void generate_pull_constant_load(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index, + struct brw_reg offset); + + void mark_surface_used(unsigned surf_index); + + struct brw_vec4_prog_data *prog_data; + + const bool debug_flag; +}; + + } /* namespace brw */ +#endif /* __cplusplus */ #endif /* BRW_VEC4_H */