i965: Use NIR for lowering texture swizzle
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.h
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_VEC4_H
25 #define BRW_VEC4_H
26
27 #include <stdint.h>
28 #include "brw_shader.h"
29 #include "main/compiler.h"
30 #include "program/hash_table.h"
31 #include "brw_program.h"
32
33 #ifdef __cplusplus
34 #include "brw_ir_vec4.h"
35
36 extern "C" {
37 #endif
38
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "intel_asm_annotation.h"
42
43 #ifdef __cplusplus
44 }; /* extern "C" */
45 #endif
46
47 #include "glsl/ir.h"
48 #include "glsl/nir/nir.h"
49
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
/**
 * Entry point declared with C linkage: when this header is compiled as
 * C++ the prototype sits inside an extern "C" block.
 *
 * The prototype returns a pointer to const unsigned and takes an
 * out_assembly_size output pointer.  Presumably the return value is the
 * generated vec4 assembly and *out_assembly_size receives its size --
 * TODO confirm against the definition, including the unit of the size and
 * which context owns the returned memory.
 */
55 const unsigned *
56 brw_vec4_generate_assembly(const struct brw_compiler *compiler,
57 void *log_data,
58 void *mem_ctx,
59 const nir_shader *nir,
60 struct brw_vue_prog_data *prog_data,
61 const struct cfg_t *cfg,
62 unsigned *out_assembly_size);
63
64 #ifdef __cplusplus
65 } /* extern "C" */
66
67 namespace brw {
68
69 class vec4_live_variables;
70
71 /**
72 * The vertex shader front-end.
73 *
74 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
75 * fixed-function) into VS IR.
 *
 * The class is abstract: make_reg_for_system_value(), setup_payload(),
 * emit_prolog(), emit_thread_end(), emit_urb_write_header() and
 * emit_urb_write_opcode() are pure virtual and must be supplied by a
 * derived class.
 *
 * NOTE(review): the constructor takes a nir_shader and the class declares
 * nir_emit_*() hooks, so the "GLSL IR or Mesa IR" wording above may be
 * stale -- confirm against the implementation.  The gs_emit_vertex() and
 * gs_end_primitive() hooks also suggest this is a base for more than
 * vertex shaders -- confirm.
76 */
77 class vec4_visitor : public backend_shader
78 {
79 public:
80 vec4_visitor(const struct brw_compiler *compiler,
81 void *log_data,
82 const struct brw_sampler_prog_key_data *key,
83 struct brw_vue_prog_data *prog_data,
84 const nir_shader *shader,
85 void *mem_ctx,
86 bool no_spills,
87 int shader_time_index);
88 virtual ~vec4_visitor();
89
/* Null destination registers.  dst_null_f() wraps brw_null_reg() as-is;
 * the _d and _ud variants retype it to BRW_REGISTER_TYPE_D and
 * BRW_REGISTER_TYPE_UD respectively.
 */
90 dst_reg dst_null_f()
91 {
92 return dst_reg(brw_null_reg());
93 }
94
95 dst_reg dst_null_d()
96 {
97 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
98 }
99
100 dst_reg dst_null_ud()
101 {
102 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
103 }
104
/* Both pointers are const members, so they cannot be re-seated after
 * construction.
 */
105 const struct brw_sampler_prog_key_data * const key_tex;
106 struct brw_vue_prog_data * const prog_data;
107 char *fail_msg;
108 bool failed;
109
110 /**
111 * GLSL IR currently being processed, which is associated with our
112 * driver IR instructions for debugging purposes.
 *
 * NOTE(review): declared as an untyped const void pointer; whether it
 * still refers to GLSL IR cannot be told from this header -- confirm.
113 */
114 const void *base_ir;
115 const char *current_annotation;
116
117 int first_non_payload_grf;
118 unsigned int max_grf;
119 int *virtual_grf_start;
120 int *virtual_grf_end;
121 brw::vec4_live_variables *live_intervals;
122 dst_reg userplane[MAX_CLIP_PLANES];
123
124 bool need_all_constants_in_pull_buffer;
125
126 /* Regs for vertex results. Generated at ir_variable visiting time
127 * for the ir->location's used.
128 */
129 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
130 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
131 int *uniform_size;
132 int uniform_array_size; /**< Size of the uniform_size array */
133 int uniforms;
134
135 src_reg shader_start_time;
136
137 bool run();
138 void fail(const char *msg, ...);
139
140 int setup_uniforms(int payload_reg);
141
142 bool reg_allocate_trivial();
143 bool reg_allocate();
144 void evaluate_spill_costs(float *spill_costs, bool *no_spill);
145 int choose_spill_reg(struct ra_graph *g);
146 void spill_reg(int spill_reg);
147 void move_grf_array_access_to_scratch();
148 void move_uniform_array_access_to_pull_constants();
149 void move_push_constants_to_pull_constants();
150 void split_uniform_registers();
151 void pack_uniform_registers();
152 void calculate_live_intervals();
153 void invalidate_live_intervals();
154 void split_virtual_grfs();
155 bool opt_vector_float();
156 bool opt_reduce_swizzle();
157 bool dead_code_eliminate();
158 int var_range_start(unsigned v, unsigned n) const;
159 int var_range_end(unsigned v, unsigned n) const;
160 bool virtual_grf_interferes(int a, int b);
161 bool opt_cmod_propagation();
/* do_constant_prop defaults to true when the caller omits it. */
162 bool opt_copy_propagation(bool do_constant_prop = true);
163 bool opt_cse_local(bblock_t *block);
164 bool opt_cse();
165 bool opt_algebraic();
166 bool opt_register_coalesce();
167 bool eliminate_find_live_channel();
168 bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
169 void opt_set_dependency_control();
170 void opt_schedule_instructions();
171 void convert_to_hw_regs();
172
/* emit() overloads: the first takes an already-built instruction, the
 * others take an opcode with an optional destination and up to three
 * sources.
 */
173 vec4_instruction *emit(vec4_instruction *inst);
174
175 vec4_instruction *emit(enum opcode opcode);
176 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
177 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
178 const src_reg &src0);
179 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
180 const src_reg &src0, const src_reg &src1);
181 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
182 const src_reg &src0, const src_reg &src1,
183 const src_reg &src2);
184
185 vec4_instruction *emit_before(bblock_t *block,
186 vec4_instruction *inst,
187 vec4_instruction *new_inst);
188
/* Declaration helpers: EMITn(op) declares a member function named after
 * the opcode that takes a destination and n sources and returns a
 * vec4_instruction pointer.  Each expansion supplies its own ';', so the
 * uses below are not followed by one.  The macros are #undef'd once the
 * list ends.
 */
189 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
190 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
191 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
192 EMIT1(MOV)
193 EMIT1(NOT)
194 EMIT1(RNDD)
195 EMIT1(RNDE)
196 EMIT1(RNDZ)
197 EMIT1(FRC)
198 EMIT1(F32TO16)
199 EMIT1(F16TO32)
200 EMIT2(ADD)
201 EMIT2(MUL)
202 EMIT2(MACH)
203 EMIT2(MAC)
204 EMIT2(AND)
205 EMIT2(OR)
206 EMIT2(XOR)
207 EMIT2(DP3)
208 EMIT2(DP4)
209 EMIT2(DPH)
210 EMIT2(SHL)
211 EMIT2(SHR)
212 EMIT2(ASR)
/* CMP and IF are declared by hand because their signatures do not fit
 * the EMITn shapes: they take a conditional modifier or a predicate.
 */
213 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
214 enum brw_conditional_mod condition);
215 vec4_instruction *IF(src_reg src0, src_reg src1,
216 enum brw_conditional_mod condition);
217 vec4_instruction *IF(enum brw_predicate predicate);
218 EMIT1(SCRATCH_READ)
219 EMIT2(SCRATCH_WRITE)
220 EMIT3(LRP)
221 EMIT1(BFREV)
222 EMIT3(BFE)
223 EMIT2(BFI1)
224 EMIT3(BFI2)
225 EMIT1(FBH)
226 EMIT1(FBL)
227 EMIT1(CBIT)
228 EMIT3(MAD)
229 EMIT2(ADDC)
230 EMIT2(SUBB)
231 #undef EMIT1
232 #undef EMIT2
233 #undef EMIT3
234
235 int implied_mrf_writes(vec4_instruction *inst);
236
237 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
238 src_reg src0, src_reg src1);
239
240 vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
241 const src_reg &y, const src_reg &a);
242
243 /**
244 * Copy any live channel from \p src to the first channel of the
245 * result.
246 */
247 src_reg emit_uniformize(const src_reg &src);
248
249 src_reg fix_3src_operand(const src_reg &src);
250 src_reg resolve_source_modifiers(const src_reg &src);
251
/* src1 defaults to a default-constructed src_reg, so callers may pass a
 * single source.
 */
252 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
253 const src_reg &src1 = src_reg());
254
255 src_reg fix_math_operand(const src_reg &src);
256
257 void emit_pack_half_2x16(dst_reg dst, src_reg src0);
258 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
259 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
260 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
261 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
262 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
263
264 void emit_texture(ir_texture_opcode op,
265 dst_reg dest,
266 const glsl_type *dest_type,
267 src_reg coordinate,
268 int coord_components,
269 src_reg shadow_comparitor,
270 src_reg lod, src_reg lod2,
271 src_reg sample_index,
272 uint32_t constant_offset,
273 src_reg offset_value,
274 src_reg mcs,
275 bool is_cube_array,
276 uint32_t sampler, src_reg sampler_reg);
277
278 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
279 src_reg sampler);
280 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
281
282 void emit_ndc_computation();
283 void emit_psiz_and_flags(dst_reg reg);
284 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying);
285 virtual void emit_urb_slot(dst_reg reg, int varying);
286
287 void emit_shader_time_begin();
288 void emit_shader_time_end();
289 void emit_shader_time_write(int shader_time_subindex, src_reg value);
290
291 void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
292 dst_reg dst, src_reg offset, src_reg src0,
293 src_reg src1);
294
295 void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
296 src_reg offset);
297
298 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
299 src_reg *reladdr, int reg_offset);
300 src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
301 src_reg *reladdr, int reg_offset);
302 void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
303 dst_reg dst,
304 src_reg orig_src,
305 int base_offset);
306 void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
307 int base_offset);
308 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
309 dst_reg dst,
310 src_reg orig_src,
311 int base_offset);
312 void emit_pull_constant_load_reg(dst_reg dst,
313 src_reg surf_index,
314 src_reg offset,
315 bblock_t *before_block,
316 vec4_instruction *before_inst);
317 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
318 vec4_instruction *inst, src_reg src);
319
320 void resolve_ud_negate(src_reg *reg);
321
322 src_reg get_timestamp();
323
324 void dump_instruction(backend_instruction *inst);
325 void dump_instruction(backend_instruction *inst, FILE *file);
326
327 bool is_high_sampler(src_reg sampler);
328
/* NIR translation hooks.  All of them are virtual, so a derived class can
 * override any step.
 */
329 virtual void emit_nir_code();
330 virtual void nir_setup_inputs();
331 virtual void nir_setup_uniforms();
332 virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
333 virtual void nir_setup_system_values();
334 virtual void nir_emit_impl(nir_function_impl *impl);
335 virtual void nir_emit_cf_list(exec_list *list);
336 virtual void nir_emit_if(nir_if *if_stmt);
337 virtual void nir_emit_loop(nir_loop *loop);
338 virtual void nir_emit_block(nir_block *block);
339 virtual void nir_emit_instr(nir_instr *instr);
340 virtual void nir_emit_load_const(nir_load_const_instr *instr);
341 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
342 virtual void nir_emit_alu(nir_alu_instr *instr);
343 virtual void nir_emit_jump(nir_jump_instr *instr);
344 virtual void nir_emit_texture(nir_tex_instr *instr);
345 virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
346 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
347
/* Accessors mapping NIR destinations and sources to dst_reg / src_reg.
 * Overloads accept either a hardware register type, a NIR ALU type, or no
 * type at all; num_components defaults to 4 in every get_nir_src()
 * overload.
 */
348 dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
349 dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
350 dst_reg get_nir_dest(nir_dest dest);
351 src_reg get_nir_src(nir_src src, enum brw_reg_type type,
352 unsigned num_components = 4);
353 src_reg get_nir_src(nir_src src, nir_alu_type type,
354 unsigned num_components = 4);
355 src_reg get_nir_src(nir_src src,
356 unsigned num_components = 4);
357
/* Pure virtual: must be implemented by the derived class. */
358 virtual dst_reg *make_reg_for_system_value(int location,
359 const glsl_type *type) = 0;
360
361 dst_reg *nir_locals;
362 dst_reg *nir_ssa_values;
363 src_reg *nir_inputs;
364 dst_reg *nir_system_values;
365
366 protected:
367 void emit_vertex();
368 void lower_attributes_to_hw_regs(const int *attribute_map,
369 bool interleaved);
370 void setup_payload_interference(struct ra_graph *g, int first_payload_node,
371 int reg_node_count);
/* The five pure virtual members below must be implemented by derived
 * classes; gs_emit_vertex() and gs_end_primitive() are virtual but not
 * pure.
 */
372 virtual void setup_payload() = 0;
373 virtual void emit_prolog() = 0;
374 virtual void emit_thread_end() = 0;
375 virtual void emit_urb_write_header(int mrf) = 0;
376 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
377 virtual void gs_emit_vertex(int stream_id);
378 virtual void gs_end_primitive();
379
380 private:
381 /**
382 * If true, then register allocation should fail instead of spilling.
383 */
384 const bool no_spills;
385
386 int shader_time_index;
387
388 unsigned last_scratch; /**< measured in 32-byte (register size) units */
389 };
390
391 } /* namespace brw */
392 #endif /* __cplusplus */
393
394 #endif /* BRW_VEC4_H */