anv: add no reloc flags on empty and simple bo paths.
[mesa.git] / src / intel / compiler / brw_ir_vec4.h
1 /* -*- c++ -*- */
2 /*
3 * Copyright © 2011-2015 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #ifndef BRW_IR_VEC4_H
26 #define BRW_IR_VEC4_H
27
28 #include "brw_shader.h"
29
30 namespace brw {
31
32 class dst_reg;
33
34 class src_reg : public backend_reg
35 {
36 public:
37 DECLARE_RALLOC_CXX_OPERATORS(src_reg)
38
39 void init();
40
41 src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
42 src_reg();
43 src_reg(struct ::brw_reg reg);
44
45 bool equals(const src_reg &r) const;
46 bool negative_equals(const src_reg &r) const;
47
48 src_reg(class vec4_visitor *v, const struct glsl_type *type);
49 src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
50
51 explicit src_reg(const dst_reg &reg);
52
53 src_reg *reladdr;
54 };
55
56 static inline src_reg
57 retype(src_reg reg, enum brw_reg_type type)
58 {
59 reg.type = type;
60 return reg;
61 }
62
63 namespace detail {
64
65 static inline void
66 add_byte_offset(backend_reg *reg, unsigned bytes)
67 {
68 switch (reg->file) {
69 case BAD_FILE:
70 break;
71 case VGRF:
72 case ATTR:
73 case UNIFORM:
74 reg->offset += bytes;
75 assert(reg->offset % 16 == 0);
76 break;
77 case MRF: {
78 const unsigned suboffset = reg->offset + bytes;
79 reg->nr += suboffset / REG_SIZE;
80 reg->offset = suboffset % REG_SIZE;
81 assert(reg->offset % 16 == 0);
82 break;
83 }
84 case ARF:
85 case FIXED_GRF: {
86 const unsigned suboffset = reg->subnr + bytes;
87 reg->nr += suboffset / REG_SIZE;
88 reg->subnr = suboffset % REG_SIZE;
89 assert(reg->subnr % 16 == 0);
90 break;
91 }
92 default:
93 assert(bytes == 0);
94 }
95 }
96
97 } /* namepace detail */
98
99 static inline src_reg
100 byte_offset(src_reg reg, unsigned bytes)
101 {
102 detail::add_byte_offset(&reg, bytes);
103 return reg;
104 }
105
106 static inline src_reg
107 offset(src_reg reg, unsigned width, unsigned delta)
108 {
109 const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
110 const unsigned num_components = MAX2(width / 4 * stride, 4);
111 return byte_offset(reg, num_components * type_sz(reg.type) * delta);
112 }
113
114 static inline src_reg
115 horiz_offset(src_reg reg, unsigned delta)
116 {
117 return byte_offset(reg, delta * type_sz(reg.type));
118 }
119
120 /**
121 * Reswizzle a given source register.
122 * \sa brw_swizzle().
123 */
124 static inline src_reg
125 swizzle(src_reg reg, unsigned swizzle)
126 {
127 if (reg.file == IMM)
128 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle);
129 else
130 reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
131
132 return reg;
133 }
134
135 static inline src_reg
136 negate(src_reg reg)
137 {
138 assert(reg.file != IMM);
139 reg.negate = !reg.negate;
140 return reg;
141 }
142
143 static inline bool
144 is_uniform(const src_reg &reg)
145 {
146 return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) &&
147 (!reg.reladdr || is_uniform(*reg.reladdr));
148 }
149
150 class dst_reg : public backend_reg
151 {
152 public:
153 DECLARE_RALLOC_CXX_OPERATORS(dst_reg)
154
155 void init();
156
157 dst_reg();
158 dst_reg(enum brw_reg_file file, int nr);
159 dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
160 unsigned writemask);
161 dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
162 unsigned writemask);
163 dst_reg(struct ::brw_reg reg);
164 dst_reg(class vec4_visitor *v, const struct glsl_type *type);
165
166 explicit dst_reg(const src_reg &reg);
167
168 bool equals(const dst_reg &r) const;
169
170 src_reg *reladdr;
171 };
172
173 static inline dst_reg
174 retype(dst_reg reg, enum brw_reg_type type)
175 {
176 reg.type = type;
177 return reg;
178 }
179
180 static inline dst_reg
181 byte_offset(dst_reg reg, unsigned bytes)
182 {
183 detail::add_byte_offset(&reg, bytes);
184 return reg;
185 }
186
187 static inline dst_reg
188 offset(dst_reg reg, unsigned width, unsigned delta)
189 {
190 const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
191 const unsigned num_components = MAX2(width / 4 * stride, 4);
192 return byte_offset(reg, num_components * type_sz(reg.type) * delta);
193 }
194
195 static inline dst_reg
196 horiz_offset(const dst_reg &reg, unsigned delta)
197 {
198 if (is_uniform(src_reg(reg)))
199 return reg;
200 else
201 return byte_offset(reg, delta * type_sz(reg.type));
202 }
203
204 static inline dst_reg
205 writemask(dst_reg reg, unsigned mask)
206 {
207 assert(reg.file != IMM);
208 assert((reg.writemask & mask) != 0);
209 reg.writemask &= mask;
210 return reg;
211 }
212
213 /**
214 * Return an integer identifying the discrete address space a register is
215 * contained in. A register is by definition fully contained in the single
216 * reg_space it belongs to, so two registers with different reg_space ids are
217 * guaranteed not to overlap. Most register files are a single reg_space of
218 * its own, only the VGRF file is composed of multiple discrete address
219 * spaces, one for each VGRF allocation.
220 */
221 static inline uint32_t
222 reg_space(const backend_reg &r)
223 {
224 return r.file << 16 | (r.file == VGRF ? r.nr : 0);
225 }
226
227 /**
228 * Return the base offset in bytes of a register relative to the start of its
229 * reg_space().
230 */
231 static inline unsigned
232 reg_offset(const backend_reg &r)
233 {
234 return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
235 (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset +
236 (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
237 }
238
239 /**
240 * Return whether the register region starting at \p r and spanning \p dr
241 * bytes could potentially overlap the register region starting at \p s and
242 * spanning \p ds bytes.
243 */
244 static inline bool
245 regions_overlap(const backend_reg &r, unsigned dr,
246 const backend_reg &s, unsigned ds)
247 {
248 if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
249 /* COMPR4 regions are translated by the hardware during decompression
250 * into two separate half-regions 4 MRFs apart from each other.
251 */
252 backend_reg t0 = r;
253 t0.nr &= ~BRW_MRF_COMPR4;
254 backend_reg t1 = t0;
255 t1.offset += 4 * REG_SIZE;
256 return regions_overlap(t0, dr / 2, s, ds) ||
257 regions_overlap(t1, dr / 2, s, ds);
258
259 } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
260 return regions_overlap(s, ds, r, dr);
261
262 } else {
263 return reg_space(r) == reg_space(s) &&
264 !(reg_offset(r) + dr <= reg_offset(s) ||
265 reg_offset(s) + ds <= reg_offset(r));
266 }
267 }
268
269 class vec4_instruction : public backend_instruction {
270 public:
271 DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
272
273 vec4_instruction(enum opcode opcode,
274 const dst_reg &dst = dst_reg(),
275 const src_reg &src0 = src_reg(),
276 const src_reg &src1 = src_reg(),
277 const src_reg &src2 = src_reg());
278
279 dst_reg dst;
280 src_reg src[3];
281
282 enum brw_urb_write_flags urb_write_flags;
283
284 unsigned sol_binding; /**< gen6: SOL binding table index */
285 bool sol_final_write; /**< gen6: send commit message */
286 unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
287
288 bool is_send_from_grf() const;
289 unsigned size_read(unsigned arg) const;
290 bool can_reswizzle(const struct gen_device_info *devinfo, int dst_writemask,
291 int swizzle, int swizzle_mask);
292 void reswizzle(int dst_writemask, int swizzle);
293 bool can_do_source_mods(const struct gen_device_info *devinfo);
294 bool can_do_cmod();
295 bool can_do_writemask(const struct gen_device_info *devinfo);
296 bool can_change_types() const;
297 bool has_source_and_destination_hazard() const;
298 unsigned implied_mrf_writes() const;
299
300 bool is_align1_partial_write()
301 {
302 return opcode == VEC4_OPCODE_SET_LOW_32BIT ||
303 opcode == VEC4_OPCODE_SET_HIGH_32BIT;
304 }
305
306 bool reads_flag() const
307 {
308 return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
309 }
310
311 bool reads_flag(unsigned c)
312 {
313 if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
314 return true;
315
316 switch (predicate) {
317 case BRW_PREDICATE_NONE:
318 return false;
319 case BRW_PREDICATE_ALIGN16_REPLICATE_X:
320 return c == 0;
321 case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
322 return c == 1;
323 case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
324 return c == 2;
325 case BRW_PREDICATE_ALIGN16_REPLICATE_W:
326 return c == 3;
327 default:
328 return true;
329 }
330 }
331
332 bool writes_flag() const
333 {
334 return (conditional_mod && (opcode != BRW_OPCODE_SEL &&
335 opcode != BRW_OPCODE_CSEL &&
336 opcode != BRW_OPCODE_IF &&
337 opcode != BRW_OPCODE_WHILE));
338 }
339
340 bool reads_g0_implicitly() const
341 {
342 switch (opcode) {
343 case SHADER_OPCODE_TEX:
344 case SHADER_OPCODE_TXL:
345 case SHADER_OPCODE_TXD:
346 case SHADER_OPCODE_TXF:
347 case SHADER_OPCODE_TXF_CMS_W:
348 case SHADER_OPCODE_TXF_CMS:
349 case SHADER_OPCODE_TXF_MCS:
350 case SHADER_OPCODE_TXS:
351 case SHADER_OPCODE_TG4:
352 case SHADER_OPCODE_TG4_OFFSET:
353 case SHADER_OPCODE_SAMPLEINFO:
354 case VS_OPCODE_PULL_CONSTANT_LOAD:
355 case GS_OPCODE_SET_PRIMITIVE_ID:
356 case GS_OPCODE_GET_INSTANCE_ID:
357 case SHADER_OPCODE_GEN4_SCRATCH_READ:
358 case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
359 return true;
360 default:
361 return false;
362 }
363 }
364 };
365
366 /**
367 * Make the execution of \p inst dependent on the evaluation of a possibly
368 * inverted predicate.
369 */
370 inline vec4_instruction *
371 set_predicate_inv(enum brw_predicate pred, bool inverse,
372 vec4_instruction *inst)
373 {
374 inst->predicate = pred;
375 inst->predicate_inverse = inverse;
376 return inst;
377 }
378
379 /**
380 * Make the execution of \p inst dependent on the evaluation of a predicate.
381 */
382 inline vec4_instruction *
383 set_predicate(enum brw_predicate pred, vec4_instruction *inst)
384 {
385 return set_predicate_inv(pred, false, inst);
386 }
387
388 /**
389 * Write the result of evaluating the condition given by \p mod to a flag
390 * register.
391 */
392 inline vec4_instruction *
393 set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
394 {
395 inst->conditional_mod = mod;
396 return inst;
397 }
398
399 /**
400 * Clamp the result of \p inst to the saturation range of its destination
401 * datatype.
402 */
403 inline vec4_instruction *
404 set_saturate(bool saturate, vec4_instruction *inst)
405 {
406 inst->saturate = saturate;
407 return inst;
408 }
409
410 /**
411 * Return the number of dataflow registers written by the instruction (either
412 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
413 * register_size)'. The somewhat arbitrary register size unit is 16B for the
414 * UNIFORM and IMM files and 32B for all other files.
415 */
416 inline unsigned
417 regs_written(const vec4_instruction *inst)
418 {
419 assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
420 return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written,
421 REG_SIZE);
422 }
423
424 /**
425 * Return the number of dataflow registers read by the instruction (either
426 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
427 * register_size)'. The somewhat arbitrary register size unit is 16B for the
428 * UNIFORM and IMM files and 32B for all other files.
429 */
430 inline unsigned
431 regs_read(const vec4_instruction *inst, unsigned i)
432 {
433 const unsigned reg_size =
434 inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE;
435 return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i),
436 reg_size);
437 }
438
439 static inline enum brw_reg_type
440 get_exec_type(const vec4_instruction *inst)
441 {
442 enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
443
444 for (int i = 0; i < 3; i++) {
445 if (inst->src[i].file != BAD_FILE) {
446 const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type));
447 if (type_sz(t) > type_sz(exec_type))
448 exec_type = t;
449 else if (type_sz(t) == type_sz(exec_type) &&
450 brw_reg_type_is_floating_point(t))
451 exec_type = t;
452 }
453 }
454
455 if (exec_type == BRW_REGISTER_TYPE_B)
456 exec_type = inst->dst.type;
457
458 /* TODO: We need to handle half-float conversions. */
459 assert(exec_type != BRW_REGISTER_TYPE_HF ||
460 inst->dst.type == BRW_REGISTER_TYPE_HF);
461 assert(exec_type != BRW_REGISTER_TYPE_B);
462
463 return exec_type;
464 }
465
466 static inline unsigned
467 get_exec_type_size(const vec4_instruction *inst)
468 {
469 return type_sz(get_exec_type(inst));
470 }
471
472 } /* namespace brw */
473
474 #endif