i965/vec4: don't do horizontal stride on some register file types
[mesa.git] / src / intel / compiler / brw_ir_vec4.h
1 /* -*- c++ -*- */
2 /*
3 * Copyright © 2011-2015 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #ifndef BRW_IR_VEC4_H
26 #define BRW_IR_VEC4_H
27
28 #include "brw_shader.h"
29
30 namespace brw {
31
32 class dst_reg;
33
34 class src_reg : public backend_reg
35 {
36 public:
37 DECLARE_RALLOC_CXX_OPERATORS(src_reg)
38
39 void init();
40
41 src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
42 src_reg();
43 src_reg(struct ::brw_reg reg);
44
45 bool equals(const src_reg &r) const;
46
47 src_reg(class vec4_visitor *v, const struct glsl_type *type);
48 src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
49
50 explicit src_reg(const dst_reg &reg);
51
52 src_reg *reladdr;
53 };
54
55 static inline src_reg
56 retype(src_reg reg, enum brw_reg_type type)
57 {
58 reg.type = type;
59 return reg;
60 }
61
62 namespace detail {
63
64 static inline void
65 add_byte_offset(backend_reg *reg, unsigned bytes)
66 {
67 switch (reg->file) {
68 case BAD_FILE:
69 break;
70 case VGRF:
71 case ATTR:
72 case UNIFORM:
73 reg->offset += bytes;
74 assert(reg->offset % 16 == 0);
75 break;
76 case MRF: {
77 const unsigned suboffset = reg->offset + bytes;
78 reg->nr += suboffset / REG_SIZE;
79 reg->offset = suboffset % REG_SIZE;
80 assert(reg->offset % 16 == 0);
81 break;
82 }
83 case ARF:
84 case FIXED_GRF: {
85 const unsigned suboffset = reg->subnr + bytes;
86 reg->nr += suboffset / REG_SIZE;
87 reg->subnr = suboffset % REG_SIZE;
88 assert(reg->subnr % 16 == 0);
89 break;
90 }
91 default:
92 assert(bytes == 0);
93 }
94 }
95
96 } /* namepace detail */
97
98 static inline src_reg
99 byte_offset(src_reg reg, unsigned bytes)
100 {
101 detail::add_byte_offset(&reg, bytes);
102 return reg;
103 }
104
105 static inline src_reg
106 offset(src_reg reg, unsigned width, unsigned delta)
107 {
108 const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
109 const unsigned num_components = MAX2(width / 4 * stride, 4);
110 return byte_offset(reg, num_components * type_sz(reg.type) * delta);
111 }
112
113 static inline src_reg
114 horiz_offset(src_reg reg, unsigned delta)
115 {
116 return byte_offset(reg, delta * type_sz(reg.type));
117 }
118
119 /**
120 * Reswizzle a given source register.
121 * \sa brw_swizzle().
122 */
123 static inline src_reg
124 swizzle(src_reg reg, unsigned swizzle)
125 {
126 if (reg.file == IMM)
127 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle);
128 else
129 reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
130
131 return reg;
132 }
133
134 static inline src_reg
135 negate(src_reg reg)
136 {
137 assert(reg.file != IMM);
138 reg.negate = !reg.negate;
139 return reg;
140 }
141
142 static inline bool
143 is_uniform(const src_reg &reg)
144 {
145 return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) &&
146 (!reg.reladdr || is_uniform(*reg.reladdr));
147 }
148
149 class dst_reg : public backend_reg
150 {
151 public:
152 DECLARE_RALLOC_CXX_OPERATORS(dst_reg)
153
154 void init();
155
156 dst_reg();
157 dst_reg(enum brw_reg_file file, int nr);
158 dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
159 unsigned writemask);
160 dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
161 unsigned writemask);
162 dst_reg(struct ::brw_reg reg);
163 dst_reg(class vec4_visitor *v, const struct glsl_type *type);
164
165 explicit dst_reg(const src_reg &reg);
166
167 bool equals(const dst_reg &r) const;
168
169 src_reg *reladdr;
170 };
171
172 static inline dst_reg
173 retype(dst_reg reg, enum brw_reg_type type)
174 {
175 reg.type = type;
176 return reg;
177 }
178
179 static inline dst_reg
180 byte_offset(dst_reg reg, unsigned bytes)
181 {
182 detail::add_byte_offset(&reg, bytes);
183 return reg;
184 }
185
186 static inline dst_reg
187 offset(dst_reg reg, unsigned width, unsigned delta)
188 {
189 const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
190 const unsigned num_components = MAX2(width / 4 * stride, 4);
191 return byte_offset(reg, num_components * type_sz(reg.type) * delta);
192 }
193
194 static inline dst_reg
195 horiz_offset(const dst_reg &reg, unsigned delta)
196 {
197 if (is_uniform(src_reg(reg)))
198 return reg;
199 else
200 return byte_offset(reg, delta * type_sz(reg.type));
201 }
202
203 static inline dst_reg
204 writemask(dst_reg reg, unsigned mask)
205 {
206 assert(reg.file != IMM);
207 assert((reg.writemask & mask) != 0);
208 reg.writemask &= mask;
209 return reg;
210 }
211
212 /**
213 * Return an integer identifying the discrete address space a register is
214 * contained in. A register is by definition fully contained in the single
215 * reg_space it belongs to, so two registers with different reg_space ids are
216 * guaranteed not to overlap. Most register files are a single reg_space of
217 * its own, only the VGRF file is composed of multiple discrete address
218 * spaces, one for each VGRF allocation.
219 */
220 static inline uint32_t
221 reg_space(const backend_reg &r)
222 {
223 return r.file << 16 | (r.file == VGRF ? r.nr : 0);
224 }
225
226 /**
227 * Return the base offset in bytes of a register relative to the start of its
228 * reg_space().
229 */
230 static inline unsigned
231 reg_offset(const backend_reg &r)
232 {
233 return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
234 (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset +
235 (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
236 }
237
238 /**
239 * Return whether the register region starting at \p r and spanning \p dr
240 * bytes could potentially overlap the register region starting at \p s and
241 * spanning \p ds bytes.
242 */
243 static inline bool
244 regions_overlap(const backend_reg &r, unsigned dr,
245 const backend_reg &s, unsigned ds)
246 {
247 if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
248 /* COMPR4 regions are translated by the hardware during decompression
249 * into two separate half-regions 4 MRFs apart from each other.
250 */
251 backend_reg t0 = r;
252 t0.nr &= ~BRW_MRF_COMPR4;
253 backend_reg t1 = t0;
254 t1.offset += 4 * REG_SIZE;
255 return regions_overlap(t0, dr / 2, s, ds) ||
256 regions_overlap(t1, dr / 2, s, ds);
257
258 } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
259 return regions_overlap(s, ds, r, dr);
260
261 } else {
262 return reg_space(r) == reg_space(s) &&
263 !(reg_offset(r) + dr <= reg_offset(s) ||
264 reg_offset(s) + ds <= reg_offset(r));
265 }
266 }
267
268 class vec4_instruction : public backend_instruction {
269 public:
270 DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
271
272 vec4_instruction(enum opcode opcode,
273 const dst_reg &dst = dst_reg(),
274 const src_reg &src0 = src_reg(),
275 const src_reg &src1 = src_reg(),
276 const src_reg &src2 = src_reg());
277
278 dst_reg dst;
279 src_reg src[3];
280
281 enum brw_urb_write_flags urb_write_flags;
282
283 unsigned sol_binding; /**< gen6: SOL binding table index */
284 bool sol_final_write; /**< gen6: send commit message */
285 unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
286
287 bool is_send_from_grf();
288 unsigned size_read(unsigned arg) const;
289 bool can_reswizzle(const struct gen_device_info *devinfo, int dst_writemask,
290 int swizzle, int swizzle_mask);
291 void reswizzle(int dst_writemask, int swizzle);
292 bool can_do_source_mods(const struct gen_device_info *devinfo);
293 bool can_do_writemask(const struct gen_device_info *devinfo);
294 bool can_change_types() const;
295 bool has_source_and_destination_hazard() const;
296
297 bool is_align1_partial_write()
298 {
299 return opcode == VEC4_OPCODE_SET_LOW_32BIT ||
300 opcode == VEC4_OPCODE_SET_HIGH_32BIT;
301 }
302
303 bool reads_flag()
304 {
305 return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
306 }
307
308 bool reads_flag(unsigned c)
309 {
310 if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
311 return true;
312
313 switch (predicate) {
314 case BRW_PREDICATE_NONE:
315 return false;
316 case BRW_PREDICATE_ALIGN16_REPLICATE_X:
317 return c == 0;
318 case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
319 return c == 1;
320 case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
321 return c == 2;
322 case BRW_PREDICATE_ALIGN16_REPLICATE_W:
323 return c == 3;
324 default:
325 return true;
326 }
327 }
328
329 bool writes_flag()
330 {
331 return (conditional_mod && (opcode != BRW_OPCODE_SEL &&
332 opcode != BRW_OPCODE_IF &&
333 opcode != BRW_OPCODE_WHILE));
334 }
335 };
336
337 /**
338 * Make the execution of \p inst dependent on the evaluation of a possibly
339 * inverted predicate.
340 */
341 inline vec4_instruction *
342 set_predicate_inv(enum brw_predicate pred, bool inverse,
343 vec4_instruction *inst)
344 {
345 inst->predicate = pred;
346 inst->predicate_inverse = inverse;
347 return inst;
348 }
349
350 /**
351 * Make the execution of \p inst dependent on the evaluation of a predicate.
352 */
353 inline vec4_instruction *
354 set_predicate(enum brw_predicate pred, vec4_instruction *inst)
355 {
356 return set_predicate_inv(pred, false, inst);
357 }
358
359 /**
360 * Write the result of evaluating the condition given by \p mod to a flag
361 * register.
362 */
363 inline vec4_instruction *
364 set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
365 {
366 inst->conditional_mod = mod;
367 return inst;
368 }
369
370 /**
371 * Clamp the result of \p inst to the saturation range of its destination
372 * datatype.
373 */
374 inline vec4_instruction *
375 set_saturate(bool saturate, vec4_instruction *inst)
376 {
377 inst->saturate = saturate;
378 return inst;
379 }
380
381 /**
382 * Return the number of dataflow registers written by the instruction (either
383 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
384 * register_size)'. The somewhat arbitrary register size unit is 16B for the
385 * UNIFORM and IMM files and 32B for all other files.
386 */
387 inline unsigned
388 regs_written(const vec4_instruction *inst)
389 {
390 assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
391 return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written,
392 REG_SIZE);
393 }
394
395 /**
396 * Return the number of dataflow registers read by the instruction (either
397 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
398 * register_size)'. The somewhat arbitrary register size unit is 16B for the
399 * UNIFORM and IMM files and 32B for all other files.
400 */
401 inline unsigned
402 regs_read(const vec4_instruction *inst, unsigned i)
403 {
404 const unsigned reg_size =
405 inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE;
406 return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i),
407 reg_size);
408 }
409
410 static inline enum brw_reg_type
411 get_exec_type(const vec4_instruction *inst)
412 {
413 enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
414
415 for (int i = 0; i < 3; i++) {
416 if (inst->src[i].file != BAD_FILE) {
417 const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type));
418 if (type_sz(t) > type_sz(exec_type))
419 exec_type = t;
420 else if (type_sz(t) == type_sz(exec_type) &&
421 brw_reg_type_is_floating_point(t))
422 exec_type = t;
423 }
424 }
425
426 if (exec_type == BRW_REGISTER_TYPE_B)
427 exec_type = inst->dst.type;
428
429 /* TODO: We need to handle half-float conversions. */
430 assert(exec_type != BRW_REGISTER_TYPE_HF ||
431 inst->dst.type == BRW_REGISTER_TYPE_HF);
432 assert(exec_type != BRW_REGISTER_TYPE_B);
433
434 return exec_type;
435 }
436
437 static inline unsigned
438 get_exec_type_size(const vec4_instruction *inst)
439 {
440 return type_sz(get_exec_type(inst));
441 }
442
443 } /* namespace brw */
444
445 #endif