i965/fs: Handle fixed HW GRF subnr in reg_offset().
[mesa.git] / src / mesa / drivers / dri / i965 / brw_ir_fs.h
1 /* -*- c++ -*- */
2 /*
3 * Copyright © 2010-2015 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #ifndef BRW_IR_FS_H
26 #define BRW_IR_FS_H
27
28 #include "brw_shader.h"
29
30 class fs_inst;
31
32 class fs_reg : public backend_reg {
33 public:
34 DECLARE_RALLOC_CXX_OPERATORS(fs_reg)
35
36 void init();
37
38 fs_reg();
39 fs_reg(struct ::brw_reg reg);
40 fs_reg(enum brw_reg_file file, int nr);
41 fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);
42
43 bool equals(const fs_reg &r) const;
44 bool is_contiguous() const;
45
46 /**
47 * Return the size in bytes of a single logical component of the
48 * register assuming the given execution width.
49 */
50 unsigned component_size(unsigned width) const;
51
52 /** Smear a channel of the reg to all channels. */
53 fs_reg &set_smear(unsigned subreg);
54
55 /** Register region horizontal stride */
56 uint8_t stride;
57 };
58
59 static inline fs_reg
60 negate(fs_reg reg)
61 {
62 assert(reg.file != IMM);
63 reg.negate = !reg.negate;
64 return reg;
65 }
66
67 static inline fs_reg
68 retype(fs_reg reg, enum brw_reg_type type)
69 {
70 reg.type = type;
71 return reg;
72 }
73
74 static inline fs_reg
75 byte_offset(fs_reg reg, unsigned delta)
76 {
77 switch (reg.file) {
78 case BAD_FILE:
79 break;
80 case VGRF:
81 case ATTR:
82 case UNIFORM: {
83 const unsigned reg_size = (reg.file == UNIFORM ? 4 : REG_SIZE);
84 const unsigned suboffset = reg.offset % reg_size + delta;
85 reg.offset += ROUND_DOWN_TO(suboffset, reg_size);
86 reg.offset = ROUND_DOWN_TO(reg.offset, reg_size) + suboffset % reg_size;
87 break;
88 }
89 case MRF: {
90 const unsigned suboffset = reg.offset % REG_SIZE + delta;
91 reg.nr += suboffset / REG_SIZE;
92 reg.offset = ROUND_DOWN_TO(reg.offset, REG_SIZE) + suboffset % REG_SIZE;
93 break;
94 }
95 case ARF:
96 case FIXED_GRF: {
97 const unsigned suboffset = reg.subnr + delta;
98 reg.nr += suboffset / REG_SIZE;
99 reg.subnr = suboffset % REG_SIZE;
100 break;
101 }
102 case IMM:
103 default:
104 assert(delta == 0);
105 }
106 return reg;
107 }
108
109 static inline fs_reg
110 horiz_offset(const fs_reg &reg, unsigned delta)
111 {
112 switch (reg.file) {
113 case BAD_FILE:
114 case UNIFORM:
115 case IMM:
116 /* These only have a single component that is implicitly splatted. A
117 * horizontal offset should be a harmless no-op.
118 * XXX - Handle vector immediates correctly.
119 */
120 return reg;
121 case VGRF:
122 case MRF:
123 case ATTR:
124 return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
125 case ARF:
126 case FIXED_GRF:
127 if (reg.is_null()) {
128 return reg;
129 } else {
130 const unsigned stride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
131 return byte_offset(reg, delta * stride * type_sz(reg.type));
132 }
133 }
134 unreachable("Invalid register file");
135 }
136
137 static inline fs_reg
138 offset(fs_reg reg, unsigned width, unsigned delta)
139 {
140 switch (reg.file) {
141 case BAD_FILE:
142 break;
143 case ARF:
144 case FIXED_GRF:
145 case MRF:
146 case VGRF:
147 case ATTR:
148 case UNIFORM:
149 return byte_offset(reg, delta * reg.component_size(width));
150 case IMM:
151 assert(delta == 0);
152 }
153 return reg;
154 }
155
156 /**
157 * Get the scalar channel of \p reg given by \p idx and replicate it to all
158 * channels of the result.
159 */
160 static inline fs_reg
161 component(fs_reg reg, unsigned idx)
162 {
163 reg = horiz_offset(reg, idx);
164 reg.stride = 0;
165 return reg;
166 }
167
168 /**
169 * Return an integer identifying the discrete address space a register is
170 * contained in. A register is by definition fully contained in the single
171 * reg_space it belongs to, so two registers with different reg_space ids are
172 * guaranteed not to overlap. Most register files are a single reg_space of
173 * its own, only the VGRF file is composed of multiple discrete address
174 * spaces, one for each VGRF allocation.
175 */
176 static inline uint32_t
177 reg_space(const fs_reg &r)
178 {
179 return r.file << 16 | (r.file == VGRF ? r.nr : 0);
180 }
181
182 /**
183 * Return the base offset in bytes of a register relative to the start of its
184 * reg_space().
185 */
186 static inline unsigned
187 reg_offset(const fs_reg &r)
188 {
189 return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
190 (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
191 (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
192 }
193
194 /**
195 * Return whether the register region starting at \p r and spanning \p dr
196 * bytes could potentially overlap the register region starting at \p s and
197 * spanning \p ds bytes.
198 */
199 static inline bool
200 regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
201 {
202 if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
203 fs_reg t = r;
204 t.nr &= ~BRW_MRF_COMPR4;
205 /* COMPR4 regions are translated by the hardware during decompression
206 * into two separate half-regions 4 MRFs apart from each other.
207 */
208 return regions_overlap(t, dr / 2, s, ds) ||
209 regions_overlap(byte_offset(t, 4 * REG_SIZE), dr / 2, s, ds);
210
211 } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
212 return regions_overlap(s, ds, r, dr);
213
214 } else {
215 return reg_space(r) == reg_space(s) &&
216 !(reg_offset(r) + dr <= reg_offset(s) ||
217 reg_offset(s) + ds <= reg_offset(r));
218 }
219 }
220
221 /**
222 * Return whether the given register region is n-periodic, i.e. whether the
223 * original region remains invariant after shifting it by \p n scalar
224 * channels.
225 */
226 static inline bool
227 is_periodic(const fs_reg &reg, unsigned n)
228 {
229 if (reg.file == BAD_FILE || reg.is_null()) {
230 return true;
231
232 } else if (reg.file == IMM) {
233 const unsigned period = (reg.type == BRW_REGISTER_TYPE_UV ||
234 reg.type == BRW_REGISTER_TYPE_V ? 8 :
235 reg.type == BRW_REGISTER_TYPE_VF ? 4 :
236 1);
237 return n % period == 0;
238
239 } else if (reg.file == ARF || reg.file == FIXED_GRF) {
240 const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
241 reg.vstride == 0 ? 1 << reg.width :
242 ~0);
243 return n % period == 0;
244
245 } else {
246 return reg.stride == 0;
247 }
248 }
249
250 static inline bool
251 is_uniform(const fs_reg &reg)
252 {
253 return is_periodic(reg, 1);
254 }
255
256 /**
257 * Get the specified 8-component quarter of a register.
258 * XXX - Maybe come up with a less misleading name for this (e.g. quarter())?
259 */
260 static inline fs_reg
261 half(const fs_reg &reg, unsigned idx)
262 {
263 assert(idx < 2);
264 return horiz_offset(reg, 8 * idx);
265 }
266
267 /**
268 * Reinterpret each channel of register \p reg as a vector of values of the
269 * given smaller type and take the i-th subcomponent from each.
270 */
271 static inline fs_reg
272 subscript(fs_reg reg, brw_reg_type type, unsigned i)
273 {
274 assert((i + 1) * type_sz(type) <= type_sz(reg.type));
275
276 if (reg.file == ARF || reg.file == FIXED_GRF) {
277 /* The stride is encoded inconsistently for fixed GRF and ARF registers
278 * as the log2 of the actual vertical and horizontal strides.
279 */
280 const int delta = _mesa_logbase2(type_sz(reg.type)) -
281 _mesa_logbase2(type_sz(type));
282 reg.hstride += (reg.hstride ? delta : 0);
283 reg.vstride += (reg.vstride ? delta : 0);
284
285 } else if (reg.file == IMM) {
286 assert(reg.type == type);
287
288 } else {
289 reg.stride *= type_sz(reg.type) / type_sz(type);
290 }
291
292 return byte_offset(retype(reg, type), i * type_sz(type));
293 }
294
/* Default-constructed fs_reg constant.  Because it is declared static in a
 * header, every translation unit including this file gets its own copy.
 */
295 static const fs_reg reg_undef;
296
297 class fs_inst : public backend_instruction {
298 fs_inst &operator=(const fs_inst &);
299
300 void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
301 const fs_reg *src, unsigned sources);
302
303 public:
304 DECLARE_RALLOC_CXX_OPERATORS(fs_inst)
305
306 fs_inst();
307 fs_inst(enum opcode opcode, uint8_t exec_size);
308 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
309 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
310 const fs_reg &src0);
311 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
312 const fs_reg &src0, const fs_reg &src1);
313 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
314 const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
315 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
316 const fs_reg src[], unsigned sources);
317 fs_inst(const fs_inst &that);
318 ~fs_inst();
319
320 void resize_sources(uint8_t num_sources);
321
322 bool equals(fs_inst *inst) const;
323 bool overwrites_reg(const fs_reg &reg) const;
324 bool is_send_from_grf() const;
325 bool is_partial_write() const;
326 bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
327 unsigned components_read(unsigned i) const;
328 int size_read(int arg) const;
329 bool can_do_source_mods(const struct gen_device_info *devinfo);
330 bool can_change_types() const;
331 bool has_side_effects() const;
332 bool has_source_and_destination_hazard() const;
333
334 /**
335 * Return the subset of flag registers read by the instruction as a bitset
336 * with byte granularity.
337 */
338 unsigned flags_read(const gen_device_info *devinfo) const;
339
340 /**
341 * Return the subset of flag registers updated by the instruction (either
342 * partially or fully) as a bitset with byte granularity.
343 */
344 unsigned flags_written() const;
345
346 fs_reg dst;
347 fs_reg *src;
348
349 uint8_t sources; /**< Number of fs_reg sources. */
350
351 /**
352 * Execution size of the instruction. This is used by the generator to
353 * generate the correct binary for the given fs_inst. Current valid
354 * values are 1, 8, 16.
355 */
356 uint8_t exec_size;
357
358 /**
359 * Channel group from the hardware execution and predication mask that
360 * should be applied to the instruction. The subset of channel enable
361 * signals (calculated from the EU control flow and predication state)
362 * given by [group, group + exec_size) will be used to mask GRF writes and
363 * any other side effects of the instruction.
364 */
365 uint8_t group;
366
367 bool eot:1;
368 bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */
369 };
370
371 /**
372 * Make the execution of \p inst dependent on the evaluation of a possibly
373 * inverted predicate.
374 */
375 static inline fs_inst *
376 set_predicate_inv(enum brw_predicate pred, bool inverse,
377 fs_inst *inst)
378 {
379 inst->predicate = pred;
380 inst->predicate_inverse = inverse;
381 return inst;
382 }
383
384 /**
385 * Make the execution of \p inst dependent on the evaluation of a predicate.
386 */
387 static inline fs_inst *
388 set_predicate(enum brw_predicate pred, fs_inst *inst)
389 {
390 return set_predicate_inv(pred, false, inst);
391 }
392
393 /**
394 * Write the result of evaluating the condition given by \p mod to a flag
395 * register.
396 */
397 static inline fs_inst *
398 set_condmod(enum brw_conditional_mod mod, fs_inst *inst)
399 {
400 inst->conditional_mod = mod;
401 return inst;
402 }
403
404 /**
405 * Clamp the result of \p inst to the saturation range of its destination
406 * datatype.
407 */
408 static inline fs_inst *
409 set_saturate(bool saturate, fs_inst *inst)
410 {
411 inst->saturate = saturate;
412 return inst;
413 }
414
415 /**
416 * Return the number of dataflow registers written by the instruction (either
417 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
418 * register_size)'. The somewhat arbitrary register size unit is 4B for the
419 * UNIFORM and IMM files and 32B for all other files.
420 */
421 inline unsigned
422 regs_written(const fs_inst *inst)
423 {
424 /* XXX - Take into account register-misaligned offsets correctly. */
425 assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
426 return DIV_ROUND_UP(inst->size_written, REG_SIZE);
427 }
428
429 /**
430 * Return the number of dataflow registers read by the instruction (either
431 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
432 * register_size)'. The somewhat arbitrary register size unit is 4B for the
433 * UNIFORM and IMM files and 32B for all other files.
434 */
435 inline unsigned
436 regs_read(const fs_inst *inst, unsigned i)
437 {
438 /* XXX - Take into account register-misaligned offsets correctly. */
439 const unsigned reg_size =
440 inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE;
441 return DIV_ROUND_UP(inst->size_read(i), reg_size);
442 }
443
444 #endif