i965/nir: Sort uniforms direct-first and use two different uniform registers
[mesa.git] / src / mesa / drivers / dri / i965 / brw_ir_fs.h
1 /* -*- c++ -*- */
2 /*
3 * Copyright © 2010-2015 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #ifndef BRW_IR_FS_H
26 #define BRW_IR_FS_H
27
28 #include "brw_shader.h"
29
30 class fs_inst;
31
32 class fs_reg : public backend_reg {
33 public:
34 DECLARE_RALLOC_CXX_OPERATORS(fs_reg)
35
36 void init();
37
38 fs_reg();
39 explicit fs_reg(float f);
40 explicit fs_reg(int32_t i);
41 explicit fs_reg(uint32_t u);
42 explicit fs_reg(uint8_t vf[4]);
43 explicit fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3);
44 fs_reg(struct brw_reg fixed_hw_reg);
45 fs_reg(enum register_file file, int reg);
46 fs_reg(enum register_file file, int reg, enum brw_reg_type type);
47 fs_reg(enum register_file file, int reg, enum brw_reg_type type, uint8_t width);
48
49 bool equals(const fs_reg &r) const;
50 bool is_contiguous() const;
51
52 /** Smear a channel of the reg to all channels. */
53 fs_reg &set_smear(unsigned subreg);
54
55 /**
56 * Offset in bytes from the start of the register. Values up to a
57 * backend_reg::reg_offset unit are valid.
58 */
59 int subreg_offset;
60
61 fs_reg *reladdr;
62
63 /**
64 * The register width. This indicates how many hardware values are
65 * represented by each virtual value. Valid values are 1, 8, or 16.
66 * For immediate values, this is 1. Most of the rest of the time, it
67 * will be equal to the dispatch width.
68 */
69 uint8_t width;
70
71 /**
72 * Returns the effective register width when used as a source in the
73 * given instruction. Registers such as uniforms and immediates
74 * effectively take on the width of the instruction in which they are
75 * used.
76 */
77 uint8_t effective_width;
78
79 /** Register region horizontal stride */
80 uint8_t stride;
81 };
82
83 static inline fs_reg
84 negate(fs_reg reg)
85 {
86 assert(reg.file != HW_REG && reg.file != IMM);
87 reg.negate = !reg.negate;
88 return reg;
89 }
90
91 static inline fs_reg
92 retype(fs_reg reg, enum brw_reg_type type)
93 {
94 reg.fixed_hw_reg.type = reg.type = type;
95 return reg;
96 }
97
98 static inline fs_reg
99 byte_offset(fs_reg reg, unsigned delta)
100 {
101 switch (reg.file) {
102 case BAD_FILE:
103 break;
104 case GRF:
105 case ATTR:
106 reg.reg_offset += delta / 32;
107 break;
108 case MRF:
109 reg.reg += delta / 32;
110 break;
111 default:
112 assert(delta == 0);
113 }
114 reg.subreg_offset += delta % 32;
115 return reg;
116 }
117
118 static inline fs_reg
119 horiz_offset(fs_reg reg, unsigned delta)
120 {
121 switch (reg.file) {
122 case BAD_FILE:
123 case UNIFORM:
124 case IMM:
125 /* These only have a single component that is implicitly splatted. A
126 * horizontal offset should be a harmless no-op.
127 */
128 break;
129 case GRF:
130 case MRF:
131 case ATTR:
132 return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
133 default:
134 assert(delta == 0);
135 }
136 return reg;
137 }
138
139 static inline fs_reg
140 offset(fs_reg reg, unsigned delta)
141 {
142 assert(reg.stride > 0);
143 switch (reg.file) {
144 case BAD_FILE:
145 break;
146 case GRF:
147 case MRF:
148 case ATTR:
149 return byte_offset(reg, delta * reg.width * reg.stride * type_sz(reg.type));
150 case UNIFORM:
151 reg.reg_offset += delta;
152 break;
153 default:
154 assert(delta == 0);
155 }
156 return reg;
157 }
158
159 static inline fs_reg
160 component(fs_reg reg, unsigned idx)
161 {
162 assert(reg.subreg_offset == 0);
163 assert(idx < reg.width);
164 reg.subreg_offset = idx * type_sz(reg.type);
165 reg.width = 1;
166 return reg;
167 }
168
169 /**
170 * Get either of the 8-component halves of a 16-component register.
171 *
172 * Note: this also works if \c reg represents a SIMD16 pair of registers.
173 */
174 static inline fs_reg
175 half(fs_reg reg, unsigned idx)
176 {
177 assert(idx < 2);
178
179 if (reg.file == UNIFORM)
180 return reg;
181
182 assert(idx == 0 || (reg.file != HW_REG && reg.file != IMM));
183 assert(reg.width == 16);
184 reg.width = 8;
185 return horiz_offset(reg, 8 * idx);
186 }
187
/* Constant register built by the default fs_reg() constructor.  Being
 * `static` in a header, every translation unit that includes this file gets
 * its own copy.
 * NOTE(review): presumably the canonical "undefined register" value --
 * confirm against the fs_reg() definition and the users of reg_undef.
 */
188 static const fs_reg reg_undef;
189
190 class fs_inst : public backend_instruction {
191 fs_inst &operator=(const fs_inst &);
192
193 void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
194 const fs_reg *src, unsigned sources);
195
196 public:
197 DECLARE_RALLOC_CXX_OPERATORS(fs_inst)
198
199 fs_inst();
200 fs_inst(enum opcode opcode, uint8_t exec_size);
201 fs_inst(enum opcode opcode, const fs_reg &dst);
202 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
203 const fs_reg &src0);
204 fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0);
205 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
206 const fs_reg &src0, const fs_reg &src1);
207 fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
208 const fs_reg &src1);
209 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
210 const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
211 fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
212 const fs_reg &src1, const fs_reg &src2);
213 fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg src[],
214 unsigned sources);
215 fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
216 const fs_reg src[], unsigned sources);
217 fs_inst(const fs_inst &that);
218 ~fs_inst();
219
220 void resize_sources(uint8_t num_sources);
221
222 bool equals(fs_inst *inst) const;
223 bool overwrites_reg(const fs_reg &reg) const;
224 bool is_send_from_grf() const;
225 bool is_partial_write() const;
226 int regs_read(int arg) const;
227 bool can_do_source_mods(struct brw_context *brw);
228
229 bool reads_flag() const;
230 bool writes_flag() const;
231
232 fs_reg dst;
233 fs_reg *src;
234
235 uint8_t sources; /**< Number of fs_reg sources. */
236
237 /**
238 * Execution size of the instruction. This is used by the generator to
239 * generate the correct binary for the given fs_inst. Current valid
240 * values are 1, 8, 16.
241 */
242 uint8_t exec_size;
243
244 bool eot:1;
245 bool force_uncompressed:1;
246 bool force_sechalf:1;
247 bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */
248 };
249
250 #endif