2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
28 #include "main/macros.h"
29 #include "program/program.h"
30 #include "program/prog_print.h"
31 #include "brw_context.h"
32 #include "brw_defines.h"
35 const struct brw_instruction_info brw_opcodes
[128] = {
36 [BRW_OPCODE_MOV
] = { .name
= "mov", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
37 [BRW_OPCODE_FRC
] = { .name
= "frc", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
38 [BRW_OPCODE_RNDU
] = { .name
= "rndu", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
39 [BRW_OPCODE_RNDD
] = { .name
= "rndd", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
40 [BRW_OPCODE_RNDE
] = { .name
= "rnde", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
41 [BRW_OPCODE_RNDZ
] = { .name
= "rndz", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
42 [BRW_OPCODE_NOT
] = { .name
= "not", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
43 [BRW_OPCODE_LZD
] = { .name
= "lzd", .nsrc
= 1, .ndst
= 1 },
45 [BRW_OPCODE_MUL
] = { .name
= "mul", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
46 [BRW_OPCODE_MAC
] = { .name
= "mac", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
47 [BRW_OPCODE_MACH
] = { .name
= "mach", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
48 [BRW_OPCODE_LINE
] = { .name
= "line", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
49 [BRW_OPCODE_PLN
] = { .name
= "pln", .nsrc
= 2, .ndst
= 1 },
50 [BRW_OPCODE_SAD2
] = { .name
= "sad2", .nsrc
= 2, .ndst
= 1 },
51 [BRW_OPCODE_SADA2
] = { .name
= "sada2", .nsrc
= 2, .ndst
= 1 },
52 [BRW_OPCODE_DP4
] = { .name
= "dp4", .nsrc
= 2, .ndst
= 1 },
53 [BRW_OPCODE_DPH
] = { .name
= "dph", .nsrc
= 2, .ndst
= 1 },
54 [BRW_OPCODE_DP3
] = { .name
= "dp3", .nsrc
= 2, .ndst
= 1 },
55 [BRW_OPCODE_DP2
] = { .name
= "dp2", .nsrc
= 2, .ndst
= 1 },
56 [BRW_OPCODE_MATH
] = { .name
= "math", .nsrc
= 2, .ndst
= 1 },
58 [BRW_OPCODE_AVG
] = { .name
= "avg", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
59 [BRW_OPCODE_ADD
] = { .name
= "add", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
60 [BRW_OPCODE_SEL
] = { .name
= "sel", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
61 [BRW_OPCODE_AND
] = { .name
= "and", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
62 [BRW_OPCODE_OR
] = { .name
= "or", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
63 [BRW_OPCODE_XOR
] = { .name
= "xor", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
64 [BRW_OPCODE_SHR
] = { .name
= "shr", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
65 [BRW_OPCODE_SHL
] = { .name
= "shl", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
66 [BRW_OPCODE_ASR
] = { .name
= "asr", .nsrc
= 2, .ndst
= 1 },
67 [BRW_OPCODE_CMP
] = { .name
= "cmp", .nsrc
= 2, .ndst
= 1 },
68 [BRW_OPCODE_CMPN
] = { .name
= "cmpn", .nsrc
= 2, .ndst
= 1 },
70 [BRW_OPCODE_SEND
] = { .name
= "send", .nsrc
= 1, .ndst
= 1 },
71 [BRW_OPCODE_NOP
] = { .name
= "nop", .nsrc
= 0, .ndst
= 0 },
72 [BRW_OPCODE_JMPI
] = { .name
= "jmpi", .nsrc
= 1, .ndst
= 0 },
73 [BRW_OPCODE_IF
] = { .name
= "if", .nsrc
= 2, .ndst
= 0 },
74 [BRW_OPCODE_IFF
] = { .name
= "iff", .nsrc
= 2, .ndst
= 1 },
75 [BRW_OPCODE_WHILE
] = { .name
= "while", .nsrc
= 2, .ndst
= 0 },
76 [BRW_OPCODE_ELSE
] = { .name
= "else", .nsrc
= 2, .ndst
= 0 },
77 [BRW_OPCODE_BREAK
] = { .name
= "break", .nsrc
= 2, .ndst
= 0 },
78 [BRW_OPCODE_CONTINUE
] = { .name
= "cont", .nsrc
= 1, .ndst
= 0 },
79 [BRW_OPCODE_HALT
] = { .name
= "halt", .nsrc
= 1, .ndst
= 0 },
80 [BRW_OPCODE_MSAVE
] = { .name
= "msave", .nsrc
= 1, .ndst
= 1 },
81 [BRW_OPCODE_PUSH
] = { .name
= "push", .nsrc
= 1, .ndst
= 1 },
82 [BRW_OPCODE_MRESTORE
] = { .name
= "mrest", .nsrc
= 1, .ndst
= 1 },
83 [BRW_OPCODE_POP
] = { .name
= "pop", .nsrc
= 2, .ndst
= 0 },
84 [BRW_OPCODE_WAIT
] = { .name
= "wait", .nsrc
= 1, .ndst
= 0 },
85 [BRW_OPCODE_DO
] = { .name
= "do", .nsrc
= 0, .ndst
= 0 },
86 [BRW_OPCODE_ENDIF
] = { .name
= "endif", .nsrc
= 2, .ndst
= 0 },
90 GLboolean
brw_is_arithmetic_inst(const struct brw_instruction
*inst
)
92 return brw_opcodes
[inst
->header
.opcode
].is_arith
;
/* Seven-entry lookup table.  Elsewhere in this file it is indexed by the
 * encoded src0/src1 horizontal and vertical stride fields of an instruction
 * and the result is multiplied by the element size.
 * NOTE(review): the initializer entries are not present in this copy of the
 * file -- restore them from the upstream source.
 */
95 static const GLuint inst_stride
[7] = {
105 static const GLuint inst_type_size
[8] = {
106 [BRW_REGISTER_TYPE_UD
] = 4,
107 [BRW_REGISTER_TYPE_D
] = 4,
108 [BRW_REGISTER_TYPE_UW
] = 2,
109 [BRW_REGISTER_TYPE_W
] = 2,
110 [BRW_REGISTER_TYPE_UB
] = 1,
111 [BRW_REGISTER_TYPE_B
] = 1,
112 [BRW_REGISTER_TYPE_F
] = 4
115 static INLINE GLboolean
116 brw_is_grf_written(const struct brw_instruction
*inst
,
117 int reg_index
, int size
,
120 if (brw_opcodes
[inst
->header
.opcode
].ndst
== 0)
123 if (inst
->bits1
.da1
.dest_address_mode
!= BRW_ADDRESS_DIRECT
)
124 if (inst
->bits1
.ia1
.dest_reg_file
== BRW_GENERAL_REGISTER_FILE
)
127 if (inst
->bits1
.da1
.dest_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
130 const int reg_start
= reg_index
* REG_SIZE
;
131 const int reg_end
= reg_start
+ size
;
133 const int type_size
= inst_type_size
[inst
->bits1
.da1
.dest_reg_type
];
134 const int write_start
= inst
->bits1
.da1
.dest_reg_nr
*REG_SIZE
135 + inst
->bits1
.da1
.dest_subreg_nr
;
136 int length
, write_end
;
138 /* SEND is specific */
139 if (inst
->header
.opcode
== BRW_OPCODE_SEND
) {
141 length
= inst
->bits3
.generic_gen5
.response_length
*REG_SIZE
;
143 length
= inst
->bits3
.generic
.response_length
*REG_SIZE
;
146 length
= 1 << inst
->header
.execution_size
;
148 length
*= inst
->bits1
.da1
.dest_horiz_stride
;
151 /* If the two intervals intersect, we overwrite the register */
152 write_end
= write_start
+ length
;
153 const int left
= MAX2(write_start
, reg_start
);
154 const int right
= MIN2(write_end
, reg_end
);
160 brw_is_mrf_written_alu(const struct brw_instruction
*inst
,
161 int reg_index
, int size
)
163 if (brw_opcodes
[inst
->header
.opcode
].ndst
== 0)
166 if (inst
->bits1
.da1
.dest_reg_file
!= BRW_MESSAGE_REGISTER_FILE
)
169 if (inst
->bits1
.da1
.dest_address_mode
!= BRW_ADDRESS_DIRECT
)
172 const int reg_start
= reg_index
* REG_SIZE
;
173 const int reg_end
= reg_start
+ size
;
175 const int mrf_index
= inst
->bits1
.da1
.dest_reg_nr
& 0x0f;
176 const int is_compr4
= inst
->bits1
.da1
.dest_reg_nr
& BRW_MRF_COMPR4
;
177 const int type_size
= inst_type_size
[inst
->bits1
.da1
.dest_reg_type
];
179 /* We use compr4 with a size != 16 elements. Strange, we conservatively
180 * consider that we are writing the register.
182 if (is_compr4
&& inst
->header
.execution_size
!= BRW_EXECUTE_16
)
185 /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */
187 const int length
= 8 * type_size
* inst
->bits1
.da1
.dest_horiz_stride
;
189 /* First 8-way register */
190 const int write_start0
= mrf_index
*REG_SIZE
191 + inst
->bits1
.da1
.dest_subreg_nr
;
192 const int write_end0
= write_start0
+ length
;
194 /* Second 8-way register */
195 const int write_start1
= (mrf_index
+4)*REG_SIZE
196 + inst
->bits1
.da1
.dest_subreg_nr
;
197 const int write_end1
= write_start1
+ length
;
199 /* If the two intervals intersect, we overwrite the register */
200 const int left0
= MAX2(write_start0
, reg_start
);
201 const int right0
= MIN2(write_end0
, reg_end
);
202 const int left1
= MAX2(write_start1
, reg_start
);
203 const int right1
= MIN2(write_end1
, reg_end
);
205 if (left0
< right0
|| left1
< right1
)
210 length
= 1 << inst
->header
.execution_size
;
212 length
*= inst
->bits1
.da1
.dest_horiz_stride
;
214 /* If the two intervals intersect, we write into the register */
215 const int write_start
= inst
->bits1
.da1
.dest_reg_nr
*REG_SIZE
216 + inst
->bits1
.da1
.dest_subreg_nr
;
217 const int write_end
= write_start
+ length
;
218 const int left
= MAX2(write_start
, reg_start
);
219 const int right
= MIN2(write_end
, reg_end
);
228 /* SEND may perform an implicit mov to a mrf register */
229 static GLboolean
brw_is_mrf_written_send(const struct brw_instruction
*inst
,
230 int reg_index
, int size
)
233 const int reg_start
= reg_index
* REG_SIZE
;
234 const int reg_end
= reg_start
+ size
;
235 const int mrf_start
= inst
->header
.destreg__conditionalmod
;
236 const int write_start
= mrf_start
* REG_SIZE
;
237 const int write_end
= write_start
+ REG_SIZE
;
238 const int left
= MAX2(write_start
, reg_start
);
239 const int right
= MIN2(write_end
, reg_end
);
241 if (inst
->header
.opcode
!= BRW_OPCODE_SEND
||
242 inst
->bits1
.da1
.src0_reg_file
== 0)
248 /* Specific path for message register since we need to handle the compr4 case */
249 static INLINE GLboolean
250 brw_is_mrf_written(const struct brw_instruction
*inst
, int reg_index
, int size
)
252 return (brw_is_mrf_written_alu(inst
, reg_index
, size
) ||
253 brw_is_mrf_written_send(inst
, reg_index
, size
));
/* Tests a SEND instruction against the message-register range that starts
 * at reg_index * REG_SIZE and spans `size` units.  The visible code takes
 * the message length from bits3 (generic_gen5 or generic layout) scaled by
 * REG_SIZE, starts the read at header.destreg__conditionalmod (or one
 * register further, depending on src0_reg_file), and intersects the two
 * intervals.
 * NOTE(review): several statements are missing from this copy of the file
 * (the results of the two guards, the test on `gen` selecting the bits3
 * layout, the extra statement of the no-implicit-move branch, and the final
 * return) -- restore them from the upstream source before relying on it.
 */
256 static INLINE GLboolean
257 brw_is_mrf_read(const struct brw_instruction
*inst
,
258 int reg_index
, int size
, int gen
)
260 if (inst
->header
.opcode
!= BRW_OPCODE_SEND
)
262 if (inst
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
)
265 const int reg_start
= reg_index
*REG_SIZE
;
266 const int reg_end
= reg_start
+ size
;
268 int length
, read_start
, read_end
;
270 length
= inst
->bits3
.generic_gen5
.msg_length
*REG_SIZE
;
272 length
= inst
->bits3
.generic
.msg_length
*REG_SIZE
;
274 /* Look if SEND uses an implicit mov. In that case, we read one less register
277 if (inst
->bits1
.da1
.src0_reg_file
!= 0)
278 read_start
= inst
->header
.destreg__conditionalmod
;
281 read_start
= inst
->header
.destreg__conditionalmod
+ 1;
283 read_start
*= REG_SIZE
;
284 read_end
= read_start
+ length
;
286 const int left
= MAX2(read_start
, reg_start
);
287 const int right
= MIN2(read_end
, reg_end
);
292 static INLINE GLboolean
293 brw_is_grf_read(const struct brw_instruction
*inst
, int reg_index
, int size
)
296 if (brw_opcodes
[inst
->header
.opcode
].nsrc
== 0)
299 /* Look at first source. We must take into account register regions to
300 * monitor carefully the read. Note that we are a bit too conservative here
301 * since we do not take into account the fact that some complete registers
304 if (brw_opcodes
[inst
->header
.opcode
].nsrc
>= 1) {
306 if (inst
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
)
307 if (inst
->bits1
.ia1
.src0_reg_file
== BRW_GENERAL_REGISTER_FILE
)
309 if (inst
->bits1
.da1
.src0_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
312 const int reg_start
= reg_index
*REG_SIZE
;
313 const int reg_end
= reg_start
+ size
;
315 /* See if at least one of this element intersects the interval */
316 const int type_size
= inst_type_size
[inst
->bits1
.da1
.src0_reg_type
];
317 const int elem_num
= 1 << inst
->header
.execution_size
;
318 const int width
= 1 << inst
->bits2
.da1
.src0_width
;
319 const int row_num
= elem_num
>> inst
->bits2
.da1
.src0_width
;
320 const int hs
= type_size
*inst_stride
[inst
->bits2
.da1
.src0_horiz_stride
];
321 const int vs
= type_size
*inst_stride
[inst
->bits2
.da1
.src0_vert_stride
];
322 int row_start
= inst
->bits2
.da1
.src0_reg_nr
*REG_SIZE
323 + inst
->bits2
.da1
.src0_subreg_nr
;
324 for (j
= 0; j
< row_num
; ++j
) {
325 int write_start
= row_start
;
326 for (i
= 0; i
< width
; ++i
) {
327 const int write_end
= write_start
+ type_size
;
328 const int left
= write_start
> reg_start
? write_start
: reg_start
;
329 const int right
= write_end
< reg_end
? write_end
: reg_end
;
338 /* Second src register */
339 if (brw_opcodes
[inst
->header
.opcode
].nsrc
>= 2) {
341 if (inst
->bits3
.da1
.src1_address_mode
!= BRW_ADDRESS_DIRECT
)
342 if (inst
->bits1
.ia1
.src1_reg_file
== BRW_GENERAL_REGISTER_FILE
)
344 if (inst
->bits1
.da1
.src1_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
347 const int reg_start
= reg_index
*REG_SIZE
;
348 const int reg_end
= reg_start
+ size
;
350 /* See if at least one of this element intersects the interval */
351 const int type_size
= inst_type_size
[inst
->bits1
.da1
.src1_reg_type
];
352 const int elem_num
= 1 << inst
->header
.execution_size
;
353 const int width
= 1 << inst
->bits3
.da1
.src1_width
;
354 const int row_num
= elem_num
>> inst
->bits3
.da1
.src1_width
;
355 const int hs
= type_size
*inst_stride
[inst
->bits3
.da1
.src1_horiz_stride
];
356 const int vs
= type_size
*inst_stride
[inst
->bits3
.da1
.src1_vert_stride
];
357 int row_start
= inst
->bits3
.da1
.src1_reg_nr
*REG_SIZE
358 + inst
->bits3
.da1
.src1_subreg_nr
;
359 for (j
= 0; j
< row_num
; ++j
) {
360 int write_start
= row_start
;
361 for (i
= 0; i
< width
; ++i
) {
362 const int write_end
= write_start
+ type_size
;
363 const int left
= write_start
> reg_start
? write_start
: reg_start
;
364 const int right
= write_end
< reg_end
? write_end
: reg_end
;
376 static INLINE GLboolean
377 brw_is_control_done(const struct brw_instruction
*mov
) {
379 mov
->header
.dependency_control
!= 0 ||
380 mov
->header
.thread_control
!= 0 ||
381 mov
->header
.mask_control
!= 0 ||
382 mov
->header
.saturate
!= 0 ||
383 mov
->header
.debug_control
!= 0;
386 static INLINE GLboolean
387 brw_is_predicated(const struct brw_instruction
*mov
) {
388 return mov
->header
.predicate_control
!= 0;
391 static INLINE GLboolean
392 brw_is_grf_to_mrf_mov(const struct brw_instruction
*mov
,
395 GLboolean
*is_compr4
)
397 if (brw_is_predicated(mov
) ||
398 brw_is_control_done(mov
) ||
399 mov
->header
.debug_control
!= 0)
402 if (mov
->bits1
.da1
.dest_address_mode
!= BRW_ADDRESS_DIRECT
||
403 mov
->bits1
.da1
.dest_reg_file
!= BRW_MESSAGE_REGISTER_FILE
||
404 mov
->bits1
.da1
.dest_reg_type
!= BRW_REGISTER_TYPE_F
||
405 mov
->bits1
.da1
.dest_horiz_stride
!= BRW_HORIZONTAL_STRIDE_1
||
406 mov
->bits1
.da1
.dest_subreg_nr
!= 0)
409 if (mov
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
||
410 mov
->bits1
.da1
.src0_reg_file
!= BRW_GENERAL_REGISTER_FILE
||
411 mov
->bits1
.da1
.src0_reg_type
!= BRW_REGISTER_TYPE_F
||
412 mov
->bits2
.da1
.src0_width
!= BRW_WIDTH_8
||
413 mov
->bits2
.da1
.src0_horiz_stride
!= BRW_HORIZONTAL_STRIDE_1
||
414 mov
->bits2
.da1
.src0_vert_stride
!= BRW_VERTICAL_STRIDE_8
||
415 mov
->bits2
.da1
.src0_subreg_nr
!= 0 ||
416 mov
->bits2
.da1
.src0_abs
!= 0 ||
417 mov
->bits2
.da1
.src0_negate
!= 0)
420 *grf_index
= mov
->bits2
.da1
.src0_reg_nr
;
421 *mrf_index
= mov
->bits1
.da1
.dest_reg_nr
& 0x0f;
422 *is_compr4
= (mov
->bits1
.da1
.dest_reg_nr
& BRW_MRF_COMPR4
) != 0;
426 static INLINE GLboolean
427 brw_is_grf_straight_write(const struct brw_instruction
*inst
, int grf_index
)
429 /* remark: no problem to predicate a SEL instruction */
430 if ((!brw_is_predicated(inst
) || inst
->header
.opcode
== BRW_OPCODE_SEL
) &&
431 brw_is_control_done(inst
) == GL_FALSE
&&
432 inst
->header
.execution_size
== 4 &&
433 inst
->header
.access_mode
== BRW_ALIGN_1
&&
434 inst
->bits1
.da1
.dest_address_mode
== BRW_ADDRESS_DIRECT
&&
435 inst
->bits1
.da1
.dest_reg_file
== BRW_GENERAL_REGISTER_FILE
&&
436 inst
->bits1
.da1
.dest_reg_type
== BRW_REGISTER_TYPE_F
&&
437 inst
->bits1
.da1
.dest_horiz_stride
== BRW_HORIZONTAL_STRIDE_1
&&
438 inst
->bits1
.da1
.dest_reg_nr
== grf_index
&&
439 inst
->bits1
.da1
.dest_subreg_nr
== 0 &&
440 brw_is_arithmetic_inst(inst
))
446 static INLINE GLboolean
447 brw_inst_are_equal(const struct brw_instruction
*src0
,
448 const struct brw_instruction
*src1
)
450 const GLuint
*field0
= (GLuint
*) src0
;
451 const GLuint
*field1
= (GLuint
*) src1
;
452 return field0
[0] == field1
[0] &&
453 field0
[1] == field1
[1] &&
454 field0
[2] == field1
[2] &&
455 field0
[3] == field1
[3];
459 brw_inst_copy(struct brw_instruction
*dst
,
460 const struct brw_instruction
*src
)
462 GLuint
*field_dst
= (GLuint
*) dst
;
463 const GLuint
*field_src
= (GLuint
*) src
;
464 field_dst
[0] = field_src
[0];
465 field_dst
[1] = field_src
[1];
466 field_dst
[2] = field_src
[2];
467 field_dst
[3] = field_src
[3];
470 static void brw_remove_inst(struct brw_compile
*p
, const GLboolean
*removeInst
)
472 int i
, nr_insn
= 0, to
= 0, from
= 0;
474 for (from
= 0; from
< p
->nr_insn
; ++from
) {
475 if (removeInst
[from
])
478 brw_inst_copy(p
->store
+ to
, p
->store
+ from
);
482 for (i
= 0; i
< p
->nr_insn
; ++i
)
483 if (removeInst
[i
] == GL_FALSE
)
485 p
->nr_insn
= nr_insn
;
488 /* The gen code emitter generates a lot of duplications in the
489 * grf-to-mrf moves, for example when texture sampling with the same
490 * coordinates from multiple textures.. Here, we monitor same mov
491 * grf-to-mrf instrutions and remove repeated ones where the operands
492 * and dst ahven't changed in between.
494 void brw_remove_duplicate_mrf_moves(struct brw_compile
*p
)
496 const int gen
= p
->brw
->intel
.gen
;
499 GLboolean
*removeInst
= calloc(sizeof(GLboolean
), p
->nr_insn
);
500 for (i
= 0; i
< p
->nr_insn
; i
++) {
504 const struct brw_instruction
*mov
= p
->store
+ i
;
505 int mrf_index
, grf_index
;
508 /* Only consider _straight_ grf-to-mrf moves */
509 if (!brw_is_grf_to_mrf_mov(mov
, &mrf_index
, &grf_index
, &is_compr4
))
512 const int mrf_index0
= mrf_index
;
513 const int mrf_index1
= is_compr4
? mrf_index0
+4 : mrf_index0
+1;
514 const int simd16_size
= 2 * REG_SIZE
;
516 for (j
= i
+ 1; j
< p
->nr_insn
; j
++) {
517 const struct brw_instruction
*inst
= p
->store
+ j
;
519 if (brw_inst_are_equal(mov
, inst
)) {
520 removeInst
[j
] = GL_TRUE
;
524 if (brw_is_grf_written(inst
, grf_index
, simd16_size
, gen
) ||
525 brw_is_mrf_written(inst
, mrf_index0
, REG_SIZE
) ||
526 brw_is_mrf_written(inst
, mrf_index1
, REG_SIZE
))
531 brw_remove_inst(p
, removeInst
);
535 /* Replace moves to MRFs where the value moved is the result of a
536 * normal arithmetic operation with computation right into the MRF.
538 void brw_remove_grf_to_mrf_moves(struct brw_compile
*p
)
541 struct brw_context
*brw
= p
->brw
;
542 const int gen
= brw
->intel
.gen
;
543 const int simd16_size
= 2*REG_SIZE
;
545 GLboolean
*removeInst
= calloc(sizeof(GLboolean
), p
->nr_insn
);
548 for (i
= 0; i
< p
->nr_insn
; i
++) {
552 struct brw_instruction
*grf_inst
= NULL
;
553 const struct brw_instruction
*mov
= p
->store
+ i
;
554 int mrf_index
, grf_index
;
557 /* Only consider _straight_ grf-to-mrf moves */
558 if (!brw_is_grf_to_mrf_mov(mov
, &mrf_index
, &grf_index
, &is_compr4
))
561 /* Using comp4 enables a stride of 4 for this instruction */
562 const int mrf_index0
= mrf_index
;
563 const int mrf_index1
= is_compr4
? mrf_index
+4 : mrf_index
+1;
565 /* Look where the register has been set */
567 GLboolean potential_remove
= GL_FALSE
;
570 /* If _one_ instruction writes the grf, we try to remove the mov */
571 struct brw_instruction
*inst
= p
->store
+ prev
;
572 if (brw_is_grf_straight_write(inst
, grf_index
)) {
573 potential_remove
= GL_TRUE
;
580 if (potential_remove
== GL_FALSE
)
582 removeInst
[i
] = GL_TRUE
;
584 /* Monitor first the section of code between the grf computation and the
585 * mov. Here we cannot read or write both mrf and grf register
587 for (j
= prev
+ 1; j
< i
; ++j
) {
588 struct brw_instruction
*inst
= p
->store
+ j
;
591 if (brw_is_grf_written(inst
, grf_index
, simd16_size
, gen
) ||
592 brw_is_grf_read(inst
, grf_index
, simd16_size
) ||
593 brw_is_mrf_written(inst
, mrf_index0
, REG_SIZE
) ||
594 brw_is_mrf_written(inst
, mrf_index1
, REG_SIZE
) ||
595 brw_is_mrf_read(inst
, mrf_index0
, REG_SIZE
, gen
) ||
596 brw_is_mrf_read(inst
, mrf_index1
, REG_SIZE
, gen
)) {
597 removeInst
[i
] = GL_FALSE
;
602 /* After the mov, we can read or write the mrf. If the grf is overwritten,
605 for (j
= i
+ 1; j
< p
->nr_insn
; ++j
) {
606 struct brw_instruction
*inst
= p
->store
+ j
;
610 if (brw_is_grf_read(inst
, grf_index
, simd16_size
)) {
611 removeInst
[i
] = GL_FALSE
;
615 if (brw_is_grf_straight_write(inst
, grf_index
))
619 /* Note that with the top down traversal, we can safely pacth the mov
623 grf_inst
->bits1
.da1
.dest_reg_file
= mov
->bits1
.da1
.dest_reg_file
;
624 grf_inst
->bits1
.da1
.dest_reg_nr
= mov
->bits1
.da1
.dest_reg_nr
;
628 brw_remove_inst(p
, removeInst
);
633 is_single_channel_dp4(struct brw_instruction
*insn
)
635 if (insn
->header
.opcode
!= BRW_OPCODE_DP4
||
636 insn
->header
.execution_size
!= BRW_EXECUTE_8
||
637 insn
->header
.access_mode
!= BRW_ALIGN_16
||
638 insn
->bits1
.da1
.dest_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
641 if (!is_power_of_two(insn
->bits1
.da16
.dest_writemask
))
648 * Sets the dependency control fields on DP4 instructions.
650 * The hardware only tracks dependencies on a register basis, so when
653 * DP4 dst.x src1 src2
654 * DP4 dst.y src1 src3
655 * DP4 dst.z src1 src4
656 * DP4 dst.w src1 src5
658 * It will wait to do the DP4 dst.y until the dst.x is resolved, etc.
659 * We can examine our instruction stream and set the dependency
660 * control fields to tell the hardware when to do it.
662 * We may want to extend this to other instructions that are used to
663 * fill in a channel at a time of the destination register.
666 brw_set_dp4_dependency_control(struct brw_compile
*p
)
670 for (i
= 1; i
< p
->nr_insn
; i
++) {
671 struct brw_instruction
*insn
= &p
->store
[i
];
672 struct brw_instruction
*prev
= &p
->store
[i
- 1];
674 if (!is_single_channel_dp4(prev
))
677 if (!is_single_channel_dp4(insn
)) {
682 /* Only avoid hw dep control if the write masks are different
683 * channels of one reg.
685 if (insn
->bits1
.da16
.dest_writemask
== prev
->bits1
.da16
.dest_writemask
)
687 if (insn
->bits1
.da16
.dest_reg_nr
!= prev
->bits1
.da16
.dest_reg_nr
)
690 /* Check if the second instruction depends on the previous one
693 if (insn
->bits1
.da1
.src0_reg_file
== BRW_GENERAL_REGISTER_FILE
&&
694 (insn
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
||
695 insn
->bits2
.da1
.src0_reg_nr
== insn
->bits1
.da16
.dest_reg_nr
))
697 if (insn
->bits1
.da1
.src1_reg_file
== BRW_GENERAL_REGISTER_FILE
&&
698 (insn
->bits3
.da1
.src1_address_mode
!= BRW_ADDRESS_DIRECT
||
699 insn
->bits3
.da1
.src1_reg_nr
== insn
->bits1
.da16
.dest_reg_nr
))
702 prev
->header
.dependency_control
|= BRW_DEPENDENCY_NOTCLEARED
;
703 insn
->header
.dependency_control
|= BRW_DEPENDENCY_NOTCHECKED
;
/**
 * Entry point of the post-emit optimizations: currently only the DP4
 * dependency-control pass is run on the instruction store of \p p.
 */
void
brw_optimize(struct brw_compile *p)
{
   brw_set_dp4_dependency_control(p);
}