2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 /** @file brw_fs_combine_constants.cpp
26 * This file contains the opt_combine_constants() pass that runs after the
27 * regular optimization loop. It passes over the instruction list and
28 * selectively promotes immediate values to registers by emitting a mov(1)
 * instruction.
31 * This is useful on Gen 7 particularly, because a few instructions can be
32 * coissued (i.e., issued in the same cycle as another thread on the same EU
33 * issues an instruction) under some circumstances, one of which is that they
34 * cannot use immediate values.
// Constant flag, initialised to false.
// NOTE(review): presumably gates the printf dump of the immediate table at the
// end of opt_combine_constants(); the guarding statement is not visible in
// this extract, so confirm against the full file.
42 static const bool debug
= false;
44 /* Returns whether an instruction could co-issue if its immediate source were
45 * replaced with a GRF source.
// could_coissue: per the description above, answers whether inst could
// co-issue if its immediate source were replaced with a GRF source.
//   devinfo - device description; only its gen field is read in the visible code.
//   inst    - instruction under test; only its opcode is read in the visible code.
// NOTE(review): this extract is lossy. The return type, the statement guarded
// by the gen test and every case label of the switch are missing, so the set
// of opcodes treated as co-issuable cannot be read from here.
48 could_coissue(const struct brw_device_info
*devinfo
, const fs_inst
*inst
)
// Generation test: any gen other than 7 takes the guarded (missing) statement,
// presumably an early negative result, in line with the file header remark
// that co-issue matters on Gen 7 - TODO confirm.
50 if (devinfo
->gen
!= 7)
// Dispatch on the opcode; the case list is not visible in this extract.
53 switch (inst
->opcode
) {
65 * Returns true for instructions that don't support immediate sources.
// must_promote_imm: per the description above, true for instructions that do
// not support immediate sources.
//   devinfo - device description; only its gen field is read in the visible code.
//   inst    - instruction under test; classified by its opcode.
// NOTE(review): this extract is lossy. The return type and every case after
// SHADER_OPCODE_POW (including any default) are missing.
68 must_promote_imm(const struct brw_device_info
*devinfo
, const fs_inst
*inst
)
70 switch (inst
->opcode
) {
// POW: promotion is required only when gen is below 8.
71 case SHADER_OPCODE_POW
:
72 return devinfo
->gen
< 8;
81 /** A box for putting fs_regs in a linked list. */
// Body of the reg_link box described above.
// NOTE(review): this extract is lossy. The line opening the struct and the
// declaration of the reg member (initialised by the constructor below and read
// as link->reg in opt_combine_constants()) are missing.

// Presumably supplies the allocation operators that let link() write
// new(mem_ctx) reg_link(...) - confirm against the macro definition.
83 DECLARE_RALLOC_CXX_OPERATORS(reg_link
)
// Stores the caller-supplied fs_reg pointer; the fs_reg itself is not copied.
85 reg_link(fs_reg
*reg
) : reg(reg
) {}
// Embedded list node. It is named as the field argument of the
// foreach_list_typed walk over each use list in opt_combine_constants().
87 struct exec_node link
;
// link: allocates a reg_link box for reg out of mem_ctx and yields an
// exec_node pointer; callers pass the result straight to exec_list push_tail.
//   mem_ctx - context handed to the placement form of new.
//   reg     - register to be boxed; stored by pointer.
// NOTE(review): the return statement is missing from this extract; presumably
// it returns the address of the link node embedded in the new box - confirm.
91 static struct exec_node
*
92 link(void *mem_ctx
, fs_reg
*reg
)
94 reg_link
*l
= new(mem_ctx
) reg_link(reg
);
99 * Information about an immediate value.
// Fields of struct imm, one record per distinct immediate value.
// NOTE(review): this extract is lossy. Only subreg_offset, uses_by_coissue,
// first_use_ip and last_use_ip keep their declarations; the members that the
// rest of the file accesses as block, inst, uses, val, nr and must_promote
// have lost theirs, and so has the line opening the struct.
102 /** The common ancestor of all blocks using this immediate value. */
106 * The instruction generating the immediate value, if all uses are contained
107 * within a single basic block. Otherwise, NULL.
112 * A list of fs_regs that refer to this immediate. If we promote it, we'll
113 * have to patch these up to refer to the new GRF.
117 /** The immediate value. We currently only handle floats. */
121 * The GRF register and subregister number where we've decided to store the
// Offset inside the register, copied from the destination fs_reg whose
// subreg_offset opt_combine_constants() advances in steps of sizeof(float).
124 uint8_t subreg_offset
;
127 /** The number of coissuable instructions using this immediate. */
128 uint16_t uses_by_coissue
;
131 * Whether this constant is used by an instruction that can't handle an
132 * immediate source (and already has to be promoted to a GRF).
// Values of the ip counter at the first and at the most recent recorded use.
// NOTE(review): both fields are 16 bits wide, so an ip above 65535 would be
// truncated on assignment; the type of ip is not visible in this extract.
136 uint16_t first_use_ip
;
137 uint16_t last_use_ip
;
140 /** The working set of information about immediates. */
// find_imm: linear search of the table for an entry whose val equals val.
//   table - working set; entries with index in [0, len) are examined.
//   val   - value to look up; its sign bit must be clear (asserted below).
// The match uses exact float equality, so a NaN argument never matches any
// entry, including one that already holds a NaN.
// NOTE(review): this extract is lossy. The return type and the not-found
// return are missing; the caller in opt_combine_constants() stores the result
// in a struct imm pointer, and the miss value is presumably a null pointer.
148 find_imm(struct table
*table
, float val
)
// Callers are expected to pass a magnitude; the visible caller applies fabsf.
150 assert(signbit(val
) == 0);
152 for (int i
= 0; i
< table
->len
; i
++) {
153 if (table
->imm
[i
].val
== val
) {
// Hit: hand back the address of the slot inside the table array.
154 return &table
->imm
[i
];
// new_imm: hands out the next unused slot of the table and increments len.
//   table   - working set to extend.
//   mem_ctx - ralloc context passed to reralloc when the array is reallocated.
// No field of the returned slot is written in the visible code; the caller
// fills it in.
// NOTE(review): this extract is lossy. The return type is missing, and so is
// original line 164 between the capacity test and the reralloc call, which
// presumably enlarges table->size; as shown, the reralloc would request the
// same capacity again.
// NOTE(review): pointers returned earlier presumably become stale if reralloc
// moves the array - confirm against the ralloc documentation.
161 new_imm(struct table
*table
, void *mem_ctx
)
// Table full: reallocate the backing array before handing out a slot.
163 if (table
->len
== table
->size
) {
165 table
->imm
= reralloc(mem_ctx
, table
->imm
, struct imm
, table
->size
);
// Post-increment: the slot at the old len is returned, then len grows by one.
167 return &table
->imm
[table
->len
++];
171 * Comparator used for sorting an array of imm structures.
173 * We sort by basic block number, then last use IP, then first use IP (least
174 * to greatest). This sorting causes immediates live in the same area to be
175 * allocated to the same register in the hopes that all values will be dead
176 * about the same time and the register can be reused.
// compare: comparator over struct imm records, following the ordering
// described above; it is the callback passed to qsort in
// opt_combine_constants().
//   _a, _b - pointers to the two struct imm elements being compared.
// NOTE(review): this extract is lossy. The return type and the statements that
// consume block_diff and end_diff are missing; presumably each non-zero
// difference is returned before the next key is examined - confirm.
179 compare(const void *_a
, const void *_b
)
181 const struct imm
*a
= (const struct imm
*)_a
,
182 *b
= (const struct imm
*)_b
;
// First key: number of the owning block.
184 int block_diff
= a
->block
->num
- b
->block
->num
;
// Second key: ip of the last use.
188 int end_diff
= a
->last_use_ip
- b
->last_use_ip
;
// Last key: ip of the first use. The uint16_t operands are promoted before
// the subtraction, so it cannot wrap where int is wider than 16 bits.
192 return a
->first_use_ip
- b
->first_use_ip
;
// opt_combine_constants: promotes selected float immediates to GRF storage.
// Visible phases:
//   1. Scan every instruction and record, per float magnitude, the sources
//      that use it, the co-issue and must-promote tallies and the ip range.
//   2. Drop entries that are neither flagged must_promote nor used by at
//      least 4 co-issuable instructions.
//   3. Sort the survivors with compare unless the CFG has exactly one block.
//   4. Emit one MOV per surviving constant, moving to a freshly allocated
//      register after every 8 floats.
//   5. Patch each recorded source to read the register, setting negate when
//      the sign of the original immediate differs from the stored value.
// NOTE(review): this extract is lossy. The return type, the declarations of
// table and ip, several guarded statements, the else and null-test lines, the
// debug guard and every return statement are missing.
196 fs_visitor::opt_combine_constants()
// Scratch ralloc context; released by ralloc_free on the empty-table path and
// again at the end of the function.
198 void *const_ctx
= ralloc_context(NULL
);
// Initial backing array for the table, sized by table.size.
// NOTE(review): the declaration of table and its initial size are not visible.
203 table
.imm
= ralloc_array(const_ctx
, struct imm
, table
.size
);
// Dominator information is computed up front, presumably because
// cfg_t::intersect below depends on it - TODO confirm.
205 cfg
->calculate_idom();
208 /* Make a pass through all instructions and count the number of times each
209 * constant is used by coissueable instructions or instructions that cannot
210 * take immediate arguments.
212 foreach_block_and_inst(block
, fs_inst
, inst
, cfg
) {
// Only instructions that could co-issue or that cannot take an immediate are
// of interest. The guarded statement is missing; presumably it skips to the
// next instruction.
215 if (!could_coissue(devinfo
, inst
) && !must_promote_imm(devinfo
, inst
))
218 for (int i
= 0; i
< inst
->sources
; i
++) {
// Only float immediates are considered. The guarded statement is missing;
// presumably it skips this source.
219 if (inst
->src
[i
].file
!= IMM
||
220 inst
->src
[i
].type
!= BRW_REGISTER_TYPE_F
)
// The table is keyed on the magnitude, so x and -x share one entry; the sign
// is restored later through the negate flag on the rewritten source.
223 float val
= fabsf(inst
->src
[i
].f
);
224 struct imm
*imm
= find_imm(&table
, val
);
// Existing entry (the test on imm is missing from this extract): recompute
// the owning block as cfg_t::intersect of this block and the recorded one.
227 bblock_t
*intersection
= cfg_t::intersect(block
, imm
->block
);
// The statement guarded by this test is missing from this extract.
228 if (intersection
!= imm
->block
)
230 imm
->block
= intersection
;
// Remember this source so that it can be patched after promotion.
231 imm
->uses
->push_tail(link(const_ctx
, &inst
->src
[i
]));
// The bool result of could_coissue is added as 0 or 1.
232 imm
->uses_by_coissue
+= could_coissue(devinfo
, inst
);
// Sticky flag: once any use requires promotion it stays set.
233 imm
->must_promote
= imm
->must_promote
|| must_promote_imm(devinfo
, inst
);
234 imm
->last_use_ip
= ip
;
// First sighting of this value (the else line is missing from this extract):
// take a new slot and start its use list.
// NOTE(review): the assignments of block, inst and val for a new entry are
// not visible here.
236 imm
= new_imm(&table
, const_ctx
);
239 imm
->uses
= new(const_ctx
) exec_list();
240 imm
->uses
->push_tail(link(const_ctx
, &inst
->src
[i
]));
242 imm
->uses_by_coissue
= could_coissue(devinfo
, inst
);
243 imm
->must_promote
= must_promote_imm(devinfo
, inst
);
// A new entry starts with an empty range: first and last use are both ip.
244 imm
->first_use_ip
= ip
;
245 imm
->last_use_ip
= ip
;
250 /* Remove constants from the table that don't have enough uses to make them
251 * profitable to store in a register.
// The loop header has no increment clause. The visible removal overwrites
// slot i with the last slot; the matching length decrement and the index
// advance for kept entries are not visible in this extract.
253 for (int i
= 0; i
< table
.len
;) {
254 struct imm
*imm
= &table
.imm
[i
];
// Threshold: entries not flagged must_promote need at least 4 co-issuable uses.
256 if (!imm
->must_promote
&& imm
->uses_by_coissue
< 4) {
257 table
.imm
[i
] = table
.imm
[table
.len
- 1];
// Nothing left to promote: release the scratch context. The return statement
// that presumably follows is not visible.
263 if (table
.len
== 0) {
264 ralloc_free(const_ctx
);
// Sorting is skipped when the CFG has exactly one block.
267 if (cfg
->num_blocks
!= 1)
268 qsort(table
.imm
, table
.len
, sizeof(struct imm
), compare
);
270 /* Insert MOVs to load the constant values into GRFs. */
// First destination register, obtained from the VGRF allocator.
271 fs_reg
reg(VGRF
, alloc
.allocate(1));
273 for (int i
= 0; i
< table
.len
; i
++) {
274 struct imm
*imm
= &table
.imm
[i
];
275 /* Insert it either before the instruction that generated the immediate
276 * or after the last non-control flow instruction of the common ancestor.
// n is the insertion point: imm->inst when it is set, otherwise the node
// following the last non-control-flow instruction of imm->block.
278 exec_node
*n
= (imm
->inst
? imm
->inst
:
279 imm
->block
->last_non_control_flow_inst()->next
);
// Builder positioned at n in imm->block, with exec_all() and group(1, 0).
280 const fs_builder ibld
= bld
.at(imm
->block
, n
).exec_all().group(1, 0);
282 ibld
.MOV(reg
, brw_imm_f(imm
->val
));
// Record where the constant lives. NOTE(review): the matching store of the
// register number into imm->nr is not visible in this extract.
284 imm
->subreg_offset
= reg
.subreg_offset
;
// Advance by one float; after 8 floats switch to a newly allocated register.
286 reg
.subreg_offset
+= sizeof(float);
287 if ((unsigned)reg
.subreg_offset
== 8 * sizeof(float)) {
288 reg
.nr
= alloc
.allocate(1);
289 reg
.subreg_offset
= 0;
// Publish how many constants were promoted.
292 promoted_constants
= table
.len
;
294 /* Rewrite the immediate sources to refer to the new GRFs. */
295 for (int i
= 0; i
< table
.len
; i
++) {
// Walk the boxed sources recorded for this constant.
296 foreach_list_typed(reg_link
, link
, link
, table
.imm
[i
].uses
) {
297 fs_reg
*reg
= link
->reg
;
// Point the source at the register and offset chosen for this constant.
299 reg
->nr
= table
.imm
[i
].nr
;
300 reg
->subreg_offset
= table
.imm
[i
].subreg_offset
;
// Negate when the sign of the original immediate differs from the sign of
// the stored value.
302 reg
->negate
= signbit(reg
->f
) != signbit(table
.imm
[i
].val
);
// Sanity check: the magnitudes must agree. NaN is accepted on both sides
// because it never compares equal.
303 assert((isnan(reg
->f
) && isnan(table
.imm
[i
].val
)) ||
304 fabsf(reg
->f
) == table
.imm
[i
].val
);
// Diagnostic dump of the table, one line per promoted constant.
// NOTE(review): the guard around this loop and most printf arguments are
// missing from this extract; presumably it runs only when debug is set.
309 for (int i
= 0; i
< table
.len
; i
++) {
310 struct imm
*imm
= &table
.imm
[i
];
312 printf("%.3fF - block %3d, reg %3d sub %2d, Uses: (%2d, %2d), "
313 "IP: %4d to %4d, length %4d\n",
319 imm
->uses_by_coissue
,
322 imm
->last_use_ip
- imm
->first_use_ip
);
// Release the scratch context, then invalidate the cached live intervals.
326 ralloc_free(const_ctx
);
327 invalidate_live_intervals();