2 * Copyright © 2015 Connor Abbott
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "brw_fs_builder.h"
// Opcode-based predicate on an instruction; fs_visitor::lower_conversions()
// branches on its result (`if (supports_type_conversion(inst))`).
// NOTE(review): this listing drops some original lines (the embedded line
// numbers jump 32 -> 34 -> 37 -> 39), so the return type, some case labels
// and returns, and the closing braces are not visible here.
31 supports_type_conversion(const fs_inst
*inst
) {
// Dispatch on the instruction's opcode.
32 switch (inst
->opcode
) {
34 case SHADER_OPCODE_MOV_INDIRECT
:
// Visible return: true only when the destination type equals the type
// reported by get_exec_type(inst).
// NOTE(review): the case label this return belongs to may be on one of the
// dropped lines (35-36) -- confirm against the full file.
37 return inst
->dst
.type
== get_exec_type(inst
);
39 /* FIXME: We assume the opcodes that aren't explicitly mentioned
40 * above just work fine with arbitrary conversions.
// Walks every instruction of the CFG and, where required, splits a type
// conversion in two: the original instruction is retargeted to a temporary
// and a MOV to the original destination is inserted right after it.
// NOTE(review): this listing drops some original lines (the embedded line
// numbers skip), so the return type, braces, some branch headers and the end
// of the function are not visible here.
47 fs_visitor::lower_conversions()
49 bool progress
= false;
// Visit each (block, inst) pair of `cfg`.
51 foreach_block_and_inst(block
, fs_inst
, inst
, cfg
) {
// Builder constructed from this visitor, the current block and instruction.
52 const fs_builder
ibld(this, block
, inst
);
// Remember the original destination and saturate flag: the instruction's
// own dst/saturate are overwritten below and these saved values are used
// for the MOV that is inserted after it.
53 fs_reg dst
= inst
->dst
;
54 bool saturate
= inst
->saturate
;
56 if (supports_type_conversion(inst
)) {
// Only act when the execution type size is 8 while the destination type
// size is below 8 (sizes presumably in bytes -- confirm).
57 if (get_exec_type_size(inst
) == 8 && type_sz(inst
->dst
.type
) < 8) {
58 /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
59 * Single Precision Float":
61 * The upper Dword of every Qword will be written with undefined
62 * value when converting DF to F.
64 * So we need to allocate a temporary that's two registers, and then do
65 * a strided MOV to get the lower DWord of every Qword that has the
// temp: register obtained from ibld.vgrf() with the execution type.
// strided_temp: subscript(temp, dst.type, 0), i.e. temp viewed with the
// destination type at sub-element index 0.
68 fs_reg temp
= ibld
.vgrf(get_exec_type(inst
));
69 fs_reg strided_temp
= subscript(temp
, dst
.type
, 0);
// Sanity check before retargeting: size_written must equal what the
// current destination reports for exec_size channels.
71 assert(inst
->size_written
== inst
->dst
.component_size(inst
->exec_size
));
// Retarget the instruction at the strided temporary and clear its saturate
// flag; the saved flag is applied to the MOV inserted below instead.
72 inst
->dst
= strided_temp
;
73 inst
->saturate
= false;
74 /* As it is a strided destination, we write n-times more being n the
75 * size ratio between source and destination types. Update
76 * size_written accordingly.
78 inst
->size_written
= inst
->dst
.component_size(inst
->exec_size
);
// Insert, at inst->next (right after the current instruction), a MOV from
// the strided temporary to the original destination, carrying the saved
// saturate flag.
79 ibld
.at(block
, inst
->next
).MOV(dst
, strided_temp
)->saturate
= saturate
;
// NOTE(review): the embedded numbering jumps 79 -> 84 and 86 -> 88, so the
// branch header introducing this second path and whatever original line 87
// contains are not visible in this listing -- confirm against the full file.
// temp0: register obtained from ibld.vgrf() with the execution type.
84 fs_reg temp0
= ibld
.vgrf(get_exec_type(inst
));
// Same sanity check as in the first path.
86 assert(inst
->size_written
== inst
->dst
.component_size(inst
->exec_size
));
88 /* As it is a strided destination, we write n-times more being n the
89 * size ratio between source and destination types. Update
90 * size_written accordingly.
92 inst
->size_written
= inst
->dst
.component_size(inst
->exec_size
);
// Saturation is moved off the original instruction onto the inserted MOV.
93 inst
->saturate
= false;
94 /* Now, do the conversion to original destination's type. In next iteration,
95 * we will lower it if it is a d2f conversion.
// MOV from temp0 to the original destination, inserted at inst->next and
// carrying the saved saturate flag.
97 ibld
.at(block
, inst
->next
).MOV(dst
, temp0
)->saturate
= saturate
;
// NOTE(review): the embedded numbering jumps 97 -> 104; whether this call is
// conditional (e.g. on `progress`) is not visible here, nor are the updates
// to `progress` or the function's return statement.
104 invalidate_live_intervals();