2 * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 #include "compiler/nir/nir.h"
28 #include "util/u_dynarray.h"
29 #include "util/register_allocate.h"
/* Compiler-wide state shared across all shader compiles, used to store
 * complex one-time initialization (for instance, related to register
 * allocation). To be embedded in panfrost_screen for the Gallium driver, or
 * somewhere else for Vulkan/standalone. */

struct midgard_screen {
        /* Precomputed register allocation sets for varying numbers of work
         * registers. The zeroth entry corresponds to 8 work registers and the
         * eighth entry corresponds to 16 work registers (so entry i
         * presumably serves 8 + i work registers). NULL if this set has not
         * been allocated yet. */

        struct ra_regs *regs[9];

        /* Work register classes corresponding to the above register sets:
         * 12 per set, for 4 classes per work/ldst/tex */

        unsigned reg_classes[9][12];
};
50 /* Define the general compiler entry point */
/* Number of entries in a program's sysvals[] array */
#define MAX_SYSVAL_COUNT 32

/* A sysval packs its type (class) into the low 16 bits and a type-specific ID
 * into the high 16 bits. This allows a 2D space of sysval IDs, while a
 * nonparametric sysval (ID zero) still compares equal to its bare class. */

#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type)
#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff)
#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16)
61 /* Define some common types. We start at one for easy indexing of hash
62 * tables internal to the compiler */
65 PAN_SYSVAL_VIEWPORT_SCALE
= 1,
66 PAN_SYSVAL_VIEWPORT_OFFSET
= 2,
67 PAN_SYSVAL_TEXTURE_SIZE
= 3,
/* Texture size (TXS) sysval IDs pack three fields:
 *
 *   bits 0-6: texture index
 *   bits 7-8: dimension
 *   bit  9:   set if the texture is an array
 *
 * The encoder does not mask its arguments, so texidx must stay below 128 and
 * dim below 4, or the fields will overlap. */

#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \
        ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))

/* Decoders for the fields above. The whole replacement list is parenthesized
 * so each macro expands safely inside any surrounding expression. */

#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id) ((id) & 0x7f)
#define PAN_SYSVAL_ID_TO_TXS_DIM(id) (((id) >> 7) & 0x3)
#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) (!!((id) & (1 << 9)))
78 int work_register_count
;
/* Prepended before uniforms, mapping to SYSVAL_ names for the
 * sysvals[] entries */
85 unsigned sysval_count
;
86 unsigned sysvals
[MAX_SYSVAL_COUNT
];
88 unsigned varyings
[32];
90 /* Boolean properties of the program */
91 bool writes_point_size
;
95 struct util_dynarray compiled
;
97 /* For a blend shader using a constant color -- patch point. If
98 * negative, there's no constant. */
100 int blend_patch_offset
;
102 /* The number of bytes to allocate per-thread for Thread Local Storage
103 * (register spilling), or zero if no spilling is used */
106 /* IN: For a fragment shader with a lowered alpha test, the ref value */
111 midgard_compile_shader_nir(struct midgard_screen
*screen
, nir_shader
*nir
, midgard_program
*program
, bool is_blend
);
/* NIR options are shared between the standalone compiler and the online
 * compiler. Defining it here is the simplest, though maybe not the Right
 * Thing. */
117 static const nir_shader_compiler_options midgard_nir_options
= {
121 .lower_flrp32
= true,
122 .lower_flrp64
= true,
123 .lower_ffract
= true,
129 .lower_find_lsb
= true,
131 .lower_wpos_pntc
= true,
        /* TODO: We have native ops to help here, which we'll want to look into */
137 .vertex_id_zero_based
= true,
138 .lower_extract_byte
= true,
139 .lower_extract_word
= true,
140 .lower_rotate
= true,
142 .lower_doubles_options
= nir_lower_dmod
,
144 .vectorize_io
= true,