X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir_lower_tex.c;h=2e68f2b164eb6de20806a55fe25d73e287943f9e;hb=d8fdb8dab4658de2d255718627ee671e84fedf2d;hp=1ccd253320c2f1f7319801f344c44d2c252417e9;hpb=a331d7d1cdfdc971f707fb6b1f71edbad622c804;p=mesa.git diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 1ccd253320c..2e68f2b164e 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -37,14 +37,42 @@ #include "nir.h" #include "nir_builder.h" +#include "nir_builtin_builder.h" +#include "nir_format_convert.h" + +static float bt601_csc_coeffs[9] = { + 1.16438356f, 1.16438356f, 1.16438356f, + 0.0f, -0.39176229f, 2.01723214f, + 1.59602678f, -0.81296764f, 0.0f, +}; +static float bt709_csc_coeffs[9] = { + 1.16438356f, 1.16438356f, 1.16438356f, + 0.0f , -0.21324861f, 2.11240179f, + 1.79274107f, -0.53290933f, 0.0f, +}; +static float bt2020_csc_coeffs[9] = { + 1.16438356f, 1.16438356f, 1.16438356f, + 0.0f , -0.18732610f, 2.14177232f, + 1.67867411f, -0.65042432f, 0.0f, +}; + +static float bt601_csc_offsets[3] = { + -0.874202218f, 0.531667823f, -1.085630789f +}; +static float bt709_csc_offsets[3] = { + -0.972945075f, 0.301482665f, -1.133402218f +}; +static float bt2020_csc_offsets[3] = { + -0.915687932f, 0.347458499f, -1.148145075f +}; -static void +static bool project_src(nir_builder *b, nir_tex_instr *tex) { /* Find the projector in the srcs list, if present. */ int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector); if (proj_index < 0) - return; + return false; b->cursor = nir_before_instr(&tex->instr); @@ -99,54 +127,7 @@ project_src(nir_builder *b, nir_tex_instr *tex) } nir_tex_instr_remove_src(tex, proj_index); -} - -static nir_ssa_def * -get_texture_size(nir_builder *b, nir_tex_instr *tex) -{ - b->cursor = nir_before_instr(&tex->instr); - - nir_tex_instr *txs; - - unsigned num_srcs = 1; /* One for the LOD */ - for (unsigned i = 0; i < tex->num_srcs; i++) { - if (tex->src[i].src_type == nir_tex_src_texture_deref || - tex->src[i].src_type == nir_tex_src_sampler_deref || - tex->src[i].src_type == nir_tex_src_texture_offset || - tex->src[i].src_type == nir_tex_src_sampler_offset) - num_srcs++; - } - - txs = nir_tex_instr_create(b->shader, num_srcs); - txs->op = nir_texop_txs; - txs->sampler_dim = tex->sampler_dim; - txs->is_array = tex->is_array; - txs->is_shadow = tex->is_shadow; - txs->is_new_style_shadow = tex->is_new_style_shadow; - txs->texture_index = tex->texture_index; - txs->sampler_index = tex->sampler_index; - txs->dest_type = nir_type_int; - - unsigned idx = 0; - for (unsigned i = 0; i < tex->num_srcs; i++) { - if (tex->src[i].src_type == nir_tex_src_texture_deref || - tex->src[i].src_type == nir_tex_src_sampler_deref || - tex->src[i].src_type == nir_tex_src_texture_offset || - tex->src[i].src_type == nir_tex_src_sampler_offset) { - nir_src_copy(&txs->src[idx].src, &tex->src[i].src, txs); - txs->src[idx].src_type = tex->src[i].src_type; - idx++; - } - } - /* Add in an LOD because some back-ends require it */ - txs->src[idx].src = nir_src_for_ssa(nir_imm_int(b, 0)); - txs->src[idx].src_type = nir_tex_src_lod; - - nir_ssa_dest_init(&txs->instr, &txs->dest, - nir_tex_instr_dest_size(txs), 32, NULL); - nir_builder_instr_insert(b, &txs->instr); - - return nir_i2f32(b, &txs->dest.ssa); + return true; } static bool @@ -171,7 +152,7 @@ lower_offset(nir_builder *b, nir_tex_instr *tex) if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset)); } else { - nir_ssa_def *txs = get_texture_size(b, tex); + nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); nir_ssa_def *scale = nir_frcp(b, txs); offset_coord = nir_fadd(b, coord, @@ -208,7 +189,12 @@ lower_offset(nir_builder *b, nir_tex_instr *tex) static void lower_rect(nir_builder *b, nir_tex_instr *tex) { - nir_ssa_def *txs = get_texture_size(b, tex); + /* Set the sampler_dim to 2D here so that get_texture_size picks up the + * right dimensionality. + */ + tex->sampler_dim = GLSL_SAMPLER_DIM_2D; + + nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); nir_ssa_def *scale = nir_frcp(b, txs); /* Walk through the sources normalizing the requested arguments. */ @@ -222,12 +208,41 @@ lower_rect(nir_builder *b, nir_tex_instr *tex) &tex->src[i].src, nir_src_for_ssa(nir_fmul(b, coords, scale))); } +} - tex->sampler_dim = GLSL_SAMPLER_DIM_2D; +static void +lower_implicit_lod(nir_builder *b, nir_tex_instr *tex) +{ + assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb); + assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0); + assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0); + assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0); + + b->cursor = nir_before_instr(&tex->instr); + + nir_ssa_def *lod = nir_get_texture_lod(b, tex); + + int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); + if (bias_idx >= 0) { + /* If we have a bias, add it in */ + lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1)); + nir_tex_instr_remove_src(tex, bias_idx); + } + + int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod); + if (min_lod_idx >= 0) { + /* If we have a minimum LOD, clamp LOD accordingly */ + lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1)); + nir_tex_instr_remove_src(tex, min_lod_idx); + } + + nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod)); + tex->op = nir_texop_txl; } static nir_ssa_def * -sample_plane(nir_builder *b, nir_tex_instr *tex, int plane) +sample_plane(nir_builder *b, nir_tex_instr *tex, int plane, + const nir_lower_tex_options *options) { assert(tex->dest.is_ssa); assert(nir_tex_instr_dest_size(tex) == 4); @@ -251,145 +266,188 @@ sample_plane(nir_builder *b, nir_tex_instr *tex, int plane) plane_tex->texture_index = tex->texture_index; plane_tex->sampler_index = tex->sampler_index; - nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 32, NULL); + nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, + nir_dest_bit_size(tex->dest), NULL); nir_builder_instr_insert(b, &plane_tex->instr); + /* If scaling_factor is set, return a scaled value. */ + if (options->scale_factors[tex->texture_index]) + return nir_fmul_imm(b, &plane_tex->dest.ssa, + options->scale_factors[tex->texture_index]); + return &plane_tex->dest.ssa; } static void convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, - nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v) + nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v, + nir_ssa_def *a, + const nir_lower_tex_options *options) { - nir_const_value m[3] = { - { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, - { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, - { .f32 = { 1.0f, 2.01723214f, 0.0f, 0.0f } } + + float *offset_vals; + float *m_vals; + assert((options->bt709_external & options->bt2020_external) == 0); + if (options->bt709_external & (1 << tex->texture_index)) { + m_vals = bt709_csc_coeffs; + offset_vals = bt709_csc_offsets; + } else if (options->bt2020_external & (1 << tex->texture_index)) { + m_vals = bt2020_csc_coeffs; + offset_vals = bt2020_csc_offsets; + } else { + m_vals = bt601_csc_coeffs; + offset_vals = bt601_csc_offsets; + } + + nir_const_value m[3][4] = { + { { .f32 = m_vals[0] }, { .f32 = m_vals[1] }, { .f32 = m_vals[2] }, { .f32 = 0.0f } }, + { { .f32 = m_vals[3] }, { .f32 = m_vals[4] }, { .f32 = m_vals[5] }, { .f32 = 0.0f } }, + { { .f32 = m_vals[6] }, { .f32 = m_vals[7] }, { .f32 = m_vals[8] }, { .f32 = 0.0f } }, }; + unsigned bit_size = nir_dest_bit_size(tex->dest); - nir_ssa_def *yuv = + nir_ssa_def *offset = nir_vec4(b, - nir_fmul(b, nir_imm_float(b, 1.16438356f), - nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))), - nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0), - nir_imm_float(b, 0.0)); + nir_imm_float(b, offset_vals[0]), + nir_imm_float(b, offset_vals[1]), + nir_imm_float(b, offset_vals[2]), + a); - nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); - nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); - nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); + offset = nir_f2fN(b, offset, bit_size); - nir_ssa_def *result = nir_vec4(b, red, green, blue, nir_imm_float(b, 1.0f)); + nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[0]), bit_size); + nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[1]), bit_size); + nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[2]), bit_size); + + nir_ssa_def *result = + nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset))); nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result)); } static void -lower_y_uv_external(nir_builder *b, nir_tex_instr *tex) +lower_y_uv_external(nir_builder *b, nir_tex_instr *tex, + const nir_lower_tex_options *options) { b->cursor = nir_after_instr(&tex->instr); - nir_ssa_def *y = sample_plane(b, tex, 0); - nir_ssa_def *uv = sample_plane(b, tex, 1); + nir_ssa_def *y = sample_plane(b, tex, 0, options); + nir_ssa_def *uv = sample_plane(b, tex, 1, options); convert_yuv_to_rgb(b, tex, nir_channel(b, y, 0), nir_channel(b, uv, 0), - nir_channel(b, uv, 1)); + nir_channel(b, uv, 1), + nir_imm_float(b, 1.0f), + options); } static void -lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex) +lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex, + const nir_lower_tex_options *options) { b->cursor = nir_after_instr(&tex->instr); - nir_ssa_def *y = sample_plane(b, tex, 0); - nir_ssa_def *u = sample_plane(b, tex, 1); - nir_ssa_def *v = sample_plane(b, tex, 2); + nir_ssa_def *y = sample_plane(b, tex, 0, options); + nir_ssa_def *u = sample_plane(b, tex, 1, options); + nir_ssa_def *v = sample_plane(b, tex, 2, options); convert_yuv_to_rgb(b, tex, nir_channel(b, y, 0), nir_channel(b, u, 0), - nir_channel(b, v, 0)); + nir_channel(b, v, 0), + nir_imm_float(b, 1.0f), + options); } static void -lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex) +lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex, + const nir_lower_tex_options *options) { b->cursor = nir_after_instr(&tex->instr); - nir_ssa_def *y = sample_plane(b, tex, 0); - nir_ssa_def *xuxv = sample_plane(b, tex, 1); + nir_ssa_def *y = sample_plane(b, tex, 0, options); + nir_ssa_def *xuxv = sample_plane(b, tex, 1, options); convert_yuv_to_rgb(b, tex, nir_channel(b, y, 0), nir_channel(b, xuxv, 1), - nir_channel(b, xuxv, 3)); + nir_channel(b, xuxv, 3), + nir_imm_float(b, 1.0f), + options); } static void -lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex) +lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex, + const nir_lower_tex_options *options) { b->cursor = nir_after_instr(&tex->instr); - nir_ssa_def *y = sample_plane(b, tex, 0); - nir_ssa_def *uxvx = sample_plane(b, tex, 1); + nir_ssa_def *y = sample_plane(b, tex, 0, options); + nir_ssa_def *uxvx = sample_plane(b, tex, 1, options); convert_yuv_to_rgb(b, tex, nir_channel(b, y, 1), nir_channel(b, uxvx, 0), - nir_channel(b, uxvx, 2)); + nir_channel(b, uxvx, 2), + nir_imm_float(b, 1.0f), + options); +} + +static void +lower_ayuv_external(nir_builder *b, nir_tex_instr *tex, + const nir_lower_tex_options *options) +{ + b->cursor = nir_after_instr(&tex->instr); + + nir_ssa_def *ayuv = sample_plane(b, tex, 0, options); + + convert_yuv_to_rgb(b, tex, + nir_channel(b, ayuv, 2), + nir_channel(b, ayuv, 1), + nir_channel(b, ayuv, 0), + nir_channel(b, ayuv, 3), + options); +} + +static void +lower_xyuv_external(nir_builder *b, nir_tex_instr *tex, + const nir_lower_tex_options *options) +{ + b->cursor = nir_after_instr(&tex->instr); + + nir_ssa_def *xyuv = sample_plane(b, tex, 0, options); + + convert_yuv_to_rgb(b, tex, + nir_channel(b, xyuv, 2), + nir_channel(b, xyuv, 1), + nir_channel(b, xyuv, 0), + nir_imm_float(b, 1.0f), + options); } /* - * Emits a textureLod operation used to replace an existing - * textureGrad instruction. + * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod + * computed from the gradients. */ static void replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex) { - /* We are going to emit a textureLod() with the same parameters except that - * we replace ddx/ddy with lod. - */ - int num_srcs = tex->num_srcs - 1; - nir_tex_instr *txl = nir_tex_instr_create(b->shader, num_srcs); - - txl->op = nir_texop_txl; - txl->sampler_dim = tex->sampler_dim; - txl->texture_index = tex->texture_index; - txl->dest_type = tex->dest_type; - txl->is_array = tex->is_array; - txl->is_shadow = tex->is_shadow; - txl->is_new_style_shadow = tex->is_new_style_shadow; - txl->sampler_index = tex->sampler_index; - txl->coord_components = tex->coord_components; - - nir_ssa_dest_init(&txl->instr, &txl->dest, 4, 32, NULL); - - int src_num = 0; - for (int i = 0; i < tex->num_srcs; i++) { - if (tex->src[i].src_type == nir_tex_src_ddx || - tex->src[i].src_type == nir_tex_src_ddy) - continue; - nir_src_copy(&txl->src[src_num].src, &tex->src[i].src, txl); - txl->src[src_num].src_type = tex->src[i].src_type; - src_num++; - } - - txl->src[src_num].src = nir_src_for_ssa(lod); - txl->src[src_num].src_type = nir_tex_src_lod; - src_num++; - - assert(src_num == num_srcs); + assert(tex->op == nir_texop_txd); - nir_ssa_dest_init(&txl->instr, &txl->dest, - tex->dest.ssa.num_components, 32, NULL); - nir_builder_instr_insert(b, &txl->instr); + nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx)); + nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy)); - nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(&txl->dest.ssa)); + int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod); + if (min_lod_idx >= 0) { + /* If we have a minimum LOD, clamp LOD accordingly */ + lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1)); + nir_tex_instr_remove_src(tex, min_lod_idx); + } - nir_instr_remove(&tex->instr); + nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod)); + tex->op = nir_texop_txl; } static void @@ -400,7 +458,7 @@ lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex) assert(tex->dest.is_ssa); /* Use textureSize() to get the width and height of LOD 0 */ - nir_ssa_def *size = get_texture_size(b, tex); + nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex)); /* Cubemap texture lookups first generate a texture coordinate normalized * to [-1, 1] on the appropiate face. The appropiate face is determined @@ -479,26 +537,26 @@ lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex) nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y)); nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z)); - unsigned yzx[4] = { 1, 2, 0, 0 }; - unsigned xzy[4] = { 0, 2, 1, 0 }; + unsigned yzx[3] = { 1, 2, 0 }; + unsigned xzy[3] = { 0, 2, 1 }; Q = nir_bcsel(b, cond_z, p, nir_bcsel(b, cond_y, - nir_swizzle(b, p, xzy, 3, false), - nir_swizzle(b, p, yzx, 3, false))); + nir_swizzle(b, p, xzy, 3), + nir_swizzle(b, p, yzx, 3))); dQdx = nir_bcsel(b, cond_z, dPdx, nir_bcsel(b, cond_y, - nir_swizzle(b, dPdx, xzy, 3, false), - nir_swizzle(b, dPdx, yzx, 3, false))); + nir_swizzle(b, dPdx, xzy, 3), + nir_swizzle(b, dPdx, yzx, 3))); dQdy = nir_bcsel(b, cond_z, dPdy, nir_bcsel(b, cond_y, - nir_swizzle(b, dPdy, xzy, 3, false), - nir_swizzle(b, dPdy, yzx, 3, false))); + nir_swizzle(b, dPdy, xzy, 3), + nir_swizzle(b, dPdy, yzx, 3))); /* 2. quotient rule */ @@ -508,16 +566,15 @@ lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex) */ nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2)); - unsigned xy[4] = { 0, 1, 0, 0 }; - nir_ssa_def *Q_xy = nir_swizzle(b, Q, xy, 2, false); + nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3); nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z); - nir_ssa_def *dQdx_xy = nir_swizzle(b, dQdx, xy, 2, false); + nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3); nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2); nir_ssa_def *dx = nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z))); - nir_ssa_def *dQdy_xy = nir_swizzle(b, dQdy, xy, 2, false); + nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3); nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2); nir_ssa_def *dy = nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z))); @@ -543,6 +600,12 @@ lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex) static void lower_gradient(nir_builder *b, nir_tex_instr *tex) { + /* Cubes are more complicated and have their own function */ + if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + lower_gradient_cube_map(b, tex); + return; + } + assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE); assert(tex->op == nir_texop_txd); assert(tex->dest.is_ssa); @@ -562,7 +625,8 @@ lower_gradient(nir_builder *b, nir_tex_instr *tex) } nir_ssa_def *size = - nir_channels(b, get_texture_size(b, tex), component_mask); + nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)), + component_mask); /* Scale the gradients by width and height. Effectively, the incoming * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the @@ -624,7 +688,7 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) /* non-normalized texture coords, so clamp to texture * size rather than [0.0, 1.0] */ - nir_ssa_def *txs = get_texture_size(b, tex); + nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0)); comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j)); } else { @@ -645,23 +709,38 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) static nir_ssa_def * get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) { - nir_const_value v; + nir_const_value v[4]; memset(&v, 0, sizeof(v)); if (swizzle_val == 4) { - v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 0; + v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0; } else { assert(swizzle_val == 5); if (type == nir_type_float) - v.f32[0] = v.f32[1] = v.f32[2] = v.f32[3] = 1.0; + v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0; else - v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 1; + v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1; } return nir_build_imm(b, 4, 32, v); } +static void +swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex) +{ + assert(tex->dest.is_ssa); + + b->cursor = nir_after_instr(&tex->instr); + + assert(nir_tex_instr_dest_size(tex) == 4); + unsigned swiz[4] = { 2, 3, 1, 0 }; + nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4); + + nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled), + swizzled->parent_instr); +} + static void swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) { @@ -684,7 +763,7 @@ swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) swizzle[2] < 4 && swizzle[3] < 4) { unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] }; /* We have no 0s or 1s, just emit a swizzling MOV */ - swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false); + swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4); } else { nir_ssa_def *srcs[4]; for (unsigned i = 0; i < 4; i++) { @@ -711,24 +790,8 @@ linearize_srgb_result(nir_builder *b, nir_tex_instr *tex) b->cursor = nir_after_instr(&tex->instr); - static const unsigned swiz[4] = {0, 1, 2, 0}; - nir_ssa_def *comp = nir_swizzle(b, &tex->dest.ssa, swiz, 3, true); - - /* Formula is: - * (comp <= 0.04045) ? - * (comp / 12.92) : - * pow((comp + 0.055) / 1.055, 2.4) - */ - nir_ssa_def *low = nir_fmul(b, comp, nir_imm_float(b, 1.0 / 12.92)); - nir_ssa_def *high = nir_fpow(b, - nir_fmul(b, - nir_fadd(b, - comp, - nir_imm_float(b, 0.055)), - nir_imm_float(b, 1.0 / 1.055)), - nir_imm_float(b, 2.4)); - nir_ssa_def *cond = nir_fge(b, nir_imm_float(b, 0.04045), comp); - nir_ssa_def *rgb = nir_bcsel(b, cond, low, high); + nir_ssa_def *rgb = + nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7)); /* alpha is untouched: */ nir_ssa_def *result = nir_vec4(b, @@ -741,6 +804,177 @@ linearize_srgb_result(nir_builder *b, nir_tex_instr *tex) result->parent_instr); } +/** + * Lowers texture instructions from giving a vec4 result to a vec2 of f16, + * i16, or u16, or a single unorm4x8 value. + * + * Note that we don't change the destination num_components, because + * nir_tex_instr_dest_size() will still return 4. The driver is just expected + * to not store the other channels, given that nothing at the NIR level will + * read them. + */ +static void +lower_tex_packing(nir_builder *b, nir_tex_instr *tex, + const nir_lower_tex_options *options) +{ + nir_ssa_def *color = &tex->dest.ssa; + + b->cursor = nir_after_instr(&tex->instr); + + switch (options->lower_tex_packing[tex->sampler_index]) { + case nir_lower_tex_packing_none: + return; + + case nir_lower_tex_packing_16: { + static const unsigned bits[4] = {16, 16, 16, 16}; + + switch (nir_alu_type_get_base_type(tex->dest_type)) { + case nir_type_float: + if (tex->is_shadow && tex->is_new_style_shadow) { + color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0)); + } else { + nir_ssa_def *rg = nir_channel(b, color, 0); + nir_ssa_def *ba = nir_channel(b, color, 1); + color = nir_vec4(b, + nir_unpack_half_2x16_split_x(b, rg), + nir_unpack_half_2x16_split_y(b, rg), + nir_unpack_half_2x16_split_x(b, ba), + nir_unpack_half_2x16_split_y(b, ba)); + } + break; + + case nir_type_int: + color = nir_format_unpack_sint(b, color, bits, 4); + break; + + case nir_type_uint: + color = nir_format_unpack_uint(b, color, bits, 4); + break; + + default: + unreachable("unknown base type"); + } + break; + } + + case nir_lower_tex_packing_8: + assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); + color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0)); + break; + } + + nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(color), + color->parent_instr); +} + +static bool +sampler_index_lt(nir_tex_instr *tex, unsigned max) +{ + assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1); + + unsigned sampler_index = tex->sampler_index; + + int sampler_offset_idx = + nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset); + if (sampler_offset_idx >= 0) { + if (!nir_src_is_const(tex->src[sampler_offset_idx].src)) + return false; + + sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src); + } + + return sampler_index < max; +} + +static bool +lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex) +{ + assert(tex->op == nir_texop_tg4); + assert(nir_tex_instr_has_explicit_tg4_offsets(tex)); + assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1); + + b->cursor = nir_after_instr(&tex->instr); + + nir_ssa_def *dest[4]; + for (unsigned i = 0; i < 4; ++i) { + nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1); + tex_copy->op = tex->op; + tex_copy->coord_components = tex->coord_components; + tex_copy->sampler_dim = tex->sampler_dim; + tex_copy->is_array = tex->is_array; + tex_copy->is_shadow = tex->is_shadow; + tex_copy->is_new_style_shadow = tex->is_new_style_shadow; + tex_copy->component = tex->component; + tex_copy->dest_type = tex->dest_type; + + for (unsigned j = 0; j < tex->num_srcs; ++j) { + nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src, tex_copy); + tex_copy->src[j].src_type = tex->src[j].src_type; + } + + nir_tex_src src; + src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0], + tex->tg4_offsets[i][1])); + src.src_type = nir_tex_src_offset; + tex_copy->src[tex_copy->num_srcs - 1] = src; + + nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest, + nir_tex_instr_dest_size(tex), 32, NULL); + + nir_builder_instr_insert(b, &tex_copy->instr); + + dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3); + } + + nir_ssa_def *res = nir_vec4(b, dest[0], dest[1], dest[2], dest[3]); + nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(res)); + nir_instr_remove(&tex->instr); + + return true; +} + +static bool +nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex) +{ + int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod); + if (lod_idx < 0 || + (nir_src_is_const(tex->src[lod_idx].src) && + nir_src_as_int(tex->src[lod_idx].src) == 0)) + return false; + + unsigned dest_size = nir_tex_instr_dest_size(tex); + + b->cursor = nir_before_instr(&tex->instr); + nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1); + + /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */ + nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src, + nir_src_for_ssa(nir_imm_int(b, 0))); + + /* TXS(LOD) = max(TXS(0) >> LOD, 1) */ + b->cursor = nir_after_instr(&tex->instr); + nir_ssa_def *minified = nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod), + nir_imm_int(b, 1)); + + /* Make sure the component encoding the array size (if any) is not + * minified. + */ + if (tex->is_array) { + nir_ssa_def *comp[3]; + + assert(dest_size <= ARRAY_SIZE(comp)); + for (unsigned i = 0; i < dest_size - 1; i++) + comp[i] = nir_channel(b, minified, i); + + comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1); + minified = nir_vec(b, comp, dest_size); + } + + nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(minified), + minified->parent_instr); + return true; +} + static bool nir_lower_tex_block(nir_block *block, nir_builder *b, const nir_lower_tex_options *options) @@ -768,8 +1002,7 @@ nir_lower_tex_block(nir_block *block, nir_builder *b, * as clamping happens *after* projection: */ if (lower_txp || sat_mask) { - project_src(b, tex); - progress = true; + progress |= project_src(b, tex); } if ((tex->op == nir_texop_txf && options->lower_txf_offset) || @@ -779,28 +1012,39 @@ nir_lower_tex_block(nir_block *block, nir_builder *b, progress = lower_offset(b, tex) || progress; } - if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) { + if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect && + tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) { lower_rect(b, tex); progress = true; } if ((1 << tex->texture_index) & options->lower_y_uv_external) { - lower_y_uv_external(b, tex); + lower_y_uv_external(b, tex, options); progress = true; } if ((1 << tex->texture_index) & options->lower_y_u_v_external) { - lower_y_u_v_external(b, tex); + lower_y_u_v_external(b, tex, options); progress = true; } if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) { - lower_yx_xuxv_external(b, tex); + lower_yx_xuxv_external(b, tex, options); progress = true; } if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) { - lower_xy_uxvx_external(b, tex); + lower_xy_uxvx_external(b, tex, options); + progress = true; + } + + if ((1 << tex->texture_index) & options->lower_ayuv_external) { + lower_ayuv_external(b, tex, options); + progress = true; + } + + if ((1 << tex->texture_index) & options->lower_xyuv_external) { + lower_xyuv_external(b, tex, options); progress = true; } @@ -809,6 +1053,11 @@ nir_lower_tex_block(nir_block *block, nir_builder *b, progress = true; } + if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) { + swizzle_tg4_broadcom(b, tex); + progress = true; + } + if (((1 << tex->texture_index) & options->swizzle_result) && !nir_tex_instr_is_query(tex) && !(tex->is_shadow && tex->is_new_style_shadow)) { @@ -823,38 +1072,77 @@ nir_lower_tex_block(nir_block *block, nir_builder *b, progress = true; } - if (tex->op == nir_texop_txd && - tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && - (options->lower_txd || - options->lower_txd_cube_map || - (tex->is_shadow && options->lower_txd_shadow))) { - lower_gradient_cube_map(b, tex); + const bool has_min_lod = + nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0; + const bool has_offset = + nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0; + + if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod && + options->lower_txb_shadow_clamp) { + lower_implicit_lod(b, tex); + progress = true; + } + + if (options->lower_tex_packing[tex->sampler_index] != + nir_lower_tex_packing_none && + tex->op != nir_texop_txs && + tex->op != nir_texop_query_levels) { + lower_tex_packing(b, tex, options); progress = true; - continue; } if (tex->op == nir_texop_txd && (options->lower_txd || - (options->lower_txd_shadow && - tex->is_shadow && tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE))) { + (options->lower_txd_shadow && tex->is_shadow) || + (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) || + (options->lower_txd_offset_clamp && has_offset && has_min_lod) || + (options->lower_txd_clamp_bindless_sampler && has_min_lod && + nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) || + (options->lower_txd_clamp_if_sampler_index_not_lt_16 && + has_min_lod && !sampler_index_lt(tex, 16)) || + (options->lower_txd_cube_map && + tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) || + (options->lower_txd_3d && + tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) { lower_gradient(b, tex); progress = true; continue; } + bool shader_supports_implicit_lod = + b->shader->info.stage == MESA_SHADER_FRAGMENT || + (b->shader->info.stage == MESA_SHADER_COMPUTE && + b->shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE); + /* TXF, TXS and TXL require a LOD but not everything we implement using those * three opcodes provides one. Provide a default LOD of 0. */ if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) && (tex->op == nir_texop_txf || tex->op == nir_texop_txs || tex->op == nir_texop_txl || tex->op == nir_texop_query_levels || - (tex->op == nir_texop_tex && - b->shader->info.stage != MESA_SHADER_FRAGMENT))) { + (tex->op == nir_texop_tex && !shader_supports_implicit_lod))) { b->cursor = nir_before_instr(&tex->instr); nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0))); + if (tex->op == nir_texop_tex && options->lower_tex_without_implicit_lod) + tex->op = nir_texop_txl; progress = true; continue; } + + if (options->lower_txs_lod && tex->op == nir_texop_txs) { + progress |= nir_lower_txs_lod(b, tex); + continue; + } + + /* has to happen after all the other lowerings as the original tg4 gets + * replaced by 4 tg4 instructions. + */ + if (tex->op == nir_texop_tg4 && + nir_tex_instr_has_explicit_tg4_offsets(tex) && + options->lower_tg4_offsets) { + progress |= lower_tg4_offsets(b, tex); + continue; + } } return progress;