Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- commit 160a4208f8907e6f6f8d7f2877214c2524318b80
- Author: Suici Doga <suiciwd@gmail.com>
- Date: Wed Feb 8 18:10:32 2017 +0530
- WIP: r600/sb: add a direct TGSI-to-sb translation path (enabled by the "sbtgsi" debug option)
- diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources
- index df083d7..e0083e2 100644
- --- a/src/gallium/drivers/r600/Makefile.sources
- +++ b/src/gallium/drivers/r600/Makefile.sources
- @@ -45,6 +45,7 @@ CXX_SOURCES = \
- sb/sb_sched.cpp \
- sb/sb_shader.cpp \
- sb/sb_ssa_builder.cpp \
- + sb/sb_tgsi.cpp \
- sb/sb_valtable.cpp
- LLVM_C_SOURCES = r600_llvm.c
- diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
- index c6bb869..504ef42 100644
- --- a/src/gallium/drivers/r600/r600_isa.h
- +++ b/src/gallium/drivers/r600/r600_isa.h
- @@ -42,6 +42,10 @@ enum alu_op_flags
- AF_4V = (AF_V | AF_4SLOT),
- AF_VS = (AF_V | AF_S), /* allowed in any slot */
- + // MULLO_INT, MULHI_INT (and _UINT) should be expanded to 4 slots
- + AF_CM_EXPAND = (1<<3),
- + AF_4VE = (AF_4V | AF_CM_EXPAND),
- +
- AF_KILL = (1<<4),
- AF_PRED = (1<<5),
- AF_SET = (1<<6),
- @@ -285,10 +289,10 @@ static const struct alu_op_info alu_op_table[] = {
- {"SQRT_IEEE", 1, { 0x6A, 0x8A },{ AF_S, AF_S, AF_S, AF_S}, AF_IEEE },
- {"SIN", 1, { 0x6E, 0x8D },{ AF_S, AF_S, AF_S, AF_S}, 0 },
- {"COS", 1, { 0x6F, 0x8E },{ AF_S, AF_S, AF_S, AF_S}, 0 },
- - {"MULLO_INT", 2, { 0x73, 0x8F },{ AF_S, AF_S, AF_S, AF_4V}, AF_M_COMM | AF_INT_DST | AF_REPL},
- - {"MULHI_INT", 2, { 0x74, 0x90 },{ AF_S, AF_S, AF_S, AF_4V}, AF_M_COMM | AF_INT_DST | AF_REPL},
- - {"MULLO_UINT", 2, { 0x75, 0x91 },{ AF_S, AF_S, AF_S, AF_4V}, AF_M_COMM | AF_UINT_DST | AF_REPL},
- - {"MULHI_UINT", 2, { 0x76, 0x92 },{ AF_S, AF_S, AF_S, AF_4V}, AF_M_COMM | AF_UINT_DST | AF_REPL},
- + {"MULLO_INT", 2, { 0x73, 0x8F },{ AF_S, AF_S, AF_S, AF_4VE}, AF_M_COMM | AF_INT_DST | AF_REPL},
- + {"MULHI_INT", 2, { 0x74, 0x90 },{ AF_S, AF_S, AF_S, AF_4VE}, AF_M_COMM | AF_INT_DST | AF_REPL},
- + {"MULLO_UINT", 2, { 0x75, 0x91 },{ AF_S, AF_S, AF_S, AF_4VE}, AF_M_COMM | AF_UINT_DST | AF_REPL},
- + {"MULHI_UINT", 2, { 0x76, 0x92 },{ AF_S, AF_S, AF_S, AF_4VE}, AF_M_COMM | AF_UINT_DST | AF_REPL},
- {"RECIP_INT", 1, { 0x77, 0x93 },{ AF_S, AF_S, AF_S, 0}, AF_INT_DST },
- {"RECIP_UINT", 1, { 0x78, 0x94 },{ AF_S, AF_S, AF_S, 0}, AF_UINT_DST },
- {"RECIP_64", 2, { -1, 0x95 },{ 0, 0, AF_S, AF_S}, AF_64 },
- diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
- index 49abf50..4006b7a 100644
- --- a/src/gallium/drivers/r600/r600_pipe.c
- +++ b/src/gallium/drivers/r600/r600_pipe.c
- @@ -75,6 +75,7 @@ static const struct debug_named_value debug_options[] = {
- { "sbnofallback", DBG_SB_NO_FALLBACK, "Abort on errors instead of fallback" },
- { "sbdisasm", DBG_SB_DISASM, "Use sb disassembler for shader dumps" },
- { "sbsafemath", DBG_SB_SAFEMATH, "Disable unsafe math optimizations" },
- + { "sbtgsi", DBG_SB_TGSI, "Use sb and its own tgsi translator"},
- DEBUG_NAMED_VALUE_END /* must be last */
- };
- diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
- index 349a6cb..86537a5 100644
- --- a/src/gallium/drivers/r600/r600_pipe.h
- +++ b/src/gallium/drivers/r600/r600_pipe.h
- @@ -257,6 +257,7 @@ typedef boolean (*r600g_dma_blit_t)(struct pipe_context *ctx,
- #define DBG_SB_NO_FALLBACK (1 << 26)
- #define DBG_SB_DISASM (1 << 27)
- #define DBG_SB_SAFEMATH (1 << 28)
- +#define DBG_SB_TGSI (1 << 29)
- struct r600_tiling_info {
- unsigned num_channels;
- diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
- index dc44fae..2023bab 100644
- --- a/src/gallium/drivers/r600/r600_shader.c
- +++ b/src/gallium/drivers/r600/r600_shader.c
- @@ -141,6 +141,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
- uint32_t *ptr;
- bool dump = r600_can_dump_shader(rctx->screen, tgsi_get_processor_type(sel->tokens));
- unsigned use_sb = rctx->screen->debug_flags & DBG_SB;
- + unsigned use_sbtgsi = rctx->screen->debug_flags & DBG_SB_TGSI;
- unsigned sb_disasm = use_sb || (rctx->screen->debug_flags & DBG_SB_DISASM);
- shader->shader.bc.isa = rctx->isa;
- @@ -153,35 +154,46 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
- r600_dump_streamout(&sel->so);
- }
- }
- - r = r600_shader_from_tgsi(rctx->screen, shader, key);
- - if (r) {
- - R600_ERR("translation from TGSI failed !\n");
- - return r;
- - }
- - /* Check if the bytecode has already been built. When using the llvm
- - * backend, r600_shader_from_tgsi() will take care of building the
- - * bytecode.
- - */
- - if (!shader->shader.bc.bytecode) {
- - r = r600_bytecode_build(&shader->shader.bc);
- + if (use_sbtgsi) {
- + r = r600_sb_compile_tgsi(rctx, shader, key, dump);
- if (r) {
- - R600_ERR("building bytecode failed !\n");
- + R600_ERR("SB: TGSI compilation failed!\n");
- return r;
- }
- - }
- - if (dump && !sb_disasm) {
- - fprintf(stderr, "--------------------------------------------------------------\n");
- - r600_bytecode_disasm(&shader->shader.bc);
- - fprintf(stderr, "______________________________________________________________\n");
- - } else if ((dump && sb_disasm) || use_sb) {
- - r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
- - dump, use_sb);
- + } else {
- +
- + r = r600_shader_from_tgsi(rctx->screen, shader, key);
- if (r) {
- - R600_ERR("r600_sb_bytecode_process failed !\n");
- + R600_ERR("translation from TGSI failed !\n");
- return r;
- }
- +
- + /* Check if the bytecode has already been built. When using the llvm
- + * backend, r600_shader_from_tgsi() will take care of building the
- + * bytecode.
- + */
- + if (!shader->shader.bc.bytecode) {
- + r = r600_bytecode_build(&shader->shader.bc);
- + if (r) {
- + R600_ERR("building bytecode failed !\n");
- + return r;
- + }
- + }
- +
- + if (dump && !sb_disasm) {
- + fprintf(stderr, "--------------------------------------------------------------\n");
- + r600_bytecode_disasm(&shader->shader.bc);
- + fprintf(stderr, "______________________________________________________________\n");
- + } else if ((dump && sb_disasm) || use_sb) {
- + r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
- + dump, use_sb);
- + if (r) {
- + R600_ERR("r600_sb_bytecode_process failed !\n");
- + return r;
- + }
- + }
- }
- /* Store the shader in a buffer. */
- diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
- index ad1b862..89c8c4e 100644
- --- a/src/gallium/drivers/r600/sb/sb_bc.h
- +++ b/src/gallium/drivers/r600/sb/sb_bc.h
- @@ -902,14 +902,13 @@ public:
- class bc_builder {
- shader &sh;
- sb_context &ctx;
- - bytecode bb;
- + bytecode &bb;
- int error;
- public:
- bc_builder(shader &s);
- int build();
- - bytecode& get_bytecode() { assert(!error); return bb; }
- private:
- diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
- index 55e2a85..9b065b6 100644
- --- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
- +++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
- @@ -31,7 +31,7 @@
- namespace r600_sb {
- bc_builder::bc_builder(shader &s)
- - : sh(s), ctx(s.get_ctx()), bb(ctx.hw_class_bit()), error(0) {}
- + : sh(s), ctx(s.get_ctx()), bb(s.get_bytecode()), error(0) {}
- int bc_builder::build() {
- diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
- index c56c866..4218990 100644
- --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
- +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
- @@ -32,6 +32,8 @@
- #define FBC_DUMP(q)
- #endif
- +#include "cmath"
- +
- #include "sb_bc.h"
- #include "sb_shader.h"
- #include "sb_pass.h"
- @@ -306,22 +308,26 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
- sel_chan gpr;
- switch (v->kind) {
- - case VLK_REL_REG:
- - sc = v->get_final_gpr();
- - src.sel = sc.sel();
- - src.chan = sc.chan();
- - if (!v->rel->is_const()) {
- - src.rel = 1;
- - update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
- - } else
- - src.rel = 0;
- -
- - break;
- case VLK_REG:
- - gpr = v->get_final_gpr();
- - src.sel = gpr.sel();
- - src.chan = gpr.chan();
- - update_ngpr(src.sel);
- + case VLK_TGSI_INPUT:
- + case VLK_TGSI_OUTPUT:
- + case VLK_TGSI_TEMP:
- + case VLK_TGSI_ADDR:
- + if (v->rel) {
- + sc = v->get_final_gpr();
- + src.sel = sc.sel();
- + src.chan = sc.chan();
- + if (!v->rel->is_const()) {
- + src.rel = 1;
- + update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
- + } else
- + src.rel = 0;
- + } else {
- + gpr = v->get_final_gpr();
- + src.sel = gpr.sel();
- + src.chan = gpr.chan();
- + update_ngpr(src.sel);
- + }
- break;
- case VLK_TEMP:
- src.sel = v->gpr.sel();
- @@ -333,13 +339,32 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
- literal lv = v->literal_value;
- src.chan = 0;
- + if (src.abs) {
- + lv.f = fabs(lv.f);
- + src.abs = 0;
- + }
- + if (src.neg) {
- + lv.f = -lv.f;
- + src.neg = 0;
- + }
- +
- if (lv == literal(0))
- src.sel = ALU_SRC_0;
- - else if (lv == literal(0.5f))
- + else if (lv == literal(0x80000000)) {
- + // XXX probably we shouldn't have -0 here in the first place?
- + src.sel = ALU_SRC_0;
- + src.neg = 1;
- + } else if (lv == literal(0.5f))
- src.sel = ALU_SRC_0_5;
- - else if (lv == literal(1.0f))
- + else if (lv == literal(-0.5f)) {
- + src.sel = ALU_SRC_0_5;
- + src.neg = 1;
- + } else if (lv == literal(1.0f))
- + src.sel = ALU_SRC_1;
- + else if (lv == literal(-1.0f)) {
- src.sel = ALU_SRC_1;
- - else if (lv == literal(1))
- + src.neg = 1;
- + } else if (lv == literal(1))
- src.sel = ALU_SRC_1_INT;
- else if (lv == literal(-1))
- src.sel = ALU_SRC_M_1_INT;
- @@ -477,9 +502,11 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
- value *v = f->src[chan];
- - if (v->is_undef()) {
- + if (!v)
- sel = SEL_MASK;
- - } else if (v->is_const()) {
- + else if (v->is_undef())
- + sel = SEL_0;
- + else if (v->is_const()) {
- literal l = v->literal_value;
- if (l == literal(0))
- sel = SEL_0;
- diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
- index 67e6c3a..320a081 100644
- --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
- +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
- @@ -112,7 +112,7 @@ int bc_parser::parse_decls() {
- if (!pshader) {
- if (gpr_reladdr)
- - sh->add_gpr_array(0, bc->ngpr, 0x0F);
- + sh->add_rel_array(VLK_REG, 0, bc->ngpr, 0x0F);
- // compute shaders have some values preloaded in R0, R1
- sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
- @@ -127,10 +127,11 @@ int bc_parser::parse_decls() {
- if (pshader->num_arrays) {
- for (unsigned i = 0; i < pshader->num_arrays; ++i) {
- r600_shader_array &a = pshader->arrays[i];
- - sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
- + sh->add_rel_array(VLK_REG, a.gpr_start, a.gpr_count,
- + a.comp_mask);
- }
- } else {
- - sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
- + sh->add_rel_array(VLK_REG, 0, pshader->bc.ngpr, 0x0F);
- }
- }
- @@ -329,7 +330,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
- unsigned src_count = n->bc.op_ptr->src_count;
- if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
- - n->flags |= NF_ALU_4SLOT;
- + n->flags |= NF_ALU_4SLOT; // XXX it seems this flag is only used by the dump code
- n->src.resize(src_count);
- @@ -381,6 +382,14 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
- bc_alu_src &src = n->bc.src[s];
- if (src.sel == ALU_SRC_LITERAL) {
- + if (src.abs) {
- + src.value.f = fabs(src.value.f);
- + src.abs = 0;
- + }
- + if (src.neg) {
- + src.value.f = -src.value.f;
- + src.neg = 0;
- + }
- n->src[s] = sh->get_const_value(src.value);
- } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
- unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
- diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp
- index d907508..08dc032 100644
- --- a/src/gallium/drivers/r600/sb/sb_core.cpp
- +++ b/src/gallium/drivers/r600/sb/sb_core.cpp
- @@ -28,7 +28,6 @@
- extern "C" {
- #include "os/os_time.h"
- -#include "r600_pipe.h"
- #include "r600_shader.h"
- #include "sb_public.h"
- @@ -41,6 +40,7 @@ extern "C" {
- #include "sb_shader.h"
- #include "sb_pass.h"
- #include "sb_sched.h"
- +#include "sb_tgsi.h"
- using namespace r600_sb;
- @@ -89,73 +89,9 @@ void r600_sb_context_destroy(void * sctx) {
- }
- }
- -int r600_sb_bytecode_process(struct r600_context *rctx,
- - struct r600_bytecode *bc,
- - struct r600_shader *pshader,
- - int dump_bytecode,
- - int optimize) {
- - int r = 0;
- - unsigned shader_id = bc->debug_id;
- +static int sb_build_bytecode(shader *sh, int dump_bytecode) {
- - sb_context *ctx = (sb_context *)rctx->sb_context;
- - if (!ctx) {
- - rctx->sb_context = ctx = r600_sb_context_create(rctx);
- - }
- -
- - int64_t time_start = 0;
- - if (sb_context::dump_stat) {
- - time_start = os_time_get_nano();
- - }
- -
- - SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; );
- -
- - bc_parser parser(*ctx, bc, pshader);
- -
- - if ((r = parser.decode())) {
- - assert(!"sb: bytecode decoding error");
- - return r;
- - }
- -
- - shader *sh = parser.get_shader();
- -
- - if (dump_bytecode) {
- - bc_dump(*sh, bc->bytecode, bc->ndw).run();
- - }
- -
- - if (!optimize) {
- - delete sh;
- - return 0;
- - }
- -
- - if (sh->target != TARGET_FETCH) {
- - sh->src_stats.ndw = bc->ndw;
- - sh->collect_stats(false);
- - }
- -
- - /* skip some shaders (use shaders from default backend)
- - * dskip_start - range start, dskip_end - range_end,
- - * e.g. start = 5, end = 6 means shaders 5 & 6
- - *
- - * dskip_mode == 0 - disabled,
- - * dskip_mode == 1 - don't process the shaders from the [start;end] range
- - * dskip_mode == 2 - process only the shaders from the range
- - */
- - if (sb_context::dskip_mode) {
- - if ((sb_context::dskip_start <= shader_id &&
- - shader_id <= sb_context::dskip_end) ==
- - (sb_context::dskip_mode == 1)) {
- - sblog << "sb: skipped shader " << shader_id << " : " << "["
- - << sb_context::dskip_start << "; "
- - << sb_context::dskip_end << "] mode "
- - << sb_context::dskip_mode << "\n";
- - return 0;
- - }
- - }
- -
- - if ((r = parser.prepare())) {
- - assert(!"sb: bytecode parsing error");
- - return r;
- - }
- + int r;
- SB_DUMP_PASS( sblog << "\n\n###### after parse\n"; sh->dump_ir(); );
- @@ -240,29 +176,147 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
- return r;
- }
- - bytecode &nbc = builder.get_bytecode();
- + bytecode &nbc = sh->get_bytecode();
- if (dump_bytecode) {
- bc_dump(*sh, &nbc).run();
- }
- - if (!sb_context::dry_run) {
- + return 0;
- +}
- - free(bc->bytecode);
- - bc->ndw = nbc.ndw();
- - bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
- - nbc.write_data(bc->bytecode);
- - bc->ngpr = sh->ngpr;
- - bc->nstack = sh->nstack;
- - } else {
- - SB_DUMP_STAT( sblog << "sb: dry run: optimized bytecode is not used\n"; );
- +int r600_sb_compile_tgsi(struct r600_context *rctx,
- + struct r600_pipe_shader *pipe_shader,
- + struct r600_shader_key key,
- + int dump_bytecode) {
- + int r = 0;
- +
- + r600_bytecode *bc = &pipe_shader->shader.bc;
- + r600_bytecode_init(bc, rctx->chip_class, rctx->family,
- + rctx->screen->has_compressed_msaa_texturing);
- +
- + sb_context *ctx = (sb_context *)rctx->sb_context;
- + if (!ctx) {
- + rctx->sb_context = ctx = r600_sb_context_create(rctx);
- + }
- +
- + int64_t time_start = 0;
- + if (sb_context::dump_stat) {
- + time_start = os_time_get_nano();
- }
- + unsigned shader_id = bc->debug_id;
- + SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; );
- +
- + // translate from tgsi
- +
- + tgsi_translator tt(*ctx, pipe_shader, key, shader_id);
- +
- + shader* sh = tt.translate();
- + assert(sh);
- +
- + r = sb_build_bytecode(sh, dump_bytecode);
- + if (r)
- + return r;
- +
- if (sb_context::dump_stat) {
- int64_t t = os_time_get_nano() - time_start;
- - sblog << "sb: processing shader " << shader_id << " done ( "
- + sblog << "sb: tgsi compilation of shader " << sh->id << " done ( "
- + << ((double)t)/1000000.0 << " ms ).\n";
- + }
- +
- + bytecode &nbc = sh->get_bytecode();
- +
- + bc->ndw = nbc.ndw();
- + bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
- + nbc.write_data(bc->bytecode);
- +
- + bc->ngpr = sh->ngpr;
- + bc->nstack = sh->nstack;
- +
- + delete sh;
- + return 0;
- +}
- +
- +int r600_sb_bytecode_process(struct r600_context *rctx,
- + struct r600_bytecode *bc,
- + struct r600_shader *pshader,
- + int dump_bytecode,
- + int optimize) {
- + int r = 0;
- + unsigned shader_id = bc->debug_id;
- +
- + sb_context *ctx = (sb_context *)rctx->sb_context;
- + if (!ctx) {
- + rctx->sb_context = ctx = r600_sb_context_create(rctx);
- + }
- +
- + int64_t time_start = 0;
- + if (sb_context::dump_stat) {
- + time_start = os_time_get_nano();
- + }
- +
- + SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; );
- +
- + bc_parser parser(*ctx, bc, pshader);
- +
- + if ((r = parser.decode())) {
- + assert(!"sb: bytecode decoding error");
- + return r;
- + }
- +
- + shader *sh = parser.get_shader();
- +
- + if (dump_bytecode) {
- + bc_dump(*sh, bc->bytecode, bc->ndw).run();
- + }
- +
- + if (!optimize) {
- + delete sh;
- + return 0;
- + }
- +
- + if (sh->target != TARGET_FETCH) {
- + sh->src_stats.ndw = bc->ndw;
- + sh->collect_stats(false);
- + }
- +
- + /* skip some shaders (use shaders from default backend)
- + * dskip_start - range start, dskip_end - range_end,
- + * e.g. start = 5, end = 6 means shaders 5 & 6
- + *
- + * dskip_mode == 0 - disabled,
- + * dskip_mode == 1 - don't process the shaders from the [start;end] range
- + * dskip_mode == 2 - process only the shaders from the range
- + */
- + if (sb_context::dskip_mode) {
- + if ((sb_context::dskip_start <= shader_id &&
- + shader_id <= sb_context::dskip_end) ==
- + (sb_context::dskip_mode == 1)) {
- + sblog << "sb: skipped shader " << shader_id << " : " << "["
- + << sb_context::dskip_start << "; "
- + << sb_context::dskip_end << "] mode "
- + << sb_context::dskip_mode << "\n";
- + return 0;
- + }
- + }
- +
- + if ((r = parser.prepare())) {
- + assert(!"sb: bytecode parsing error");
- + return r;
- + }
- +
- +
- + r = sb_build_bytecode(sh, dump_bytecode);
- + if (r)
- + return r;
- +
- + if (sb_context::dump_stat) {
- + int64_t t = os_time_get_nano() - time_start;
- +
- + sblog << "sb: processing shader " << sh->id << " done ( "
- << ((double)t)/1000000.0 << " ms ).\n";
- sh->opt_stats.ndw = bc->ndw;
- @@ -276,6 +330,20 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
- sh->src_stats.dump_diff(sh->opt_stats);
- }
- + if (!sb_context::dry_run) {
- + bytecode &nbc = sh->get_bytecode();
- +
- + free(bc->bytecode);
- + bc->ndw = nbc.ndw();
- + bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
- + nbc.write_data(bc->bytecode);
- +
- + bc->ngpr = sh->ngpr;
- + bc->nstack = sh->nstack;
- + } else {
- + SB_DUMP_STAT( sblog << "sb: dry run: optimized bytecode is not used\n"; );
- + }
- +
- delete sh;
- return 0;
- }
- diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
- index c838f62..716af59 100644
- --- a/src/gallium/drivers/r600/sb/sb_ir.h
- +++ b/src/gallium/drivers/r600/sb/sb_ir.h
- @@ -48,19 +48,33 @@ class node;
- class value;
- class shader;
- +enum sel_class {
- + SC_GPR = 0,
- + SC_TGSI_INPUT = 1,
- + SC_TGSI_OUTPUT = 2,
- + SC_TGSI_ADDR = 3,
- + SC_TGSI_CONST = 4,
- +
- +};
- +
- +
- struct sel_chan
- {
- + static const unsigned reg_shift = 2;
- + static const unsigned chan_mask = (1u << reg_shift) - 1;
- +
- unsigned id;
- sel_chan(unsigned id = 0) : id(id) {}
- - sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {}
- + sel_chan(unsigned sel, unsigned chan)
- + : id(((sel << 2) | chan) + 1) {}
- unsigned sel() const { return sel(id); }
- unsigned chan() const {return chan(id); }
- operator unsigned() const {return id;}
- static unsigned sel(unsigned idx) { return (idx-1) >> 2; }
- - static unsigned chan(unsigned idx) { return (idx-1) & 3; }
- + static unsigned chan(unsigned idx) { return (idx-1) & chan_mask; }
- };
- inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) {
- @@ -258,8 +272,9 @@ public:
- class value;
- enum value_kind {
- + VLK_INVALID,
- +
- VLK_REG,
- - VLK_REL_REG,
- VLK_SPECIAL_REG,
- VLK_TEMP,
- @@ -268,6 +283,12 @@ enum value_kind {
- VLK_PARAM,
- VLK_SPECIAL_CONST,
- + VLK_TGSI_INPUT,
- + VLK_TGSI_OUTPUT,
- + VLK_TGSI_TEMP,
- + VLK_TGSI_ADDR,
- +
- +
- VLK_UNDEF
- };
- @@ -371,24 +392,29 @@ public:
- typedef sb_value_set val_set;
- -struct gpr_array {
- - sel_chan base_gpr; // original gpr
- +struct rel_array {
- + value_kind kind;
- +
- + sel_chan base_sel;
- + sel_chan pin_gpr;
- sel_chan gpr; // assigned by regalloc
- unsigned array_size;
- + unsigned array_id;
- - gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr),
- - array_size(array_size) {}
- + rel_array(value_kind kind, sel_chan base_sel, unsigned array_size,
- + unsigned array_id)
- + : kind(kind), base_sel(base_sel), pin_gpr(), gpr(),
- + array_size(array_size), array_id(array_id), interferences(), refs() {}
- - unsigned hash() { return (base_gpr << 10) * array_size; }
- + unsigned hash() { return ((kind << 16) | (base_sel)) * array_size; }
- val_set interferences;
- vvec refs;
- bool is_dead();
- -
- };
- -typedef std::vector<gpr_array*> regarray_vec;
- +typedef std::vector<rel_array*> regarray_vec;
- enum value_flags {
- VLF_UNDEF = (1 << 0),
- @@ -467,7 +493,7 @@ protected:
- value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0)
- : kind(k), flags(),
- rel(), array(),
- - version(ver), select(select), pin_gpr(select), gpr(),
- + version(ver), select(select), pin_gpr(), gpr(),
- gvn_source(), ghash(),
- def(), adef(), uses(), constraint(), chunk(),
- literal_value(), uid(sh_id) {}
- @@ -482,7 +508,7 @@ public:
- vvec mdef;
- vvec muse;
- value *rel;
- - gpr_array *array;
- + rel_array *array;
- unsigned version;
- @@ -528,8 +554,14 @@ public:
- bool is_undef() { return gvalue()->kind == VLK_UNDEF; }
- + bool is_tgsi_value() {
- + return kind == VLK_TGSI_INPUT || kind == VLK_TGSI_OUTPUT ||
- + kind == VLK_TGSI_TEMP || kind == VLK_TGSI_ADDR;
- + }
- +
- bool is_any_gpr() {
- - return (kind == VLK_REG || kind == VLK_TEMP);
- + return (!rel &&
- + (kind == VLK_REG || kind == VLK_TEMP || is_tgsi_value()));
- }
- bool is_agpr() {
- @@ -544,7 +576,7 @@ public:
- bool is_special_reg() { return kind == VLK_SPECIAL_REG; }
- bool is_any_reg() { return is_any_gpr() || is_special_reg(); }
- bool is_kcache() { return kind == VLK_KCACHE; }
- - bool is_rel() { return kind == VLK_REL_REG; }
- + bool is_rel() { return rel != NULL; }
- bool is_readonly() { return flags & VLF_READONLY; }
- bool is_chan_pinned() { return flags & VLF_PIN_CHAN; }
- @@ -573,8 +605,10 @@ public:
- && literal_value != literal(0)
- && literal_value != literal(1)
- && literal_value != literal(-1)
- - && literal_value != literal(0.5)
- - && literal_value != literal(1.0);
- + && literal_value != literal(0.5f)
- + && literal_value != literal(-0.5f)
- + && literal_value != literal(1.0f)
- + && literal_value != literal(-1.0f);
- }
- void add_use(node *n, use_kind kind, int arg);
- @@ -594,7 +628,7 @@ public:
- sel_chan get_final_gpr() {
- if (array && array->gpr) {
- - int reg_offset = select.sel() - array->base_gpr.sel();
- + int reg_offset = select.sel() - array->base_sel.sel();
- if (rel && rel->is_const())
- reg_offset += rel->get_const_value().i;
- return array->gpr + (reg_offset << 2);
- @@ -755,7 +789,7 @@ protected:
- node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY)
- : prev(), next(), parent(),
- type(nt), subtype(nst), flags(flags),
- - pred(), dst(), src() {}
- + pred(), dst(), src(), source_line() {}
- virtual ~node() {};
- @@ -772,6 +806,8 @@ public:
- vvec dst;
- vvec src;
- + unsigned source_line;
- +
- virtual bool is_valid() { return true; }
- virtual bool accept(vpass &p, bool enter);
- diff --git a/src/gallium/drivers/r600/sb/sb_liveness.cpp b/src/gallium/drivers/r600/sb/sb_liveness.cpp
- index 8ecc9a5..d683ea9 100644
- --- a/src/gallium/drivers/r600/sb/sb_liveness.cpp
- +++ b/src/gallium/drivers/r600/sb/sb_liveness.cpp
- @@ -345,10 +345,10 @@ void liveness::process_op(node& n) {
- int liveness::init() {
- if (sh.compute_interferences) {
- - gpr_array_vec &vv = sh.arrays();
- - for (gpr_array_vec::iterator I = vv.begin(), E = vv.end(); I != E;
- + rel_array_vec &vv = sh.arrays();
- + for (rel_array_vec::iterator I = vv.begin(), E = vv.end(); I != E;
- ++I) {
- - gpr_array *a = *I;
- + rel_array *a = *I;
- a->interferences.clear();
- }
- }
- diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h
- index 95d2a20..64d2e1d 100644
- --- a/src/gallium/drivers/r600/sb/sb_pass.h
- +++ b/src/gallium/drivers/r600/sb/sb_pass.h
- @@ -588,7 +588,9 @@ class ssa_prepare : public vpass {
- unsigned level;
- public:
- - ssa_prepare(shader &s) : vpass(s), level(0) {}
- + ssa_prepare(shader &s) : vpass(s), stk(), level(0) {}
- +
- + virtual int init() { stk.resize(1); return 0; }
- virtual bool visit(cf_node &n, bool enter);
- virtual bool visit(alu_node &n, bool enter);
- diff --git a/src/gallium/drivers/r600/sb/sb_public.h b/src/gallium/drivers/r600/sb/sb_public.h
- index c9f5f97..c42ef2e 100644
- --- a/src/gallium/drivers/r600/sb/sb_public.h
- +++ b/src/gallium/drivers/r600/sb/sb_public.h
- @@ -31,6 +31,11 @@ struct r600_shader;
- void r600_sb_context_destroy(void *sctx);
- +int r600_sb_compile_tgsi(struct r600_context *rctx,
- + struct r600_pipe_shader *pipe_shader,
- + struct r600_shader_key key,
- + int dump);
- +
- int r600_sb_bytecode_process(struct r600_context *rctx,
- struct r600_bytecode *bc,
- struct r600_shader *pshader,
- diff --git a/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
- index 0b332a9..856a2d9 100644
- --- a/src/gallium/drivers/r600/sb/sb_ra_init.cpp
- +++ b/src/gallium/drivers/r600/sb/sb_ra_init.cpp
- @@ -259,13 +259,13 @@ sel_chan regbits::find_free_chan_by_mask(unsigned mask) {
- void ra_init::alloc_arrays() {
- - gpr_array_vec &ga = sh.arrays();
- + rel_array_vec &ga = sh.arrays();
- - for(gpr_array_vec::iterator I = ga.begin(), E = ga.end(); I != E; ++I) {
- - gpr_array *a = *I;
- + for(rel_array_vec::iterator I = ga.begin(), E = ga.end(); I != E; ++I) {
- + rel_array *a = *I;
- RA_DUMP(
- - sblog << "array [" << a->array_size << "] at " << a->base_gpr << "\n";
- + sblog << "array [" << a->array_size << "] at " << a->base_sel << "\n";
- sblog << "\n";
- );
- @@ -300,7 +300,7 @@ void ra_init::alloc_arrays() {
- regbits rb(sh, s);
- sel_chan base = rb.find_free_array(a->array_size,
- - (1 << a->base_gpr.chan()));
- + (1 << a->base_sel.chan()));
- RA_DUMP( sblog << " found base: " << base << "\n"; );
- @@ -349,9 +349,7 @@ void ra_init::process_op(node* n) {
- break;
- }
- }
- - }
- -
- - if (n->is_fetch_inst() || n->is_cf_inst()) {
- + } else if (n->is_fetch_inst() || n->is_cf_inst()) {
- for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
- value *v = *I;
- if (v && v->is_sgpr())
- @@ -684,7 +682,6 @@ void ra_split::split_packed_ins(alu_packed_node *n) {
- }
- }
- -// TODO handle other packed ops for cayman
- void ra_split::split_alu_packed(alu_packed_node* n) {
- switch (n->op()) {
- case ALU_OP2_DOT4:
- @@ -692,6 +689,11 @@ void ra_split::split_alu_packed(alu_packed_node* n) {
- split_packed_ins(n);
- break;
- default:
- + if (ctx.is_cayman()) {
- + unsigned slots = ctx.alu_slots(n->op_ptr());
- + if (slots == AF_4VE || slots == AF_S)
- + split_packed_ins(n);
- + }
- break;
- }
- }
- @@ -723,7 +725,7 @@ void ra_split::split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz) {
- if (!allow_swz) {
- t->flags |= VLF_PIN_CHAN;
- - t->pin_gpr = sel_chan(0, ch);
- + t->pin_gpr = sel_chan(t->pin_gpr.sel(), ch);
- }
- v2.push_back(o);
- @@ -812,6 +814,11 @@ void ra_split::split_vector_inst(node* n) {
- } else
- sel = s->select;
- + // FIXME: handle this more cleanly
- + // (propagate pin_gpr in ssa rename)
- + if (s->kind == VLK_TGSI_INPUT)
- + sel = sel_chan(sel.sel() + 1, sel.chan());
- +
- v->gpr = v->pin_gpr = sel;
- v->fix();
- }
- diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp
- index f0e41f5..fd0f761 100644
- --- a/src/gallium/drivers/r600/sb/sb_sched.cpp
- +++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
- @@ -939,9 +939,10 @@ void post_scheduler::update_live(node *n, val_set *born) {
- void post_scheduler::process_group() {
- alu_group_tracker &rt = alu.grp();
- -
- val_set vals_born;
- + prev_array_read.clear();
- +
- recolor_locals();
- PSC_DUMP(
- @@ -956,6 +957,7 @@ void post_scheduler::process_group() {
- continue;
- update_live(n, &vals_born);
- + update_prev_array_read(n);
- }
- PSC_DUMP(
- @@ -1014,7 +1016,10 @@ void post_scheduler::schedule_alu(container_node *c) {
- prev_regmap = regmap;
- if (!prepare_alu_group()) {
- - if (alu.current_ar) {
- + if (latency_check_failed) {
- + emit_nop_group();
- + continue;
- + } else if (alu.current_ar) {
- emit_load_ar();
- continue;
- } else
- @@ -1263,6 +1268,11 @@ bool post_scheduler::map_src_val(value *v) {
- return true;
- sel_chan gpr = v->get_final_gpr();
- +
- + PSC_DUMP(
- + sblog << "map src " << *v << " to " << gpr << "\n";
- + );
- +
- rv_map::iterator F = regmap.find(gpr);
- value *c = NULL;
- if (F != regmap.end()) {
- @@ -1436,6 +1446,11 @@ unsigned post_scheduler::try_add_instruction(node *n) {
- alu_group_tracker &rt = alu.grp();
- +#if 0 // this does not seem to be a problem so far, at least on evergreen
- + if (!check_latency(n))
- + return 0;
- +#endif
- +
- unsigned avail_slots = rt.avail_slots();
- if (n->is_alu_packed()) {
- @@ -1606,6 +1621,8 @@ bool post_scheduler::prepare_alu_group() {
- alu_group_tracker &rt = alu.grp();
- + latency_check_failed = false;
- +
- unsigned i1 = 0;
- PSC_DUMP(
- @@ -1634,7 +1651,6 @@ bool post_scheduler::prepare_alu_group() {
- sblog << "\n";
- );
- -
- unsigned cnt = try_add_instruction(n);
- if (!cnt)
- @@ -1970,4 +1986,60 @@ void rp_gpr_tracker::dump() {
- }
- }
- +void post_scheduler::update_prev_array_read(alu_node* n) {
- + for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
- + value *v = *I;
- +
- + if (!v || !v->array)
- + continue;
- +
- + prev_array_read.push_back(v);
- + }
- +}
- +
- +bool post_scheduler::check_latency(node* n) {
- + for (vvec::iterator I = n->dst.begin(), E = n->dst.end(); I != E; ++I) {
- + value *d = *I;
- +
- + if (!d || !d->array)
- + continue;
- +
- + if (!check_value_latency(d))
- + return false;
- + }
- + return true;
- +}
- +
- +bool post_scheduler::check_value_latency(value* v) {
- + for (vvec::iterator I = prev_array_read.begin(), E = prev_array_read.end();
- + I != E; ++I) {
- + value *r = *I;
- +
- + if (r->array == v->array) {
- + bool rel_write = v->is_rel();
- + bool rel_read = r->is_rel();
- +
- + if (rel_write ^ rel_read) {
- + latency_check_failed = true;
- + return false;
- + }
- + }
- + }
- + return true;
- +}
- +
- +void post_scheduler::emit_nop_group() {
- + alu_node * a = sh.create_alu();
- + a->bc.set_op(ALU_OP0_NOP);
- +
- + alu_group_tracker &rt = alu.grp();
- + if (!rt.try_reserve(a)) {
- + sblog << "can't emit NOP group : ";
- + dump::dump_op(a);
- + sblog << "\n";
- + }
- +
- + alu.emit_group();
- +}
- +
- } // namespace r600_sb
- diff --git a/src/gallium/drivers/r600/sb/sb_sched.h b/src/gallium/drivers/r600/sb/sb_sched.h
- index a74484f..40e8b15 100644
- --- a/src/gallium/drivers/r600/sb/sb_sched.h
- +++ b/src/gallium/drivers/r600/sb/sb_sched.h
- @@ -254,11 +254,15 @@ class post_scheduler : public pass {
- val_set cleared_interf;
- + vvec prev_array_read;
- + bool latency_check_failed;
- +
- public:
- post_scheduler(shader &sh) : pass(sh),
- ready(), ready_copies(), pending(), cur_bb(),
- - live(), ucm(), alu(sh), regmap(), cleared_interf() {}
- + live(), ucm(), alu(sh), regmap(), cleared_interf(),
- + prev_array_read(), latency_check_failed() {}
- virtual int run();
- void run_on(container_node *n);
- @@ -317,6 +321,11 @@ public:
- void emit_clause();
- void process_ready_copies();
- +
- + void update_prev_array_read(alu_node *n);
- + bool check_latency(node *n);
- + bool check_value_latency(value *v);
- + void emit_nop_group();
- };
- } // namespace r600_sb
- diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp
- index 9fc47ae..2443364 100644
- --- a/src/gallium/drivers/r600/sb/sb_shader.cpp
- +++ b/src/gallium/drivers/r600/sb/sb_shader.cpp
- @@ -30,24 +30,23 @@
- namespace r600_sb {
- -shader::shader(sb_context &sctx, shader_target t, unsigned id)
- -: ctx(sctx), next_temp_value_index(temp_regid_offset),
- - prep_regs_count(), pred_sels(),
- - regions(), inputs(), undef(), val_pool(sizeof(value)),
- - pool(), all_nodes(), src_stats(), opt_stats(), errors(),
- - optimized(), id(id),
- - coal(*this), bbs(),
- - target(t), vt(ex), ex(*this), root(),
- - compute_interferences(),
- - has_alu_predication(), uses_gradients(), safe_math(), ngpr(), nstack() {}
- +// Construct an empty shader for target t in context sctx.
- +// next_temp_value_index starts at temp_regid_offset, bc is constructed from
- +// sctx.hw_class_bit(), and direct_tgsi is stored as passed by the caller.
- +// Members listed with empty parentheses are value-initialized.
- +shader::shader(sb_context &sctx, shader_target t, unsigned id, bool direct_tgsi)
- + : ctx(sctx), next_temp_value_index(temp_regid_offset), pred_sels(),
- + regions(), inputs(), undef(), val_pool(sizeof(value)), pool(),
- + all_nodes(), bc(sctx.hw_class_bit()), src_stats(), opt_stats(),
- + errors(), optimized(), id(id), coal(*this), bbs(), target(t),
- + vt(ex), ex(*this), root(), compute_interferences(),
- + has_alu_predication(), uses_gradients(), safe_math(), ngpr(),
- + nstack(), direct_tgsi(direct_tgsi) {
- +}
- bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {
- unsigned slot_flags = ctx.alu_slots(n->bc.op);
- unsigned slot = n->bc.dst_chan;
- - if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) &&
- - (slot_flags & AF_S))
- + if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot])
- + && (slot_flags & AF_S))
- slot = SLOT_TRANS;
- if (slots[slot])
- @@ -59,7 +58,7 @@ bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {
- }
- void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
- - bool src) {
- + bool src) {
- unsigned chan = 0;
- while (comp_mask) {
- if (comp_mask & 1) {
- @@ -72,7 +71,7 @@ void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
- if (v->array && !v->array->gpr) {
- // if pinned value can be accessed with indirect addressing
- // pin the entire array to its original location
- - v->array->gpr = v->array->base_gpr;
- + v->array->gpr = v->array->base_sel;
- }
- vec.push_back(v);
- }
- @@ -81,16 +80,49 @@ void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
- }
- }
- +// For every channel whose bit is set in comp_mask, fetch the register value
- +// (kind, sel, channel) via get_reg_value() and append it to vec.
- +// Values that are not part of an array are additionally pinned (register and
- +// channel) to GPR pin_gpr_sel on the same channel and marked fixed.
- +// Array-backed values are appended without pinning.
- +void shader::add_pinned_inputs(vvec& vec, value_kind kind, unsigned sel,
- +                               unsigned comp_mask, bool src,
- +                               unsigned pin_gpr_sel) {
- +	for (unsigned chan = 0; comp_mask; comp_mask >>= 1, ++chan) {
- +		// channel not selected by the mask
- +		if (!(comp_mask & 1))
- +			continue;
- +
- +		value *v = get_reg_value(kind, src, sel, chan, false);
- +
- +		if (!v->array) {
- +			v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
- +			v->gpr = v->pin_gpr = sel_chan(pin_gpr_sel, chan);
- +			v->fix();
- +		}
- +
- +		// Disabled (WIP): pinning of whole arrays.
- +		//
- +		//	if (v->array && !v->array->gpr) {
- +		//		// if pinned value can be accessed with indirect addressing
- +		//		// pin the entire array to its original location
- +		//		v->array->gpr = sel_chan(
- +		//			pin_gpr_sel - (sel - v->array->base_sel), chan);
- +		//	}
- +
- +		vec.push_back(v);
- +	}
- +}
- +
- cf_node* shader::create_clause(node_subtype nst) {
- cf_node *n = create_cf();
- n->subtype = nst;
- switch (nst) {
- - case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
- - case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
- - case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
- - default: assert(!"invalid clause type"); break;
- + case NST_ALU_CLAUSE:
- + n->bc.set_op(CF_OP_ALU);
- + break;
- + case NST_TEX_CLAUSE:
- + n->bc.set_op(CF_OP_TEX);
- + break;
- + case NST_VTX_CLAUSE:
- + n->bc.set_op(CF_OP_VTX);
- + break;
- + default:
- + assert(!"invalid clause type");
- + break;
- }
- n->bc.barrier = 1;
- @@ -127,13 +159,11 @@ alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
- return n;
- }
- -value* shader::get_value(value_kind kind, sel_chan id,
- - unsigned version) {
- - if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count)
- - return val_pool[id - 1];
- +value* shader::get_value(value_kind kind, sel_chan id, unsigned version) {
- + unsigned key = (kind << 28) | (version << 14) | id;
- + assert((id & ((1 << 14) - 1)) == id);
- + assert((version & ((1 << 14) - 1)) == version);
- -
- - unsigned key = (kind << 28) | (version << 16) | id;
- value_map::iterator i = reg_values.find(key);
- if (i != reg_values.end()) {
- return i->second;
- @@ -148,53 +178,51 @@ value* shader::get_special_value(unsigned sv_id, unsigned version) {
- return get_value(VLK_SPECIAL_REG, id, version);
- }
- -void shader::fill_array_values(gpr_array *a, vvec &vv) {
- +void shader::fill_array_values(rel_array *a, vvec &vv) {
- unsigned sz = a->array_size;
- vv.resize(sz);
- for (unsigned i = 0; i < a->array_size; ++i) {
- - vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(),
- - false);
- + vv[i] = get_reg_value(a->kind, true, a->base_sel.sel() + i,
- + a->base_sel.chan(), false);
- }
- }
- -value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
- - unsigned version) {
- - sel_chan id(reg, chan);
- +value* shader::get_reg_value(value_kind kind, bool src, unsigned sel,
- + unsigned chan, bool rel, value *r,
- + unsigned arr_id) {
- + sel_chan id(sel, chan);
- value *v;
- - gpr_array *a = get_gpr_array(reg, chan);
- + rel_array *a = get_rel_array(kind, sel, chan);
- if (rel) {
- assert(a);
- - v = create_value(VLK_REL_REG, id, 0);
- - v->rel = get_special_value(SV_AR_INDEX);
- + v = create_value(kind, id, 0);
- + if (!r)
- + r = get_special_value(SV_AR_INDEX);
- + v->rel = r;
- fill_array_values(a, v->muse);
- if (!src)
- fill_array_values(a, v->mdef);
- } else {
- - if (version == 0 && reg < prep_regs_count)
- - return (val_pool[id - 1]);
- -
- - v = get_value(VLK_REG, id, version);
- + v = get_value(kind, id);
- }
- v->array = a;
- - v->pin_gpr = v->select;
- -
- return v;
- }
- -value* shader::create_temp_value() {
- - sel_chan id(++next_temp_value_index, 0);
- +value* shader::create_temp_value(int chan) {
- + sel_chan id(++next_temp_value_index, chan);
- return get_value(VLK_TEMP, id, 0);
- }
- value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) {
- return get_ro_value(kcache_values, VLK_KCACHE,
- - sel_chan((bank << 12) | index, chan));
- + sel_chan((bank << 12) | index, chan));
- }
- void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
- if (inputs.size() <= gpr)
- - inputs.resize(gpr+1);
- + inputs.resize(gpr + 1);
- shader_input &i = inputs[gpr];
- i.preloaded = preloaded;
- @@ -203,7 +231,6 @@ void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
- if (preloaded) {
- add_pinned_gpr_values(root->dst, gpr, comp_mask, true);
- }
- -
- }
- void shader::init() {
- @@ -216,8 +243,8 @@ void shader::init_call_fs(cf_node* cf) {
- assert(target == TARGET_VS);
- - for(inputs_vec::const_iterator I = inputs.begin(),
- - E = inputs.end(); I != E; ++I, ++gpr) {
- + for (inputs_vec::const_iterator I = inputs.begin(), E = inputs.end();
- + I != E; ++I, ++gpr) {
- if (!I->preloaded)
- add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false);
- else
- @@ -232,7 +259,8 @@ void shader::set_undef(val_set& s) {
- val_set &vs = s;
- - for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) {
- + for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E;
- + ++I) {
- value *v = *I;
- assert(!v->is_readonly() && !v->is_rel());
- @@ -267,14 +295,14 @@ alu_node* shader::create_alu() {
- alu_group_node* shader::create_alu_group() {
- alu_group_node* n =
- - new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
- + new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
- all_nodes.push_back(n);
- return n;
- }
- alu_packed_node* shader::create_alu_packed() {
- alu_packed_node* n =
- - new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
- + new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
- all_nodes.push_back(n);
- return n;
- }
- @@ -295,33 +323,34 @@ fetch_node* shader::create_fetch() {
- }
- region_node* shader::create_region() {
- - region_node *n = new (pool.allocate(sizeof(region_node)))
- - region_node(regions.size());
- + region_node *n = new (pool.allocate(sizeof(region_node))) region_node(
- + regions.size());
- regions.push_back(n);
- all_nodes.push_back(n);
- return n;
- }
- depart_node* shader::create_depart(region_node* target) {
- - depart_node* n = new (pool.allocate(sizeof(depart_node)))
- - depart_node(target, target->departs.size());
- + depart_node* n = new (pool.allocate(sizeof(depart_node))) depart_node(
- + target, target->departs.size());
- target->departs.push_back(n);
- all_nodes.push_back(n);
- return n;
- }
- repeat_node* shader::create_repeat(region_node* target) {
- - repeat_node* n = new (pool.allocate(sizeof(repeat_node)))
- - repeat_node(target, target->repeats.size() + 1);
- + repeat_node* n = new (pool.allocate(sizeof(repeat_node))) repeat_node(
- + target, target->repeats.size() + 1);
- target->repeats.push_back(n);
- all_nodes.push_back(n);
- return n;
- }
- container_node* shader::create_container(node_type nt, node_subtype nst,
- - node_flags flags) {
- - container_node *n = new (pool.allocate(sizeof(container_node)))
- - container_node(nt, nst, flags);
- + node_flags flags) {
- + container_node *n =
- + new (pool.allocate(sizeof(container_node))) container_node(nt, nst,
- + flags);
- all_nodes.push_back(n);
- return n;
- }
- @@ -349,12 +378,12 @@ value* shader::get_const_value(const literal &v) {
- }
- shader::~shader() {
- - for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end();
- - I != E; ++I)
- + for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end(); I != E;
- + ++I)
- (*I)->~node();
- - for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end();
- - I != E; ++I) {
- + for (rel_array_vec::iterator I = rel_arrays.begin(), E = rel_arrays.end();
- + I != E; ++I) {
- delete *I;
- }
- }
- @@ -376,32 +405,37 @@ value* shader::get_value_version(value* v, unsigned ver) {
- return vv;
- }
- -gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) {
- +rel_array* shader::get_rel_array(value_kind kind, unsigned sel, unsigned chan) {
- - for (regarray_vec::iterator I = gpr_arrays.begin(),
- - E = gpr_arrays.end(); I != E; ++I) {
- - gpr_array* a = *I;
- - unsigned achan = a->base_gpr.chan();
- - unsigned areg = a->base_gpr.sel();
- - if (achan == chan && (reg >= areg && reg < areg+a->array_size))
- + for (regarray_vec::iterator I = rel_arrays.begin(), E = rel_arrays.end();
- + I != E; ++I) {
- + rel_array* a = *I;
- + if (kind != a->kind)
- + continue;
- + unsigned achan = a->base_sel.chan();
- + unsigned areg = a->base_sel.sel();
- + if (achan == chan && (sel >= areg && sel < areg + a->array_size))
- return a;
- }
- return NULL;
- }
- -void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count,
- - unsigned comp_mask) {
- +void shader::add_rel_array(value_kind kind, unsigned sel_start,
- + unsigned sel_count, unsigned comp_mask,
- + unsigned array_id) {
- unsigned chan = 0;
- while (comp_mask) {
- if (comp_mask & 1) {
- - gpr_array *a = new gpr_array(
- - sel_chan(gpr_start, chan), gpr_count);
- + rel_array *a = new rel_array(kind, sel_chan(sel_start, chan),
- + sel_count, array_id);
- - SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr
- - << " [" << a->array_size << "]\n";
- + SB_DUMP_PASS(
- + sblog << "add_gpr_array: @" << a->base_sel << " ["
- + << a->array_size << "]\n"
- + ;
- );
- - gpr_arrays.push_back(a);
- + rel_arrays.push_back(a);
- }
- comp_mask >>= 1;
- ++chan;
- @@ -434,13 +468,18 @@ std::string shader::get_full_target_name() {
- const char* shader::get_shader_target_name() {
- switch (target) {
- - case TARGET_VS: return "VS";
- - case TARGET_PS: return "PS";
- - case TARGET_GS: return "GS";
- - case TARGET_COMPUTE: return "COMPUTE";
- - case TARGET_FETCH: return "FETCH";
- - default:
- - return "INVALID_TARGET";
- + case TARGET_VS:
- + return "VS";
- + case TARGET_PS:
- + return "PS";
- + case TARGET_GS:
- + return "GS";
- + case TARGET_COMPUTE:
- + return "COMPUTE";
- + case TARGET_FETCH:
- + return "FETCH";
- + default:
- + return "INVALID_TARGET";
- }
- }
- @@ -457,7 +496,6 @@ void shader::simplify_dep_rep(node* dr) {
- dr->parent->cut(dr->next, NULL);
- }
- -
- // FIXME this is used in some places as the max non-temp gpr,
- // (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
- unsigned shader::first_temp_gpr() {
- @@ -529,10 +567,8 @@ void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {
- if (inside_bb && !last_inside_bb)
- bb_start = I;
- else if (!inside_bb) {
- - if (last_inside_bb
- - && I->type != NT_REPEAT
- - && I->type != NT_DEPART
- - && I->type != NT_IF) {
- + if (last_inside_bb && I->type != NT_REPEAT && I->type != NT_DEPART
- + && I->type != NT_IF) {
- bb_node *bb = create_bb(bbs.size(), loop_level);
- bbs.push_back(bb);
- n->insert_node_before(*bb_start, bb);
- @@ -548,7 +584,7 @@ void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {
- }
- create_bbs(static_cast<container_node*>(k), bbs,
- - loop_level + loop);
- + loop_level + loop);
- }
- }
- @@ -562,7 +598,7 @@ void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {
- bb_node *bb = create_bb(bbs.size(), loop_level);
- bbs.push_back(bb);
- if (n->empty())
- - n->push_back(bb);
- + n->push_back(bb);
- else {
- n->insert_node_before(*bb_start, bb);
- if (bb_start != n->end())
- @@ -587,22 +623,22 @@ void shader::expand_bbs(bbs_vec &bbs) {
- sched_queue_id shader::get_queue_id(node* n) {
- switch (n->subtype) {
- - case NST_ALU_INST:
- - case NST_ALU_PACKED_INST:
- - case NST_COPY:
- - case NST_PSI:
- - return SQ_ALU;
- - case NST_FETCH_INST: {
- - fetch_node *f = static_cast<fetch_node*>(n);
- - if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
- - return SQ_VTX;
- - return SQ_TEX;
- - }
- - case NST_CF_INST:
- - return SQ_CF;
- - default:
- - assert(0);
- - return SQ_NUM;
- + case NST_ALU_INST:
- + case NST_ALU_PACKED_INST:
- + case NST_COPY:
- + case NST_PSI:
- + return SQ_ALU;
- + case NST_FETCH_INST: {
- + fetch_node *f = static_cast<fetch_node*>(n);
- + if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
- + return SQ_VTX;
- + return SQ_TEX;
- + }
- + case NST_CF_INST:
- + return SQ_CF;
- + default:
- + assert(0);
- + return SQ_NUM;
- }
- }
- @@ -647,10 +683,9 @@ void shader_stats::accumulate(shader_stats& s) {
- void shader_stats::dump() {
- sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
- - << ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
- - << ", alu:" << alu << ", fetch:" << fetch
- - << ", fetch clauses:" << fetch_clauses
- - << ", cf:" << cf;
- + << ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
- + << ", alu:" << alu << ", fetch:" << fetch << ", fetch clauses:"
- + << fetch_clauses << ", cf:" << cf;
- if (shaders > 1)
- sblog << ", shaders:" << shaders;
- @@ -660,7 +695,7 @@ void shader_stats::dump() {
- static void print_diff(unsigned d1, unsigned d2) {
- if (d1)
- - sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
- + sblog << ((int) d2 - (int) d1) * 100 / (int) d1 << "%";
- else if (d2)
- sblog << "N/A";
- else
- @@ -668,15 +703,24 @@ static void print_diff(unsigned d1, unsigned d2) {
- }
- void shader_stats::dump_diff(shader_stats& s) {
- - sblog << "dw:"; print_diff(ndw, s.ndw);
- - sblog << ", gpr:" ; print_diff(ngpr, s.ngpr);
- - sblog << ", stk:" ; print_diff(nstack, s.nstack);
- - sblog << ", alu groups:" ; print_diff(alu_groups, s.alu_groups);
- - sblog << ", alu clauses: " ; print_diff(alu_clauses, s.alu_clauses);
- - sblog << ", alu:" ; print_diff(alu, s.alu);
- - sblog << ", fetch:" ; print_diff(fetch, s.fetch);
- - sblog << ", fetch clauses:" ; print_diff(fetch_clauses, s.fetch_clauses);
- - sblog << ", cf:" ; print_diff(cf, s.cf);
- + sblog << "dw:";
- + print_diff(ndw, s.ndw);
- + sblog << ", gpr:";
- + print_diff(ngpr, s.ngpr);
- + sblog << ", stk:";
- + print_diff(nstack, s.nstack);
- + sblog << ", alu groups:";
- + print_diff(alu_groups, s.alu_groups);
- + sblog << ", alu clauses: ";
- + print_diff(alu_clauses, s.alu_clauses);
- + sblog << ", alu:";
- + print_diff(alu, s.alu);
- + sblog << ", fetch:";
- + print_diff(fetch, s.fetch);
- + sblog << ", fetch clauses:";
- + print_diff(fetch_clauses, s.fetch_clauses);
- + sblog << ", cf:";
- + print_diff(cf, s.cf);
- sblog << "\n";
- }
- diff --git a/src/gallium/drivers/r600/sb/sb_shader.h b/src/gallium/drivers/r600/sb/sb_shader.h
- index e515d31..abc2d6b 100644
- --- a/src/gallium/drivers/r600/sb/sb_shader.h
- +++ b/src/gallium/drivers/r600/sb/sb_shader.h
- @@ -52,7 +52,7 @@ typedef std::multimap<node*, error_info> error_map;
- class sb_context;
- typedef std::vector<shader_input> inputs_vec;
- -typedef std::vector<gpr_array*> gpr_array_vec;
- +typedef std::vector<rel_array*> rel_array_vec;
- struct ra_edge {
- value *a, *b;
- @@ -234,7 +234,7 @@ private:
- ra_chunk* detach_value(value *v);
- };
- -
- +// =============================================================================
- class shader {
- @@ -248,12 +248,10 @@ class shader {
- value_map special_ro_values; // key - hw alu_sel & chan
- value_map kcache_values;
- - gpr_array_vec gpr_arrays;
- + rel_array_vec rel_arrays;
- unsigned next_temp_value_index;
- - unsigned prep_regs_count;
- -
- value* pred_sels[2];
- regions_vec regions;
- @@ -266,6 +264,8 @@ class shader {
- std::vector<node*> all_nodes;
- + bytecode bc;
- +
- public:
- shader_stats src_stats, opt_stats;
- @@ -277,7 +277,7 @@ public:
- coalescer coal;
- - static const unsigned temp_regid_offset = 512;
- + static const unsigned temp_regid_offset = 0;
- bbs_vec bbs;
- @@ -289,26 +289,33 @@ public:
- container_node *root;
- bool compute_interferences;
- -
- bool has_alu_predication;
- bool uses_gradients;
- -
- bool safe_math;
- unsigned ngpr, nstack;
- - shader(sb_context &sctx, shader_target t, unsigned id);
- + bool direct_tgsi;
- +
- + shader(sb_context &sctx, shader_target t, unsigned id,
- + bool direct_tgsi = false);
- ~shader();
- + bytecode& get_bytecode() { return bc; }
- +
- sb_context &get_ctx() const { return ctx; }
- value* get_const_value(const literal & v);
- value* get_special_value(unsigned sv_id, unsigned version = 0);
- - value* create_temp_value();
- - value* get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
- - unsigned version = 0);
- + value* create_temp_value(int chan = 0);
- + value* get_reg_value(value_kind kind, bool src, unsigned reg,
- + unsigned chan, bool rel, value *r = NULL,
- + unsigned arr_id = 0);
- + value* get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel) {
- + return get_reg_value(VLK_REG, src, reg, chan, rel);
- + }
- value* get_special_ro_value(unsigned sel);
- value* get_kcache_value(unsigned bank, unsigned index, unsigned chan);
- @@ -316,17 +323,22 @@ public:
- value* get_value_version(value* v, unsigned ver);
- void init();
- - void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src);
- + void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
- + bool src);
- +
- + void add_pinned_inputs(vvec& vec, value_kind kind, unsigned sel,
- + unsigned comp_mask, bool src,
- + unsigned pin_gpr_sel);
- void dump_ir();
- - void add_gpr_array(unsigned gpr_start, unsigned gpr_count,
- - unsigned comp_mask);
- + void add_rel_array(value_kind kind, unsigned sel_start, unsigned sel_count,
- + unsigned comp_mask, unsigned array_id = 0);
- value* get_pred_sel(int sel);
- bool assign_slot(alu_node *n, alu_node *slots[5]);
- - gpr_array* get_gpr_array(unsigned reg, unsigned chan);
- + rel_array* get_rel_array(value_kind kind, unsigned sel, unsigned chan);
- void add_input(unsigned gpr, bool preloaded = false,
- unsigned comp_mask = 0xF);
- @@ -381,11 +393,11 @@ public:
- unsigned first_temp_gpr();
- unsigned num_nontemp_gpr();
- - gpr_array_vec& arrays() { return gpr_arrays; }
- + rel_array_vec& arrays() { return rel_arrays; }
- void set_uses_kill();
- - void fill_array_values(gpr_array *a, vvec &vv);
- + void fill_array_values(rel_array *a, vvec &vv);
- alu_node* clone(alu_node *n);
- @@ -393,10 +405,11 @@ public:
- void collect_stats(bool opt);
- -private:
- - value* create_value(value_kind k, sel_chan regid, unsigned ver);
- value* get_value(value_kind kind, sel_chan id,
- unsigned version = 0);
- +
- +private:
- + value* create_value(value_kind k, sel_chan regid, unsigned ver);
- value* get_ro_value(value_map &vm, value_kind vk, unsigned key);
- };
- diff --git a/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp b/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp
- index 3ad628b..6df2979 100644
- --- a/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp
- +++ b/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp
- @@ -201,8 +201,11 @@ bool ssa_rename::visit(alu_node& n, bool enter) {
- if (!n.dst.empty() && n.dst[0]) {
- // FIXME probably use separate pass for such things
- - if ((n.bc.op_ptr->flags & AF_INTERP) || n.bc.op == ALU_OP2_CUBE)
- + if ((n.bc.op_ptr->flags & AF_INTERP) || n.bc.op == ALU_OP2_CUBE) {
- n.dst[0]->flags |= VLF_PIN_CHAN;
- + n.dst[0]->pin_gpr = sel_chan(n.dst[0]->pin_gpr.sel(),
- + n.bc.slot);
- + }
- }
- }
- return true;
- diff --git a/src/gallium/drivers/r600/sb/sb_tgsi.cpp b/src/gallium/drivers/r600/sb/sb_tgsi.cpp
- new file mode 100644
- index 0000000..361323d
- --- /dev/null
- +++ b/src/gallium/drivers/r600/sb/sb_tgsi.cpp
- @@ -0,0 +1,2335 @@
- +/*
- + * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
- + *
- + * Permission is hereby granted, free of charge, to any person obtaining a
- + * copy of this software and associated documentation files (the "Software"),
- + * to deal in the Software without restriction, including without limitation
- + * on the rights to use, copy, modify, merge, publish, distribute, sub
- + * license, and/or sell copies of the Software, and to permit persons to whom
- + * the Software is furnished to do so, subject to the following conditions:
- + *
- + * The above copyright notice and this permission notice (including the next
- + * paragraph) shall be included in all copies or substantial portions of the
- + * Software.
- + *
- + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- + * USE OR OTHER DEALINGS IN THE SOFTWARE.
- + *
- + * Authors:
- + * Vadim Girlin
- + */
- +
- +extern "C" {
- +#include "r600_shader.h"
- +#include "r600_formats.h"
- +}
- +
- +#include "sb_shader.h"
- +#include "sb_tgsi.h"
- +
- +namespace r600_sb {
- +
- +alu_src tgsi_translator::null_alu_src = alu_src();
- +
- +// Per-opcode translation table.
- +//
- +// TI_DESC(op, isa_op, func, flags) builds one row:
- +//   - the opcode name as a string (#op),
- +//   - the TGSI opcode constant TGSI_OPCODE_<op>,
- +//   - isa_op, the hardware opcode passed on to the handler (0 when the row
- +//     does not name one),
- +//   - a pointer to the tgsi_translator member function that handles it,
- +//   - handler flags (e.g. TIF_ALU_SWAPSRC01).
- +// TI_GAP is an empty-initialized row for opcode numbers without an entry.
- +//
- +// The leading /* N */ comments give each row's position; presumably the
- +// table is indexed by TGSI opcode number - verify against the lookup code.
- +// Both helper macros are undefined again right after the table.
- +#define TI_DESC(op, isa_op, func, flags) \
- + {#op, TGSI_OPCODE_##op, isa_op, &tgsi_translator::func, flags}
- +
- +#define TI_GAP {}
- +
- +const tgsi_translator::tgsi_inst_info tgsi_translator::tgsi_info_table[TGSI_OPCODE_LAST] =
- + {
- + /* 0 */ TI_DESC(ARL, 0, ti_arl, 0),
- + /* 1 */ TI_DESC(MOV, ALU_OP1_MOV, ti_alu, 0),
- + /* 2 */ TI_DESC(LIT, 0, ti_lit, 0),
- + /* 3 */ TI_DESC(RCP, ALU_OP1_RECIP_IEEE, ti_repl, 0),
- + /* 4 */ TI_DESC(RSQ, ALU_OP1_RECIPSQRT_CLAMPED, ti_repl, 0),
- + /* 5 */ TI_DESC(EXP, 0, ti_exp, 0),
- + /* 6 */ TI_DESC(LOG, 0, ti_log, 0),
- + /* 7 */ TI_DESC(MUL, ALU_OP2_MUL, ti_alu, 0),
- + /* 8 */ TI_DESC(ADD, ALU_OP2_ADD, ti_alu, 0),
- + /* 9 */ TI_DESC(DP3, 0, ti_dot, 0),
- + /* 10 */ TI_DESC(DP4, 0, ti_dot, 0),
- + /* 11 */ TI_DESC(DST, 0, ti_dst, 0),
- + /* 12 */ TI_DESC(MIN, ALU_OP2_MIN, ti_alu, 0),
- + /* 13 */ TI_DESC(MAX, ALU_OP2_MAX, ti_alu, 0),
- + /* 14 */ TI_DESC(SLT, ALU_OP2_SETGT, ti_alu, TIF_ALU_SWAPSRC01),
- + /* 15 */ TI_DESC(SGE, ALU_OP2_SETGE, ti_alu, 0),
- + /* 16 */ TI_DESC(MAD, ALU_OP3_MULADD, ti_alu, 0),
- + /* 17 */ TI_DESC(SUB, ALU_OP2_ADD, ti_alu, 0),
- + /* 18 */ TI_DESC(LRP, 0, ti_lrp, 0),
- + /* 19 */ TI_DESC(CND, 0, ti_unsupported, 0),
- + /* 20 */ TI_DESC(SQRT, 0, ti_unsupported, 0),
- + /* 21 */ TI_DESC(DP2A, 0, ti_unsupported, 0),
- + /* 22 */ TI_GAP,
- + /* 23 */ TI_GAP,
- + /* 24 */ TI_DESC(FRC, ALU_OP1_FRACT, ti_alu, 0),
- + /* 25 */ TI_DESC(CLAMP, 0, ti_unsupported, 0),
- + /* 26 */ TI_DESC(FLR, ALU_OP1_FLOOR, ti_alu, 0),
- + /* 27 */ TI_DESC(ROUND, ALU_OP1_RNDNE, ti_alu, 0),
- + /* 28 */ TI_DESC(EX2, ALU_OP1_EXP_IEEE, ti_repl, 0),
- + /* 29 */ TI_DESC(LG2, ALU_OP1_LOG_IEEE, ti_repl, 0),
- + /* 30 */ TI_DESC(POW, 0, ti_pow, 0),
- + /* 31 */ TI_DESC(XPD, 0, ti_xpd, 0),
- + /* 32 */ TI_GAP,
- + /* 33 */ TI_DESC(ABS, ALU_OP1_MOV, ti_alu, 0),
- + /* 34 */ TI_DESC(RCC, 0, ti_unsupported, 0),
- + /* 35 */ TI_DESC(DPH, 0, ti_dot, 0),
- + /* 36 */ TI_DESC(COS, ALU_OP1_COS, ti_trig, 0),
- + /* 37 */ TI_DESC(DDX, FETCH_OP_GET_GRADIENTS_H, ti_tex, 0),
- + /* 38 */ TI_DESC(DDY, FETCH_OP_GET_GRADIENTS_V, ti_tex, 0),
- + /* 39 */ TI_DESC(KILL, 0, ti_kill, 0),
- + /* 40 */ TI_DESC(PK2H, 0, ti_unsupported, 0),
- + /* 41 */ TI_DESC(PK2US, 0, ti_unsupported, 0),
- + /* 42 */ TI_DESC(PK4B, 0, ti_unsupported, 0),
- + /* 43 */ TI_DESC(PK4UB, 0, ti_unsupported, 0),
- + /* 44 */ TI_DESC(RFL, 0, ti_unsupported, 0),
- + /* 45 */ TI_DESC(SEQ, ALU_OP2_SETE, ti_alu, 0),
- + /* 46 */ TI_DESC(SFL, 0, ti_unsupported, 0),
- + /* 47 */ TI_DESC(SGT, ALU_OP2_SETGT, ti_alu, 0),
- + /* 48 */ TI_DESC(SIN, ALU_OP1_SIN, ti_trig, 0),
- + /* 49 */ TI_DESC(SLE, ALU_OP2_SETGE, ti_alu, TIF_ALU_SWAPSRC01),
- + /* 50 */ TI_DESC(SNE, ALU_OP2_SETNE, ti_alu, 0),
- + /* 51 */ TI_DESC(STR, 0, ti_unsupported, 0),
- + /* 52 */ TI_DESC(TEX, FETCH_OP_SAMPLE, ti_tex, 0),
- + /* 53 */ TI_DESC(TXD, FETCH_OP_SAMPLE_G, ti_tex, 0),
- + /* 54 */ TI_DESC(TXP, FETCH_OP_SAMPLE, ti_tex, 0),
- + /* 55 */ TI_DESC(UP2H, 0, ti_unsupported, 0),
- + /* 56 */ TI_DESC(UP2US, 0, ti_unsupported, 0),
- + /* 57 */ TI_DESC(UP4B, 0, ti_unsupported, 0),
- + /* 58 */ TI_DESC(UP4UB, 0, ti_unsupported, 0),
- + /* 59 */ TI_DESC(X2D, 0, ti_unsupported, 0),
- + /* 60 */ TI_DESC(ARA, 0, ti_unsupported, 0),
- + /* 61 */ TI_DESC(ARR, 0, ti_arl, 0),
- + /* 62 */ TI_DESC(BRA, 0, ti_unsupported, 0),
- + /* 63 */ TI_DESC(CAL, 0, ti_unsupported, 0),
- + /* 64 */ TI_DESC(RET, 0, ti_unsupported, 0),
- + /* 65 */ TI_DESC(SSG, 0, ti_ssg, 0),
- + /* 66 */ TI_DESC(CMP, 0, ti_cmp, 0),
- + /* 67 */ TI_DESC(SCS, 0, ti_scs, 0),
- + /* 68 */ TI_DESC(TXB, FETCH_OP_SAMPLE_LB, ti_tex, 0),
- + /* 69 */ TI_DESC(NRM, 0, ti_unsupported, 0),
- + /* 70 */ TI_DESC(DIV, 0, ti_unsupported, 0),
- + /* 71 */ TI_DESC(DP2, 0, ti_dot, 0),
- + /* 72 */ TI_DESC(TXL, FETCH_OP_SAMPLE_L, ti_tex, 0),
- + /* 73 */ TI_DESC(BRK, CF_OP_LOOP_BREAK, ti_loop_op, 0),
- + /* 74 */ TI_DESC(IF, ALU_OP2_PRED_SETNE, ti_if, 0),
- + /* 75 */ TI_DESC(UIF, ALU_OP2_PRED_SETNE_INT, ti_if, 0),
- + /* 76 */ TI_GAP,
- + /* 77 */ TI_DESC(ELSE, 0, ti_else, 0),
- + /* 78 */ TI_DESC(ENDIF, 0, ti_endif, 0),
- + /* 79 */ TI_GAP,
- + /* 80 */ TI_GAP,
- + /* 81 */ TI_DESC(PUSHA, 0, ti_unsupported, 0),
- + /* 82 */ TI_DESC(POPA, 0, ti_unsupported, 0),
- + /* 83 */ TI_DESC(CEIL, ALU_OP1_CEIL, ti_alu, 0),
- + /* 84 */ TI_DESC(I2F, ALU_OP1_INT_TO_FLT, ti_alu, 0),
- + /* 85 */ TI_DESC(NOT, ALU_OP1_NOT_INT, ti_alu, 0),
- + /* 86 */ TI_DESC(TRUNC, ALU_OP1_TRUNC, ti_alu, 0),
- + /* 87 */ TI_DESC(SHL, ALU_OP2_LSHL_INT, ti_alu, 0),
- + /* 88 */ TI_GAP,
- + /* 89 */ TI_DESC(AND, ALU_OP2_AND_INT, ti_alu, 0),
- + /* 90 */ TI_DESC(OR, ALU_OP2_OR_INT, ti_alu, 0),
- + /* 91 */ TI_DESC(MOD, 0, ti_divmod, 0),
- + /* 92 */ TI_DESC(XOR, ALU_OP2_XOR_INT, ti_alu, 0),
- + /* 93 */ TI_DESC(SAD, 0, ti_unsupported, 0),
- + /* 94 */ TI_DESC(TXF, FETCH_OP_LD, ti_tex, 0),
- + /* 95 */ TI_DESC(TXQ, FETCH_OP_GET_TEXTURE_RESINFO, ti_tex, 0),
- + /* 96 */ TI_DESC(CONT, CF_OP_LOOP_CONTINUE, ti_loop_op, 0),
- + /* 97 */ TI_DESC(EMIT, 0, ti_unsupported, 0),
- + /* 98 */ TI_DESC(ENDPRIM, 0, ti_unsupported, 0),
- + /* 99 */ TI_DESC(BGNLOOP, 0, ti_begin_loop, 0),
- + /* 100 */ TI_DESC(BGNSUB, 0, ti_unsupported, 0),
- + /* 101 */ TI_DESC(ENDLOOP, 0, ti_end_loop, 0),
- + /* 102 */ TI_DESC(ENDSUB, 0, ti_unsupported, 0),
- + /* 103 */ TI_DESC(TXQ_LZ, FETCH_OP_GET_TEXTURE_RESINFO, ti_tex, 0),
- + /* 104 */ TI_GAP,
- + /* 105 */ TI_GAP,
- + /* 106 */ TI_GAP,
- + /* 107 */ TI_DESC(NOP, 0, ti_unsupported, 0),
- + /* 108 */ TI_GAP,
- + /* 109 */ TI_GAP,
- + /* 110 */ TI_GAP,
- + /* 111 */ TI_GAP,
- + /* 112 */ TI_DESC(NRM4, 0, ti_unsupported, 0),
- + /* 113 */ TI_DESC(CALLNZ, 0, ti_unsupported, 0),
- + /* 114 */ TI_GAP,
- + /* 115 */ TI_DESC(BREAKC, 0, ti_unsupported, 0),
- + /* 116 */ TI_DESC(KILL_IF, 0, ti_kill, 0),
- + /* 117 */ TI_DESC(END, 0, ti_unsupported, 0),
- + /* 118 */ TI_GAP,
- + /* 119 */ TI_DESC(F2I, ALU_OP1_FLT_TO_INT, ti_f2iu, 0),
- + /* 120 */ TI_DESC(IDIV, 0, ti_divmod, 0),
- + /* 121 */ TI_DESC(IMAX, ALU_OP2_MAX_INT, ti_alu, 0),
- + /* 122 */ TI_DESC(IMIN, ALU_OP2_MIN_INT, ti_alu, 0),
- + /* 123 */ TI_DESC(INEG, 0, ti_ineg, 0),
- + /* 124 */ TI_DESC(ISGE, ALU_OP2_SETGE_INT, ti_alu, 0),
- + /* 125 */ TI_DESC(ISHR, ALU_OP2_ASHR_INT, ti_alu, 0),
- + /* 126 */ TI_DESC(ISLT, ALU_OP2_SETGT_INT, ti_alu, TIF_ALU_SWAPSRC01),
- + /* 127 */ TI_DESC(F2U, ALU_OP1_FLT_TO_UINT, ti_f2iu, 0),
- + /* 128 */ TI_DESC(U2F, ALU_OP1_UINT_TO_FLT, ti_alu, 0),
- + /* 129 */ TI_DESC(UADD, ALU_OP2_ADD_INT, ti_alu, 0),
- + /* 130 */ TI_DESC(UDIV, 0, ti_divmod, 0),
- + /* 131 */ TI_DESC(UMAD, 0, ti_umad, 0),
- + /* 132 */ TI_DESC(UMAX, ALU_OP2_MAX_UINT, ti_alu, 0),
- + /* 133 */ TI_DESC(UMIN, ALU_OP2_MIN_UINT, ti_alu, 0),
- + /* 134 */ TI_DESC(UMOD, 0, ti_divmod, 0),
- + /* 135 */ TI_DESC(UMUL, ALU_OP2_MULLO_INT, ti_alu, 0),
- + /* 136 */ TI_DESC(USEQ, ALU_OP2_SETE_INT, ti_alu, 0),
- + /* 137 */ TI_DESC(USGE, ALU_OP2_SETGE_UINT, ti_alu, 0),
- + /* 138 */ TI_DESC(USHR, ALU_OP2_LSHR_INT, ti_alu, 0),
- + /* 139 */ TI_DESC(USLT, ALU_OP2_SETGT_UINT, ti_alu, TIF_ALU_SWAPSRC01),
- + /* 140 */ TI_DESC(USNE, ALU_OP2_SETNE_INT, ti_alu, 0),
- + /* 141 */ TI_DESC(SWITCH, 0, ti_unsupported, 0),
- + /* 142 */ TI_DESC(CASE, 0, ti_unsupported, 0),
- + /* 143 */ TI_DESC(DEFAULT, 0, ti_unsupported, 0),
- + /* 144 */ TI_DESC(ENDSWITCH, 0, ti_unsupported, 0),
- + /* 145 */ TI_DESC(SAMPLE, 0, ti_unsupported, 0),
- + /* 146 */ TI_DESC(SAMPLE_I, 0, ti_unsupported, 0),
- + /* 147 */ TI_DESC(SAMPLE_I_MS, 0, ti_unsupported, 0),
- + /* 148 */ TI_DESC(SAMPLE_B, 0, ti_unsupported, 0),
- + /* 149 */ TI_DESC(SAMPLE_C, 0, ti_unsupported, 0),
- + /* 150 */ TI_DESC(SAMPLE_C_LZ, 0, ti_unsupported, 0),
- + /* 151 */ TI_DESC(SAMPLE_D, 0, ti_unsupported, 0),
- + /* 152 */ TI_DESC(SAMPLE_L, 0, ti_unsupported, 0),
- + /* 153 */ TI_DESC(GATHER4, 0, ti_unsupported, 0),
- + /* 154 */ TI_DESC(SVIEWINFO, 0, ti_unsupported, 0),
- + /* 155 */ TI_DESC(SAMPLE_POS, 0, ti_unsupported, 0),
- + /* 156 */ TI_DESC(SAMPLE_INFO, 0, ti_unsupported, 0),
- + /* 157 */ TI_DESC(UARL, 0, ti_arl, 0),
- + /* 158 */ TI_DESC(UCMP, 0, ti_cmp, 0),
- + /* 159 */ TI_DESC(IABS, 0, ti_iabs, 0),
- + /* 160 */ TI_DESC(ISSG, 0, ti_ssg, 0),
- + /* 161 */ TI_DESC(LOAD, 0, ti_unsupported, 0),
- + /* 162 */ TI_DESC(STORE, 0, ti_unsupported, 0),
- + /* 163 */ TI_DESC(MFENCE, 0, ti_unsupported, 0),
- + /* 164 */ TI_DESC(LFENCE, 0, ti_unsupported, 0),
- + /* 165 */ TI_DESC(SFENCE, 0, ti_unsupported, 0),
- + /* 166 */ TI_DESC(BARRIER, 0, ti_unsupported, 0),
- + /* 167 */ TI_DESC(ATOMUADD, 0, ti_unsupported, 0),
- + /* 168 */ TI_DESC(ATOMXCHG, 0, ti_unsupported, 0),
- + /* 169 */ TI_DESC(ATOMCAS, 0, ti_unsupported, 0),
- + /* 170 */ TI_DESC(ATOMAND, 0, ti_unsupported, 0),
- + /* 171 */ TI_DESC(ATOMOR, 0, ti_unsupported, 0),
- + /* 172 */ TI_DESC(ATOMXOR, 0, ti_unsupported, 0),
- + /* 173 */ TI_DESC(ATOMUMIN, 0, ti_unsupported, 0),
- + /* 174 */ TI_DESC(ATOMUMAX, 0, ti_unsupported, 0),
- + /* 175 */ TI_DESC(ATOMIMIN, 0, ti_unsupported, 0),
- + /* 176 */ TI_DESC(ATOMIMAX, 0, ti_unsupported, 0),
- + /* 177 */ TI_DESC(TEX2, FETCH_OP_SAMPLE, ti_tex, 0),
- + /* 178 */ TI_DESC(TXB2, FETCH_OP_SAMPLE_LB, ti_tex, 0),
- + /* 179 */ TI_DESC(TXL2, FETCH_OP_SAMPLE_L, ti_tex, 0)
- + /* 180 */ /* TI_DESC(LAST, 0, ti_unsupported, 0) */
- + };
- +
- +#undef TI_DESC
- +#undef TI_GAP
- +
- +// Helpers for 4-element swizzle/select arrays. Every macro expands to one
- +// parenthesized expression, so it remains a single statement when used as
- +// the body of an unbraced if/else or loop, and arguments are parenthesized.
- +// FILLV4: store b into all four elements of a.
- +#define FILLV4(a, b) ((a)[0] = (a)[1] = (a)[2] = (a)[3] = (b))
- +// VSWZ_XYZW: identity selection (0, 1, 2, 3).
- +#define VSWZ_XYZW(a) ((a)[0] = 0, (a)[1] = 1, (a)[2] = 2, (a)[3] = 3)
- +// VSWZ_MASK: set all four elements to SEL_MASK.
- +#define VSWZ_MASK(a) FILLV4(a, SEL_MASK)
- +// VSWZ_INIT: set the four elements to s0..s3.
- +#define VSWZ_INIT(a, s0, s1, s2, s3) \
- +    ((a)[0] = (s0), (a)[1] = (s1), (a)[2] = (s2), (a)[3] = (s3))
- +
- +// Translates the TGSI token stream of the pipe shader `ps` into a new sb
- +// `shader`.
- +//
- +// Returns the shader on success. Returns NULL when the TGSI processor type
- +// is not handled or when parsing the declarations or instructions fails; in
- +// that case the parse context is released and the partially built shader
- +// is deleted instead of being leaked.
- +shader* tgsi_translator::translate() {
- +    shader_target target;
- +
- +    tokens = ps->selector->tokens;
- +    tgsi_parse_init(&parse, tokens);
- +
- +    tgsi_proc = parse.FullHeader.Processor.Processor;
- +
- +    switch (tgsi_proc) {
- +    case TGSI_PROCESSOR_VERTEX:
- +        target = TARGET_VS;
- +        break;
- +    case TGSI_PROCESSOR_FRAGMENT:
- +        target = TARGET_PS;
- +        break;
- +    case TGSI_PROCESSOR_GEOMETRY:
- +        target = TARGET_GS;
- +        break;
- +    case TGSI_PROCESSOR_COMPUTE:
- +        target = TARGET_COMPUTE;
- +        break;
- +    default:
- +        assert(!"unexpected shader type");
- +        tgsi_parse_free(&parse);
- +        return NULL;
- +    }
- +
- +    sh = new shader(ctx, target, shader_id, true);
- +    sh->init();
- +    current = sh->root;
- +
- +    // Inputs are emitted between the declarations and the instructions;
- +    // skip the remaining stages as soon as one of them fails.
- +    int r = parse_declarations();
- +    if (!r) {
- +        emit_inputs();
- +        r = parse_instructions();
- +    }
- +
- +    tgsi_parse_free(&parse);
- +
- +    if (r) {
- +        delete sh;
- +        return NULL;
- +    }
- +
- +    emit_exports();
- +
- +    update_pipe_shader();
- +
- +    return sh;
- +}
- +
- +// Consumes the leading PROPERTY / DECLARATION / IMMEDIATE tokens.
- +// Returns 0 at the first INSTRUCTION token (which stays the current token
- +// in `parse`) or when the stream ends, the handler's nonzero result when a
- +// handler fails, and -1 for any other token type.
- +int tgsi_translator::parse_declarations() {
- +    for (;;) {
- +        if (tgsi_parse_end_of_tokens(&parse))
- +            return 0;
- +
- +        tgsi_parse_token(&parse);
- +
- +        int status;
- +        switch (parse.FullToken.Token.Type) {
- +        case TGSI_TOKEN_TYPE_INSTRUCTION:
- +            return 0;
- +        case TGSI_TOKEN_TYPE_IMMEDIATE:
- +            status = parse_immediate();
- +            break;
- +        case TGSI_TOKEN_TYPE_DECLARATION:
- +            status = parse_declaration();
- +            break;
- +        case TGSI_TOKEN_TYPE_PROPERTY:
- +            status = parse_property();
- +            break;
- +        default:
- +            assert(!"unexpected tgsi token type");
- +            return -1;
- +        }
- +
- +        if (status != 0)
- +            return status;
- +    }
- +}
- +
- +// Translates instruction tokens, starting with the token that is already
- +// current in `parse`, until the stream is exhausted. Returns 0 on success,
- +// the nonzero result of parse_instruction() on failure, or -1 when a
- +// non-instruction token is encountered.
- +int tgsi_translator::parse_instructions() {
- +    for (;;) {
- +        if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) {
- +            assert(!"unexpected tgsi token type");
- +            return -1;
- +        }
- +
- +        const int status = parse_instruction();
- +        if (status)
- +            return status;
- +
- +        if (tgsi_parse_end_of_tokens(&parse))
- +            return 0;
- +
- +        tgsi_parse_token(&parse);
- +    }
- +}
- +
- +// Handles the current PROPERTY token. FS_COLOR0_WRITES_ALL_CBUFS with data
- +// value 1 sets fs_write_all; VS_PROHIBIT_UCPS is accepted and ignored; any
- +// other property only trips the assert. Always returns 0.
- +int tgsi_translator::parse_property() {
- +    tgsi_full_property *prop = &parse.FullToken.FullProperty;
- +    const unsigned name = prop->Property.PropertyName;
- +
- +    if (name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
- +        if (prop->u[0].Data == 1)
- +            fs_write_all = TRUE;
- +    } else if (name != TGSI_PROPERTY_VS_PROHIBIT_UCPS) {
- +        assert(!"unexpected tgsi property token");
- +    }
- +
- +    return 0;
- +}
- +
- +// Records the current DECLARATION token.
- +// - INPUT / OUTPUT: appends an entry to input[] / output[] (semantic name
- +//   and index, interpolation, first register index) and updates the
- +//   per-stage bookkeeping below.
- +// - TEMPORARY: registers a relative-addressed array when the declaration
- +//   has an ArrayID and spans more than one register.
- +// - SYSTEM_VALUE: remembers the register index of INSTANCEID / VERTEXID.
- +// - CONSTANT / SAMPLER / ADDRESS: nothing to record.
- +// Returns 0, or -1 for a register file that is not handled here.
- +// NOTE(review): input[] / output[] are indexed by the running counters
- +// without a bound check in this function - confirm the array capacities.
- +int tgsi_translator::parse_declaration() {
- + tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
- + unsigned i;
- +
- + // TODO handle array input/output decls
- +
- + switch (d->Declaration.File) {
- + case TGSI_FILE_INPUT:
- + i = ninput++;
- + input[i].d.name = d->Semantic.Name;
- + input[i].d.sid = d->Semantic.Index;
- + input[i].d.interpolate = d->Interp.Interpolate;
- + input[i].d.centroid = d->Interp.Centroid;
- + input[i].tgsi_index = d->Range.First;
- + if (tgsi_proc == TGSI_PROCESSOR_FRAGMENT) {
- + // interp_mask: bit 0 = perspective, bit 1 = linear, bit 2 = centroid;
- + // POSITION and FACE inputs do not contribute.
- + if (input[i].d.name != TGSI_SEMANTIC_POSITION &&
- + input[i].d.name != TGSI_SEMANTIC_FACE) {
- + if (input[i].d.interpolate == TGSI_INTERPOLATE_LINEAR)
- + interp_mask |= (1 << 1);
- + else if (input[i].d.interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
- + interp_mask |= (1 << 0);
- + if (input[i].d.centroid)
- + interp_mask |= (1 << 2);
- + }
- +
- + input[i].d.spi_sid = spi_sid(input[i].d.name, input[i].d.sid);
- +
- + // remember which input slot holds the special semantics
- + switch (input[i].d.name) {
- + case TGSI_SEMANTIC_FACE:
- + face_input = i;
- + break;
- + case TGSI_SEMANTIC_COLOR:
- + ++colors_used;
- + break;
- + case TGSI_SEMANTIC_POSITION:
- + fragcoord_input = i;
- + break;
- + }
- + }
- + break;
- + case TGSI_FILE_OUTPUT:
- + i = noutput++;
- + output[i].d.name = d->Semantic.Name;
- + output[i].d.sid = d->Semantic.Index;
- + output[i].d.interpolate = d->Interp.Interpolate;
- + output[i].d.write_mask = d->Declaration.UsageMask;
- + output[i].tgsi_index = d->Range.First;
- + if (tgsi_proc == TGSI_PROCESSOR_VERTEX) {
- + output[i].d.spi_sid = spi_sid(output[i].d.name, output[i].d.sid);
- + switch (d->Semantic.Name) {
- + case TGSI_SEMANTIC_CLIPDIST:
- + // four mask bits per CLIPDIST register, placed by semantic index
- + clip_dist_write |= d->Declaration.UsageMask
- + << (d->Semantic.Index << 2);
- + break;
- + case TGSI_SEMANTIC_PSIZE:
- + vs_out_misc_write = 1;
- + vs_out_point_size = 1;
- + break;
- + case TGSI_SEMANTIC_CLIPVERTEX:
- + clip_vertex_write = TRUE;
- + cv_output = i;
- + break;
- + }
- + } else if (tgsi_proc == TGSI_PROCESSOR_FRAGMENT) {
- + switch (d->Semantic.Name) {
- + case TGSI_SEMANTIC_COLOR:
- + nr_ps_max_color_exports++;
- + break;
- + }
- + }
- + break;
- +
- + case TGSI_FILE_TEMPORARY:
- + if (d->Array.ArrayID && d->Range.Last > d->Range.First) {
- + sh->add_rel_array(VLK_TGSI_TEMP, d->Range.First,
- + d->Range.Last - d->Range.First + 1, 0xF, d->Array.ArrayID);
- + }
- + break;
- +
- + case TGSI_FILE_CONSTANT:
- + case TGSI_FILE_SAMPLER:
- + case TGSI_FILE_ADDRESS:
- + break;
- +
- + case TGSI_FILE_SYSTEM_VALUE:
- + // both branches end up at a break; other system values are ignored
- + if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
- + instanceid_index = d->Range.First;
- + break;
- + } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
- + vertexid_index = d->Range.First;
- + break;
- + default:
- + assert(!"unexpected tgsi declaration");
- + return -1;
- + }
- + return 0;
- +}
- +
- +// Appends the four 32-bit words of the current IMMEDIATE token to
- +// `literals`, in component order. Always returns 0.
- +int tgsi_translator::parse_immediate() {
- +    literals.reserve(literals.size() + 4);
- +    for (unsigned c = 0; c < 4; ++c)
- +        literals.push_back(parse.FullToken.FullImmediate.u[c].Uint);
- +    return 0;
- +}
- +
- +// Maps a TGSI semantic (name, sid) to the index stored in spi_sid.
- +// POSITION, PSIZE and FACE are handled differently and need no semantic
- +// index, so they map to 0. Every other semantic maps to a nonzero value
- +// (hence the +1), so later code can compare against 0 instead of checking
- +// the name for special cases.
- +int tgsi_translator::spi_sid(int name, int sid) {
- +    switch (name) {
- +    case TGSI_SEMANTIC_POSITION:
- +    case TGSI_SEMANTIC_PSIZE:
- +    case TGSI_SEMANTIC_FACE:
- +        return 0;
- +    case TGSI_SEMANTIC_GENERIC:
- +        /* generic params simply use the sid from tgsi */
- +        return sid + 1;
- +    default:
- +        /* non-generic params: pack name and sid into 8 bits */
- +        return (0x80 | (name << 3) | sid) + 1;
- +    }
- +}
- +
- +// Decodes the current INSTRUCTION token into `args` (destination and
- +// source operands), `write_mask` and `clamp`, then dispatches to the
- +// handler registered for the opcode in tgsi_info_table.
- +// Returns 0 for TGSI_OPCODE_END and on success, otherwise the handler's
- +// nonzero result.
- +// NOTE(review): tgsi_info_table is indexed by the opcode without a range
- +// check here - confirm every opcode that can appear has a table entry.
- +int tgsi_translator::parse_instruction() {
- +
- + int i, r;
- + inst = &parse.FullToken.FullInstruction;
- + unsigned tgsi_opcode = inst->Instruction.Opcode;
- +
- + if (tgsi_opcode == TGSI_OPCODE_END)
- + return 0;
- +
- + info = &tgsi_info_table[tgsi_opcode];
- + assert(info->tgsi_op == tgsi_opcode);
- + assert(info->func);
- +
- + // start from a default-constructed argument set for every instruction
- + args = tgsi_args();
- +
- + if (inst->Instruction.NumDstRegs) {
- + assert(inst->Instruction.NumDstRegs == 1);
- + args.dst.dst = true;
- + args.dst.file = inst->Dst[0].Register.File;
- + args.dst.kind = file_to_value_kind(args.dst.file);
- + args.dst.sel = inst->Dst[0].Register.Index;
- + args.dst.rel = inst->Dst[0].Register.Indirect;
- +
- + if (args.dst.rel) {
- + assert(inst->Dst[0].Indirect.File == TGSI_FILE_ADDRESS);
- + args.dst.rel_addr_index = inst->Dst[0].Indirect.Index;
- + args.dst.rel_addr_chan = inst->Dst[0].Indirect.Swizzle;
- + args.dst.rel_array_id = inst->Dst[0].Indirect.ArrayID;
- + // remember which value kinds are accessed indirectly
- + indirect_vlk |= (1 << args.dst.kind);
- + }
- + }
- +
- + // NOTE(review): Dst[0] is read even when NumDstRegs is 0 - confirm the
- + // parser leaves a sane write mask there for dst-less instructions.
- + write_mask = inst->Dst[0].Register.WriteMask;
- + clamp = inst->Instruction.Saturate;
- + args.nsrc = inst->Instruction.NumSrcRegs;
- +
- + // count of non-relative constant / immediate operands seen so far; each
- + // counter stops at 1 because later ones go through split_src_arg()
- + unsigned nconst = 0;
- + unsigned nliteral = 0;
- +
- + for (i = 0; i < args.nsrc; ++i) {
- + tgsi_arg &a = args.src[i];
- + a.file = inst->Src[i].Register.File;
- + a.sel = inst->Src[i].Register.Index;
- +
- + if (a.file == TGSI_FILE_SYSTEM_VALUE) {
- + // instance id is read from register 0 channel W, vertex id from
- + // register 0 channel X
- + if (a.sel == instanceid_index) {
- + a.kind = VLK_REG;
- + a.sel = 0;
- + FILLV4(a.swz, SEL_W);
- + } else if (a.sel == vertexid_index) {
- + a.kind = VLK_REG;
- + a.sel = 0;
- + FILLV4(a.swz, SEL_X);
- + } else {
- + assert(!"unexpected system value");
- + }
- + } else if (a.file != TGSI_FILE_SAMPLER) {
- +
- + a.kind = file_to_value_kind(a.file);
- + a.rel = inst->Src[i].Register.Indirect;
- + a.neg = inst->Src[i].Register.Negate;
- + a.abs = inst->Src[i].Register.Absolute;
- + a.swz[0] = inst->Src[i].Register.SwizzleX;
- + a.swz[1] = inst->Src[i].Register.SwizzleY;
- + a.swz[2] = inst->Src[i].Register.SwizzleZ;
- + a.swz[3] = inst->Src[i].Register.SwizzleW;
- +
- + if (a.kind == VLK_KCACHE && inst->Src[i].Register.Dimension)
- + a.kc_bank = inst->Src[i].Dimension.Index;
- +
- + if (a.rel) {
- + assert(inst->Src[i].Indirect.File == TGSI_FILE_ADDRESS);
- + a.rel_addr_index = inst->Src[i].Indirect.Index;
- + a.rel_addr_chan = inst->Src[i].Indirect.Swizzle;
- + a.rel_array_id = inst->Src[i].Indirect.ArrayID;
- + indirect_vlk |= (1 << a.kind);
- +
- + if (a.file == TGSI_FILE_CONSTANT) {
- + fetch_rel_const(a);
- + }
- + } else if (a.file == TGSI_FILE_CONSTANT) {
- + // only the first constant operand is used directly
- + if (nconst == 1) {
- + split_src_arg(a);
- + } else
- + ++nconst;
- + } else if (a.file == TGSI_FILE_IMMEDIATE) {
- + // likewise only the first immediate operand is used directly
- + if (nliteral == 1) {
- + split_src_arg(a);
- + } else
- + ++nliteral;
- + }
- + }
- + }
- +
- + // call the per-opcode handler (pointer to member function)
- + if ((r = (this->*(info->func))()))
- + return r;
- +
- + return 0;
- +}
- +
- +// Emits one CF_OP_EXPORT node of the given export `type` at array base
- +// `base`. The source values are the TGSI output register `tgsi_index`
- +// selected through `swz`, and `swz` is also copied into the node's sel
- +// field. The `o` argument is not used by this function. Always returns 0.
- +int tgsi_translator::emit_export(shader_io& o, unsigned type, unsigned base,
- +        unsigned * swz, unsigned tgsi_index) {
- +    cf_node *node = sh->create_cf(CF_OP_EXPORT);
- +
- +    node->src = get_vector_values(VLK_TGSI_OUTPUT, tgsi_index, swz);
- +
- +    node->bc.type = type;
- +    node->bc.array_base = base;
- +    node->bc.elem_size = 3; // XXX is it required?
- +    memcpy(node->bc.sel, swz, 4 * sizeof(unsigned));
- +
- +    emit_node(node);
- +    return 0;
- +}
- +
- +// Emits a placeholder CF_OP_EXPORT node of the given `type`: all four sel
- +// entries are 7, the four source slots are left empty, and the array base
- +// is 60 for EXP_POS and 0 otherwise. Always returns 0.
- +int tgsi_translator::emit_fake_export(unsigned type) {
- +    cf_node *node = sh->create_cf(CF_OP_EXPORT);
- +
- +    for (unsigned c = 0; c < 4; ++c)
- +        node->bc.sel[c] = 7;
- +    node->src.resize(4);
- +
- +    if (type == EXP_POS)
- +        node->bc.array_base = 60;
- +    else
- +        node->bc.array_base = 0;
- +    node->bc.type = type;
- +    node->bc.elem_size = 3;
- +
- +    emit_node(node);
- +    return 0;
- +}
- +
- +// Emits everything that leaves the shader, in this order:
- +//  1. when a CLIPVERTEX output was declared, two synthetic CLIPDIST
- +//     outputs computed from it;
- +//  2. one memory-stream write per stream-output entry of the selector;
- +//  3. one export per declared output, according to the shader target;
- +//  4. placeholder exports for export types that ended up with no export.
- +// Always returns 0.
- +int tgsi_translator::emit_exports() {
- + int i, j, k, n;
- + // position exports start at array base 60; pixel and param at 0
- + int next_pos = 60, next_pixel = 0, next_param = 0;
- +
- + if (clip_vertex_write) {
- + // append two CLIPDIST outputs right after the declared ones
- + int cd = noutput;
- +
- + noutput += 2;
- + output[cd].d.name = TGSI_SEMANTIC_CLIPDIST;
- + output[cd].tgsi_index = cd;
- + output[cd + 1].d.name = TGSI_SEMANTIC_CLIPDIST;
- + output[cd + 1].tgsi_index = cd + 1;
- +
- + output[cv_output].d.spi_sid = 0;
- + clip_dist_write = 0xFF;
- +
- + // distance i = DOT4(clip vertex, row i of R600_UCP_CONST_BUFFER);
- + // only the slot matching the destination channel keeps its result
- + for (i = 0; i < 8; i++) {
- + int oreg = i >> 2, ochan = i & 3;
- + value *o = get_tgsi_value(VLK_TGSI_OUTPUT, cd + oreg, ochan);
- + alu_packed_node *p = sh->create_alu_packed();
- +
- + for (j = 0; j < 4; j++) {
- + value *cvo = get_tgsi_value(VLK_TGSI_OUTPUT, cv_output, j);
- + value *cp = sh->get_kcache_value(R600_UCP_CONST_BUFFER, i, j);
- + alu_node *a = build_alu(ALU_OP2_DOT4, j == ochan ? o : NULL, 0,
- + asrc(cvo), asrc(cp));
- + a->bc.slot = j;
- + p->push_back(a);
- + }
- + emit_node(p);
- + }
- + }
- +
- + // stream output: one MEM_STREAM write per entry
- + pipe_stream_output_info &so = ps->selector->so;
- + for (i = 0; i < (int)so.num_outputs; i++) {
- + int nc = so.output[i].num_components;
- + unsigned start_comp = so.output[i].start_component, real_start;
- + unsigned index = so.output[i].register_index;
- + unsigned dst_offset = so.output[i].dst_offset;
- + // `buf` is only read by the assert below
- + unsigned buf = so.output[i].output_buffer;
- + // the opcode is the base stream opcode plus the buffer index
- + unsigned op = so.output[i].output_buffer;
- +
- + assert(buf < 4);
- + op += ctx.is_egcm() ? CF_OP_MEM_STREAM0_BUF0 : CF_OP_MEM_STREAM0;
- + real_start = (dst_offset < start_comp) ? 0 : start_comp;
- +
- + cf_node *ms = sh->create_cf(op);
- + ms->bc.elem_size = nc;
- + ms->bc.array_base = dst_offset - real_start;
- + ms->bc.type = MEM_WRITE;
- + ms->bc.array_size = 0xFFF;
- + ms->src.resize(4);
- +
- + for (j = 0; j < nc; ++j) {
- + value *v = get_tgsi_value(VLK_TGSI_OUTPUT, index, start_comp + j);
- + ms->src[real_start + j] = v;
- + }
- + emit_node(ms);
- + }
- +
- + for (i = 0; i < noutput; ++i) {
- + shader_io &o = output[i];
- + unsigned ti = o.tgsi_index;
- + unsigned swz[4] = { 0, 1, 2, 3 };
- +
- + switch (sh->target) {
- + case TARGET_VS:
- + switch (o.d.name) {
- + case TGSI_SEMANTIC_CLIPDIST:
- + // exported as a param only when it has a nonzero spi_sid, and
- + // always as a position export as well
- + if (o.d.spi_sid)
- + emit_export(o, EXP_PARAM, next_param++, swz, ti);
- + /* fall through */
- + case TGSI_SEMANTIC_POSITION:
- + case TGSI_SEMANTIC_PSIZE:
- + emit_export(o, EXP_POS, next_pos++, swz, ti);
- + break;
- + case TGSI_SEMANTIC_FOG:
- + swz[1] = 4;
- + swz[2] = 4;
- + swz[3] = 5; /* x001 */
- + emit_export(o, EXP_PARAM, next_param++, swz, ti);
- + break;
- + case TGSI_SEMANTIC_CLIPVERTEX:
- + // not exported itself; see the CLIPDIST outputs added above
- + break;
- + default:
- + emit_export(o, EXP_PARAM, next_param++, swz, ti);
- + }
- + break;
- + case TARGET_PS:
- + if (fs_write_all && ctx.is_egcm())
- + nr_ps_max_color_exports = 8;
- +
- + switch (o.d.name) {
- + case TGSI_SEMANTIC_COLOR:
- + // the first color is always exported; later ones only while
- + // fewer than key.nr_cbufs have been exported
- + if (next_pixel && next_pixel >= key.nr_cbufs)
- + continue;
- + swz[3] = key.alpha_to_one ? 5 : 3;
- + // with fs_write_all the same color goes to every color buffer
- + n = (fs_write_all && ctx.is_egcm() && key.nr_cbufs) ?
- + key.nr_cbufs : 1;
- + for (k = 0; k < n; k++) {
- + emit_export(o, EXP_PIXEL, next_pixel++, swz, ti);
- + }
- + nr_ps_color_exports += n;
- + break;
- + case TGSI_SEMANTIC_POSITION:
- + // exports source channel 2 in slot 0; 7 in the other slots
- + swz[0] = 2;
- + swz[1] = 7;
- + swz[2] = 7;
- + swz[3] = 7;
- + emit_export(o, EXP_PIXEL, 61, swz, ti);
- + break;
- + case TGSI_SEMANTIC_STENCIL:
- + // exports source channel 1 in slot 1; 7 in the other slots
- + swz[0] = 7;
- + swz[1] = 1;
- + swz[2] = 7;
- + swz[3] = 7;
- + emit_export(o, EXP_PIXEL, 61, swz, ti);
- + break;
- + default:
- + assert(!"unexpected ps output");
- + }
- + break;
- + default:
- + assert(!"unexpected shader target");
- + break;
- + }
- + }
- +
- + // emit a placeholder for every export type that got no export above
- + if (sh->target == TARGET_VS) {
- + if (next_pos == 60)
- + emit_fake_export(EXP_POS);
- + if (next_param == 0)
- + emit_fake_export(EXP_PARAM);
- + } else if (sh->target == TARGET_PS && next_pixel == 0)
- + emit_fake_export(EXP_PIXEL);
- +
- + return 0;
- +}
- +
- +// Handler for TGSI opcodes that are not translated: logs the opcode name
- +// from `info` to sblog and reports failure (-1).
- +int tgsi_translator::ti_unsupported() {
- +    const int failure = -1;
- +
- +    sblog << "sb tgsi: unsupported tgsi op " << info->name << "\n";
- +    return failure;
- +}
- +
- +// Appends node `n` to the container that is currently being filled
- +// (`current`).
- +inline void tgsi_translator::emit_node(node* n) {
- + current->push_back(n);
- +}
- +
- +// Opens a new ALU group and makes it the current insertion point.
- +//
- +// At this stage a group only expresses "these operations execute in
- +// parallel" until the code is converted to SSA form; it is unrelated to the
- +// VLIW alu groups that the post_scheduler pass creates later.
- +// Example: for "MOV TEMP[0].xy, TEMP[0].yx" the two ISA MOVs can be put
- +// into one group so the backend treats them as simultaneous; without the
- +// group the swap would need an extra temp value and three MOVs.
- +// SSA construction honours groups: every source operand of a grouped
- +// operation uses the version that existed before the group.
- +inline void tgsi_translator::begin_group() {
- +    alu_group_node *group = sh->create_alu_group();
- +
- +    emit_node(group);
- +    current = group;
- +}
- +
- +// Closes the ALU group opened by begin_group(): the insertion point moves
- +// back to the group's parent. Asserts that the current container is an ALU
- +// group.
- +inline void tgsi_translator::end_group() {
- + assert(current->is_alu_group());
- + current = current->parent;
- +}
- +
- +// Returns a 4-entry vector with the values of TGSI register `tgsi_index`
- +// of the given `kind`. Entry i takes channel swz[i], or channel i when
- +// `swz` is NULL. Entries whose selected channel is not in 0..3 are not
- +// looked up and keep the value that resize() gave them.
- +vvec tgsi_translator::get_vector_values(value_kind kind, unsigned tgsi_index,
- +        unsigned* swz) {
- +    vvec result;
- +
- +    result.resize(4);
- +    for (unsigned slot = 0; slot < 4; ++slot) {
- +        const unsigned chan = swz ? swz[slot] : slot;
- +        if (chan >= 4)
- +            continue;
- +        result[slot] = get_tgsi_value(kind, tgsi_index, chan);
- +    }
- +    return result;
- +}
- +
- +// Emits the code and bookkeeping that make the declared inputs available.
- +// - TARGET_VS: a CALL_FS node whose destinations are the values of all
- +//   declared inputs.
- +// - TARGET_PS: reserves the leading GPRs on egcm chips (presumably for the
- +//   interpolation i/j pairs - confirm), assigns a GPR to every input,
- +//   emits interpolation code or pins the input to its GPR, handles
- +//   two-sided color selection, and finally replaces the fragcoord W
- +//   channel by its reciprocal.
- +// Other targets only trip the assert. Always returns 0.
- +int tgsi_translator::emit_inputs() {
- + int i, nparam = 0, gpr_reserved = 0;
- +
- + // XXX temporary workaround for lack of proper array support for inputs
- + if (ninput)
- + sh->add_rel_array(VLK_TGSI_INPUT, 0, ninput, 0xF, 0);
- +
- + switch (sh->target) {
- + case TARGET_VS: {
- + cf_node *c = sh->create_cf(CF_OP_CALL_FS);
- +
- + c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
- + sh->add_pinned_gpr_values(c->src, 0, 0xF, true);
- + sh->add_input(0, true, 0xF);
- +
- + // pin input arrays
- + for (i = 0; i < 4; ++i) {
- + rel_array *a = sh->get_rel_array(VLK_TGSI_INPUT, 0, i);
- + if (a)
- + a->gpr = sel_chan(1, i);
- + }
- +
- + for (i = 0; i < ninput; ++i) {
- + shader_io &in = input[i];
- + vvec dv = get_vector_values(VLK_TGSI_INPUT, in.tgsi_index);
- + c->dst.insert(c->dst.end(), dv.begin(), dv.end());
- + }
- + emit_node(c);
- + break;
- + }
- + case TARGET_PS:
- + if (ctx.is_egcm()) {
- + // fall back to the perspective bit when no interpolation mode
- + // was recorded
- + if (!interp_mask)
- + interp_mask = 1;
- +
- + // (perspective + linear) modes, doubled when centroid is used
- + unsigned ij_pairs = ((interp_mask & 1) + ((interp_mask >> 1) & 1))
- + * ((interp_mask & 4) ? 2 : 1);
- +
- + // two channels per pair, four channels per GPR
- + unsigned mask = (1u << 2 * ij_pairs) - 1;
- + unsigned gpr = 0;
- +
- + while (mask) {
- + sh->add_input(gpr, true, mask & 0x0F);
- + ++gpr;
- + mask >>= 4;
- + }
- + gpr_reserved = gpr;
- + }
- +
- + // pin input arrays
- + for (i = 0; i < 4; ++i) {
- + rel_array *a = sh->get_rel_array(VLK_TGSI_INPUT, 0, i);
- + if (a)
- + a->gpr = sel_chan(gpr_reserved, i);
- + }
- +
- + if (key.color_two_side && colors_used) {
- + two_side = 1;
- +
- + // two-sided selection needs the FACE input; add one if the
- + // shader did not declare it
- + if (face_input == -1) {
- + i = ninput++;
- + input[i].d.name = TGSI_SEMANTIC_FACE;
- + input[i].d.spi_sid = 0;
- + input[i].tgsi_index = i;
- + face_input = i;
- + }
- + }
- +
- + // NOTE: ninput can grow inside this loop (a BCOLOR input is appended
- + // for each COLOR input when two_side is set); the appended inputs
- + // are visited by later iterations of the same loop.
- + for (i = 0; i < ninput; ++i) {
- + shader_io &in = input[i];
- + in.d.gpr = gpr_reserved++;
- +
- + if (ctx.is_egcm() && in.d.spi_sid) {
- + in.d.lds_pos = nparam++;
- + if (in.d.interpolate != TGSI_INTERPOLATE_CONSTANT) {
- + in.d.ij_index = get_ij(in);
- +
- + // type 1 (INTERP_ZW) is emitted before type 0 (INTERP_XY)
- + emit_node(build_interp(in, 1));
- + emit_node(build_interp(in, 0));
- + } else {
- + emit_node(build_interp_flat(in));
- + }
- + } else {
- + sh->add_pinned_inputs(sh->root->dst, VLK_TGSI_INPUT,
- + in.tgsi_index, 0xF, false, in.d.gpr);
- + }
- +
- + if (two_side) {
- + if (in.d.name == TGSI_SEMANTIC_COLOR) {
- + // append the matching back color as a copy of this input
- + int ni = ninput++;
- + shader_io &nin = input[ni];
- + nin = in;
- + nin.d.name = TGSI_SEMANTIC_BCOLOR;
- + nin.d.spi_sid = spi_sid(nin.d.name, nin.d.sid);
- + // back_color_input actually means front_color_input here
- + nin.d.back_color_input = i;
- + nin.tgsi_index = ni;
- + } else if (in.d.name == TGSI_SEMANTIC_BCOLOR) {
- + // both inputs are interpolated now, so select the color
- + int k;
- + shader_io &fin = input[in.d.back_color_input];
- +
- + // per channel: front = CNDGT(face, front, back)
- + for (k = 0; k < 4; ++k) {
- + value *face = sh->get_value(VLK_TGSI_INPUT,
- + sel_chan(input[face_input].tgsi_index, 0));
- + value *fv = sh->get_value(VLK_TGSI_INPUT,
- + sel_chan(fin.tgsi_index, k));
- + value *bv = sh->get_value(VLK_TGSI_INPUT,
- + sel_chan(in.tgsi_index, k));
- + emit_alu(ALU_OP3_CNDGT, fv, 0, asrc(face), asrc(fv),
- + asrc(bv));
- + }
- + }
- + }
- + }
- +
- + // NOTE(review): fragcoord_input is an index into input[] but is used
- + // as the TGSI register index here - confirm the two always match.
- + if (fragcoord_input != -1) {
- + value* w = get_tgsi_value(VLK_TGSI_INPUT, fragcoord_input, SEL_W);
- + emit_alu(ALU_OP1_RECIP_IEEE, w, 0, asrc(w));
- + }
- +
- +
- + break;
- + default:
- + assert(!"unexpected target");
- + }
- + return 0;
- +}
- +
- +// Builds the packed 4-slot interpolation instruction for input `in`.
- +// `type` 0 builds INTERP_XY, which writes channels 0 and 1; any other
- +// value builds INTERP_ZW, and with type 1 it writes channels 2 and 3. The
- +// remaining slots get a NULL destination. Each slot reads one i/j
- +// coordinate from the GPR derived from in.d.ij_index and the parameter at
- +// in.d.lds_pos. The caller emits the returned node.
- +alu_packed_node* tgsi_translator::build_interp(shader_io& in, unsigned type) {
- +    alu_packed_node *packed = sh->create_alu_packed();
- +    const unsigned op = (type == 0) ? ALU_OP2_INTERP_XY : ALU_OP2_INTERP_ZW;
- +
- +    // two i/j pairs per GPR: the pair index picks the GPR and its half
- +    const unsigned ij_gpr = in.d.ij_index >> 1;
- +    const unsigned ij_base_chan = ((in.d.ij_index & 1) << 1) + 1;
- +
- +    for (unsigned slot = 0; slot < 4; ++slot) {
- +        alu_node *a = create_alu(op);
- +
- +        value *dst = NULL;
- +        if ((slot >> 1) == type)
- +            dst = sh->get_value(VLK_TGSI_INPUT, sel_chan(in.tgsi_index, slot));
- +        a->dst.push_back(dst);
- +
- +        a->src.push_back(sh->get_gpr_value(true, ij_gpr,
- +                ij_base_chan - (slot & 1), false));
- +        a->src.push_back(sh->get_special_ro_value(
- +                sel_chan(ALU_SRC_PARAM_OFFSET + in.d.lds_pos, slot)));
- +
- +        a->bc.slot = slot;
- +        packed->push_back(a);
- +    }
- +    return packed;
- +}
- +
- +// Builds an ALU group with four INTERP_LOAD_P0 instructions, one per
- +// channel, that load the parameter at in.d.lds_pos into the channels of
- +// input `in`. The caller emits the returned group.
- +alu_group_node* tgsi_translator::build_interp_flat(shader_io& in) {
- +    alu_group_node *group = sh->create_alu_group();
- +
- +    for (unsigned chan = 0; chan < 4; ++chan) {
- +        alu_node *load = create_alu(ALU_OP1_INTERP_LOAD_P0);
- +
- +        value *dst = sh->get_value(VLK_TGSI_INPUT,
- +                sel_chan(in.tgsi_index, chan));
- +        load->dst.push_back(dst);
- +
- +        value *param = sh->get_special_ro_value(
- +                sel_chan(ALU_SRC_PARAM_OFFSET + in.d.lds_pos, chan));
- +        load->src.push_back(param);
- +
- +        load->bc.slot = chan;
- +        group->push_back(load);
- +    }
- +    return group;
- +}
- +
- +// Returns the index of the i/j pair used to interpolate input `in`.
- +// Perspective inputs use pair 0, or 1 with centroid. Linear inputs start
- +// after the pairs counted from interp_mask bits 0 and 2, plus one with
- +// centroid. Any other interpolation mode yields 0.
- +inline int tgsi_translator::get_ij(shader_io& in) {
- +    switch (in.d.interpolate) {
- +    case TGSI_INTERPOLATE_PERSPECTIVE:
- +        return in.d.centroid ? 1 : 0;
- +    case TGSI_INTERPOLATE_LINEAR:
- +        return (interp_mask & 1) + ((interp_mask >> 2) & 1) +
- +                (in.d.centroid ? 1 : 0);
- +    default:
- +        return 0;
- +    }
- +}
- +
- +// Channel iteration helpers. FOREACH_CHAN_UNMASKED runs the following
- +// statement with `ch` = 0..3. FOREACH_CHAN does the same but skips the
- +// channels whose bit is clear in the `write_mask` visible at the use site.
- +// NOTE: FOREACH_CHAN ends in a bare `if`, so an `else` written directly
- +// after its body would bind to that hidden `if`.
- +#define FOREACH_CHAN_UNMASKED for (unsigned ch = 0; ch < 4; ++ch)
- +#define FOREACH_CHAN FOREACH_CHAN_UNMASKED if (write_mask & (1 << ch))
- +
- +// Returns the value for channel `chan` of operand `ta`, creating it on
- +// first use and caching it in ta.values. Source operands go through the
- +// operand's swizzle; destination operands use `chan` directly. Relative
- +// operands are resolved with the address value they refer to.
- +value* tgsi_translator::get_arg_value(tgsi_arg &ta, unsigned chan) {
- +    if (ta.values.empty())
- +        ta.values.resize(4);
- +
- +    if (ta.values[chan])
- +        return ta.values[chan];
- +
- +    const unsigned reg_chan = ta.dst ? chan : ta.swz[chan];
- +
- +    if (!ta.rel) {
- +        ta.values[chan] = get_tgsi_value(ta.kind, ta.sel, reg_chan);
- +    } else {
- +        value *addr = get_tgsi_value(VLK_TGSI_ADDR, ta.rel_addr_index,
- +                ta.rel_addr_chan);
- +        ta.values[chan] = sh->get_reg_value(ta.kind, !ta.dst, ta.sel,
- +                reg_chan, ta.rel, addr, ta.rel_array_id);
- +    }
- +    return ta.values[chan];
- +}
- +
- +// Operand lookup by position: index 0 is the destination of the current
- +// instruction, index N (N >= 1) is source N-1.
- +value* tgsi_translator::get_arg_value(unsigned index, unsigned chan) {
- +    if (index == 0)
- +        return get_arg_value(args.dst, chan);
- +
- +    return get_arg_value(args.src[index - 1], chan);
- +}
- +
- +// Generic per-channel handler: emits info->isa_op once for every channel
- +// enabled in write_mask, all inside one group. SUB is expressed by
- +// toggling the negate flag of source 1, ABS by forcing abs (and clearing
- +// neg) on source 0. Opcodes flagged TIF_ALU_SWAPSRC01 are emitted with
- +// sources 0 and 1 exchanged. Always returns 0.
- +int tgsi_translator::ti_alu() {
- +    if (info->tgsi_op == TGSI_OPCODE_SUB) {
- +        args.src[1].neg = !args.src[1].neg;
- +    } else if (info->tgsi_op == TGSI_OPCODE_ABS) {
- +        args.src[0].abs = 1;
- +        args.src[0].neg = 0;
- +    }
- +
- +    const bool swap_src01 = (info->flags & TIF_ALU_SWAPSRC01) != 0;
- +
- +    begin_group();
- +    for (unsigned ch = 0; ch < 4; ++ch) {
- +        if (!(write_mask & (1 << ch)))
- +            continue;
- +
- +        if (swap_src01)
- +            emit_alu(info->isa_op, tgsi_dst(ch), clamp, asrc(args.src[1], ch),
- +                    asrc(args.src[0], ch));
- +        else
- +            emit_alu(info->isa_op, ch);
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +// Handler for DP2 / DP3 / DP4 / DPH: emits one packed 4-slot DOT4.
- +// Slots beyond the opcode's component count multiply 0 by 0; for DPH the
- +// last slot multiplies the literal 1.0 by source 1. The result is taken
- +// from the slot of the lowest enabled write-mask channel, and is copied
- +// to the other enabled channels through a temp when more than one channel
- +// is written. Returns 0.
- +int tgsi_translator::ti_dot() {
- +
- +    unsigned nc, s1 = 0, i;
- +    switch (info->tgsi_op) {
- +    case TGSI_OPCODE_DP2:
- +        nc = 2;
- +        break;
- +    case TGSI_OPCODE_DP3:
- +        nc = 3;
- +        break;
- +    case TGSI_OPCODE_DP4:
- +        nc = 4;
- +        break;
- +    case TGSI_OPCODE_DPH:
- +        nc = 4;
- +        s1 = 1;
- +        break;
- +    default:
- +        nc = 0;
- +        assert(!"ti_dot: unexpected tgsi opcode");
- +    }
- +
- +    // Nothing is written: bail out before __builtin_ctz(), whose result is
- +    // undefined for a zero argument.
- +    if (!write_mask)
- +        return 0;
- +
- +    // lowest written channel and number of written channels
- +    unsigned ch = __builtin_ctz(write_mask);
- +    unsigned nwc = __builtin_popcount(write_mask);
- +
- +    value *t = nwc > 1 ? create_temp() : tgsi_dst(ch);
- +
- +    alu_packed_node *p = sh->create_alu_packed();
- +    alu_node *a;
- +    for (i = 0; i < nc - s1; ++i) {
- +        a = build_alu(ALU_OP2_DOT4, (i == ch) ? t : NULL, clamp,
- +                asrc(args.src[0], i), asrc(args.src[1], i));
- +        a->bc.slot = i;
- +        p->push_back(a);
- +    }
- +    if (s1) {
- +        // DPH: the last component of source 0 is replaced by 1.0
- +        a = build_alu(ALU_OP2_DOT4, (i == ch) ? t : NULL, clamp,
- +                asrc(literal(1.0f)), asrc(args.src[1], i));
- +        a->bc.slot = i++;
- +        p->push_back(a);
- +    }
- +    // pad the unused slots with 0 * 0
- +    for (; i < 4; ++i) {
- +        a = build_alu(ALU_OP2_DOT4, (i == ch) ? t : NULL, clamp,
- +                asrc(literal(0)), asrc(literal(0)));
- +        a->bc.slot = i;
- +        p->push_back(a);
- +    }
- +    emit_node(p);
- +
- +    if (nwc > 1)
- +        ti_replicate(t);
- +
- +    return 0;
- +}
- +
- +// Handler for scalar opcodes whose result is replicated: computes
- +// info->isa_op on channel X of source 0 into a temp, then copies the temp
- +// to every written channel. RSQ operates on the absolute value of its
- +// source. Always returns 0.
- +int tgsi_translator::ti_repl() {
- +    if (info->tgsi_op == TGSI_OPCODE_RSQ) {
- +        args.src[0].neg = 0;
- +        args.src[0].abs = 1;
- +    }
- +
- +    value *result = create_temp();
- +
- +    emit_alu(info->isa_op, result, clamp, asrc(args.src[0], SEL_X));
- +    ti_replicate(result);
- +    return 0;
- +}
- +
- +// Emits `op` for source channel `chan` of the current instruction's
- +// operands, writing destination channel `dstchan` (or `chan` when dstchan
- +// is -1). On Cayman, ops whose slot flags equal AF_S or include
- +// AF_CM_EXPAND are expanded to a packed 4-slot instruction in which only
- +// the slot matching the destination channel keeps its destination.
- +// Always returns 0.
- +int tgsi_translator::emit_alu(unsigned op, int chan, int dstchan) {
- +    const unsigned slots = ctx.alu_slots(op);
- +
- +    if (dstchan == -1)
- +        dstchan = chan;
- +
- +    assert(slots);
- +
- +    const bool cm_expand = ctx.is_cayman()
- +            && (slots == AF_S || (slots & AF_CM_EXPAND));
- +
- +    if (!cm_expand) {
- +        emit_node(build_alu(op, chan, dstchan));
- +        return 0;
- +    }
- +
- +    alu_packed_node *packed = sh->create_alu_packed();
- +    for (int slot = 0; slot < 4; ++slot) {
- +        alu_node *a = build_alu(op, chan, slot);
- +        if (slot != dstchan)
- +            a->dst[0] = NULL;
- +        a->bc.slot = slot;
- +        packed->push_back(a);
- +    }
- +    emit_node(packed);
- +    return 0;
- +}
- +
- +// Builds (without emitting) an ALU node for `op` from the current
- +// instruction: the destination is channel `dstchan` of the TGSI dst (or
- +// `chan` when dstchan is -1), the sources are channel `chan` of as many
- +// TGSI sources as the op takes, with their neg/abs modifiers and the
- +// current clamp setting applied.
- +alu_node* tgsi_translator::build_alu(unsigned op, int chan, int dstchan) {
- +    alu_node *node = create_alu(op);
- +    const unsigned src_count = node->bc.op_ptr->src_count;
- +
- +    if (dstchan == -1)
- +        dstchan = chan;
- +
- +    node->dst.push_back(tgsi_dst(dstchan));
- +    node->bc.clamp = clamp;
- +
- +    for (unsigned s = 0; s < src_count; ++s) {
- +        node->src.push_back(get_arg_value(1 + s, chan));
- +        node->bc.src[s].abs = args.src[s].abs;
- +        node->bc.src[s].neg = args.src[s].neg;
- +    }
- +    return node;
- +}
- +
- +// Creates an ALU node for `op` with its slot flags filled in. Ops flagged
- +// AF_KILL are marked so they are not hoisted, moved or killed and are
- +// scheduled early; ops flagged AF_PRED or AF_MOVA are only marked as not
- +// hoistable.
- +alu_node* tgsi_translator::create_alu(unsigned op) {
- +    alu_node *node = sh->create_alu();
- +
- +    node->bc.set_op(op);
- +    node->bc.slot_flags = (alu_op_flags) ctx.alu_slots(node->bc.op_ptr);
- +
- +    if (node->bc.op_ptr->flags & AF_KILL) {
- +        node->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL
- +                | NF_SCHEDULE_EARLY;
- +        return node;
- +    }
- +
- +    if (node->bc.op_ptr->flags & (AF_PRED | AF_MOVA))
- +        node->flags |= NF_DONT_HOIST;
- +
- +    return node;
- +}
- +
- +// Handler for single-result trig opcodes: range-reduces channel 0 of
- +// source 0 with prepare_trig(), applies info->isa_op in place on the temp
- +// and replicates it to the written channels. Always returns 0.
- +int tgsi_translator::ti_trig() {
- +    value *angle = prepare_trig(asrc(args.src[0], 0));
- +
- +    emit_alu(info->isa_op, angle, clamp, asrc(angle));
- +    ti_replicate(angle);
- +    return 0;
- +}
- +
- +// Handler for SCS: for the written channels, X = COS, Y = SIN of the
- +// range-reduced channel 0 of source 0, Z = 0.0 and W = 1.0, all in one
- +// group. Always returns 0.
- +int tgsi_translator::ti_scs() {
- +    value *angle = prepare_trig(asrc(args.src[0], 0));
- +
- +    begin_group();
- +    if (write_mask & (1 << SEL_X)) {
- +        emit_alu(ALU_OP1_COS, tgsi_dst(SEL_X), clamp, asrc(angle));
- +    }
- +    if (write_mask & (1 << SEL_Y)) {
- +        emit_alu(ALU_OP1_SIN, tgsi_dst(SEL_Y), clamp, asrc(angle));
- +    }
- +    if (write_mask & (1 << SEL_Z)) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), 0, asrc(0.0f));
- +    }
- +    if (write_mask & (1 << SEL_W)) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +// Emits the range reduction applied before SIN / COS and returns the temp
- +// that holds the reduced argument:
- +//   t = fract(s * 1/(2*pi) + 0.5)
- +// followed by t * 2*pi - pi on r600 chips; on other chips t is passed
- +// through MULADD with the operands 1.0 and asrc(0.5f, 0, 1).
- +value* tgsi_translator::prepare_trig(alu_src s) {
- +    static float inv_two_pi = 1.0 / (3.1415926535 * 2);
- +    static float two_pi = 3.1415926535 * 2;
- +    static float minus_pi = -3.1415926535;
- +
- +    value *reduced = create_temp();
- +
- +    emit_alu(ALU_OP3_MULADD, reduced, 0, s, asrc(inv_two_pi), asrc(0.5f));
- +    emit_alu(ALU_OP1_FRACT, reduced, 0, asrc(reduced));
- +
- +    if (!ctx.is_r600()) {
- +#if 0
- +        emit_alu(ALU_OP2_ADD, reduced, 0, asrc(reduced), asrc(-0.5f));
- +#else
- +        // using muladd just to reduce differences from default backend for
- +        // debugging
- +        emit_alu(ALU_OP3_MULADD, reduced, 0, asrc(reduced), asrc(1.0f),
- +                asrc(0.5f, 0, 1));
- +#endif
- +    } else {
- +        emit_alu(ALU_OP3_MULADD, reduced, 0, asrc(reduced), asrc(two_pi),
- +                asrc(minus_pi));
- +    }
- +
- +    return reduced;
- +}
- +
- +// Handler for EXP, with src = channel 0 of source 0. For the written
- +// channels: X = EXP_IEEE(floor(src)), Y = fract(src), Z = EXP_IEEE(src),
- +// W = 1.0. The floor is computed into a temp before the group.
- +// Always returns 0.
- +int tgsi_translator::ti_exp() {
- +    value *floored = create_temp();
- +    const bool write_x = (write_mask & (1 << SEL_X)) != 0;
- +
- +    if (write_x)
- +        emit_alu(ALU_OP1_FLOOR, floored, 0, asrc(args.src[0], 0));
- +
- +    begin_group();
- +    if (write_x) {
- +        emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_X), clamp, asrc(floored));
- +    }
- +    if (write_mask & (1 << SEL_Y)) {
- +        emit_alu(ALU_OP1_FRACT, tgsi_dst(SEL_Y), clamp, asrc(args.src[0], 0));
- +    }
- +    if (write_mask & (1 << SEL_Z)) {
- +        emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_Z), clamp,
- +                asrc(args.src[0], 0));
- +    }
- +    if (write_mask & (1 << SEL_W)) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +// Handler for LOG, with s = asrc(args.src[0], 0, 1, 0). Intermediates:
- +// lg = LOG_IEEE(s), fl = floor(lg), rcp = 1 / EXP_IEEE(fl). For the
- +// written channels: X = fl, Y = s * rcp, Z = lg, W = 1.0. Intermediates
- +// are only computed when a channel that needs them is written.
- +// Always returns 0.
- +int tgsi_translator::ti_log() {
- +    value *lg = create_temp();
- +    value *fl = create_temp();
- +    value *rcp = create_temp();
- +
- +    alu_src s = asrc(args.src[0], 0, 1, 0);
- +
- +    const bool write_y = (write_mask & (1 << SEL_Y)) != 0;
- +
- +    if (write_mask & 0x7)
- +        emit_alu(ALU_OP1_LOG_IEEE, lg, 0, s);
- +    if (write_mask & 0x3)
- +        emit_alu(ALU_OP1_FLOOR, fl, 0, asrc(lg));
- +    if (write_y) {
- +        emit_alu(ALU_OP1_EXP_IEEE, rcp, 0, asrc(fl));
- +        emit_alu(ALU_OP1_RECIP_IEEE, rcp, 0, asrc(rcp));
- +    }
- +
- +    begin_group();
- +    if (write_mask & (1 << SEL_X)) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), clamp, asrc(fl));
- +    }
- +    if (write_y) {
- +        emit_alu(ALU_OP2_MUL, tgsi_dst(SEL_Y), clamp, s, asrc(rcp));
- +    }
- +    if (write_mask & (1 << SEL_Z)) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), clamp, asrc(lg));
- +    }
- +    if (write_mask & (1 << SEL_W)) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +// Handler for DST. For the written channels: X = 1.0,
- +// Y = src0.y * src1.y, Z = src0.z, W = src1.w, all in one group.
- +// Always returns 0.
- +int tgsi_translator::ti_dst() {
- +    begin_group();
- +    if (write_mask & (1 << SEL_X)) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(1.0f));
- +    }
- +    if (write_mask & (1 << SEL_Y)) {
- +        emit_alu(ALU_OP2_MUL, tgsi_dst(SEL_Y), clamp,
- +                asrc(args.src[0], SEL_Y), asrc(args.src[1], SEL_Y));
- +    }
- +    if (write_mask & (1 << SEL_Z)) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_Z), clamp,
- +                asrc(args.src[0], SEL_Z));
- +    }
- +    if (write_mask & (1 << SEL_W)) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), clamp,
- +                asrc(args.src[1], SEL_W));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +// Handler for LRP. For every written channel a temp first receives an ADD
- +// of 1.0 and asrc(src0, ch, 0, 1), then is multiplied by src2; a grouped
- +// MULADD then writes dst = src0 * src1 + temp. Always returns 0.
- +int tgsi_translator::ti_lrp() {
- +    vvec tmp;
- +    create_temps(tmp, 4);
- +
- +    for (unsigned ch = 0; ch < 4; ++ch) {
- +        if (!(write_mask & (1 << ch)))
- +            continue;
- +
- +        emit_alu(ALU_OP2_ADD, tmp[ch], 0, asrc(1.0f),
- +                asrc(args.src[0], ch, 0, 1));
- +        emit_alu(ALU_OP2_MUL, tmp[ch], 0, asrc(tmp[ch]),
- +                asrc(args.src[2], ch));
- +    }
- +
- +    begin_group();
- +    for (unsigned ch = 0; ch < 4; ++ch) {
- +        if (!(write_mask & (1 << ch)))
- +            continue;
- +
- +        emit_alu(ALU_OP3_MULADD, tgsi_dst(ch), clamp, asrc(args.src[0], ch),
- +                asrc(args.src[1], ch), asrc(tmp[ch]));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +// Handler for POW: computes EXP_IEEE(src1.x * LOG_IEEE(src0.x)) in a temp
- +// and replicates it to the written channels. Always returns 0.
- +int tgsi_translator::ti_pow() {
- +    value *acc = create_temp();
- +
- +    emit_alu(ALU_OP1_LOG_IEEE, acc, 0, asrc(args.src[0], 0));
- +    emit_alu(ALU_OP2_MUL, acc, 0, asrc(args.src[1], 0), asrc(acc));
- +    emit_alu(ALU_OP1_EXP_IEEE, acc, clamp, asrc(acc));
- +
- +    ti_replicate(acc);
- +    return 0;
- +}
- +
- +// Copies value `t` to every destination channel enabled in write_mask,
- +// using one grouped MOV per channel. Always returns 0.
- +int tgsi_translator::ti_replicate(value* t) {
- +    begin_group();
- +    for (unsigned ch = 0; ch < 4; ++ch) {
- +        if (!(write_mask & (1 << ch)))
- +            continue;
- +
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(ch), 0, asrc(t));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +// Handler for XPD (cross product). For each written channel below W a
- +// temp gets src0[rot_a[ch]] * src1[rot_b[ch]]; the grouped MULADD then
- +// computes src0[rot_b[ch]] * src1[rot_a[ch]] combined with
- +// asrc(temp, 0, 1). A written W channel receives 1.0. Always returns 0.
- +int tgsi_translator::ti_xpd() {
- +    static const unsigned int rot_a[] = { 2, 0, 1 };
- +    static const unsigned int rot_b[] = { 1, 2, 0 };
- +    vvec prod;
- +
- +    create_temps(prod, 3);
- +
- +    for (unsigned ch = 0; ch < 4; ++ch) {
- +        if (!(write_mask & (1 << ch)))
- +            continue;
- +        if (!(ch < SEL_W))
- +            continue;
- +
- +        emit_alu(ALU_OP2_MUL, prod[ch], 0, asrc(args.src[0], rot_a[ch]),
- +                asrc(args.src[1], rot_b[ch]));
- +    }
- +
- +    begin_group();
- +    for (unsigned ch = 0; ch < 4; ++ch) {
- +        if (!(write_mask & (1 << ch)))
- +            continue;
- +
- +        if (ch < SEL_W) {
- +            emit_alu(ALU_OP3_MULADD, tgsi_dst(ch), clamp,
- +                    asrc(args.src[0], rot_b[ch]),
- +                    asrc(args.src[1], rot_a[ch]), asrc(prod[ch], 0, 1));
- +        } else {
- +            emit_alu(ALU_OP1_MOV, tgsi_dst(ch), 0, asrc(1.0f));
- +        }
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +// Handler for KILL / KILL_IF: marks the shader as using kill and emits
- +// four KILLGT instructions. KILL_IF compares 0.0 against each channel of
- +// source 0; the unconditional form compares 1.0 against 0.0.
- +// Always returns 0.
- +int tgsi_translator::ti_kill() {
- +    // XXX if this affects performance, we might want to do it after DCE
- +    uses_kill = true;
- +
- +    const bool conditional = (info->tgsi_op == TGSI_OPCODE_KILL_IF);
- +
- +    for (int chan = 0; chan < 4; ++chan) {
- +        if (conditional) {
- +            emit_alu(ALU_OP2_KILLGT, NULL, 0, asrc(0.0f),
- +                    asrc(args.src[0], chan));
- +        } else {
- +            emit_alu(ALU_OP2_KILLGT, NULL, 0, asrc(1.0f), asrc(0.0f));
- +        }
- +    }
- +    return 0;
- +}
- +
- +// Handler for the address-register loads; all write channel X of the
- +// destination from channel 0 of source 0:
- +//   UARL - plain MOV;
- +//   ARR  - FLT_TO_INT;
- +//   ARL  - FLT_TO_INT_FLOOR on egcm chips, otherwise FLOOR into a temp
- +//          followed by FLT_TO_INT.
- +// Always returns 0.
- +int tgsi_translator::ti_arl() {
- +    const unsigned opcode = info->tgsi_op;
- +
- +    if (opcode == TGSI_OPCODE_UARL) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(args.src[0], 0));
- +    } else if (opcode == TGSI_OPCODE_ARR) {
- +        emit_alu(ALU_OP1_FLT_TO_INT, tgsi_dst(SEL_X), 0,
- +                asrc(args.src[0], 0));
- +    } else if (opcode == TGSI_OPCODE_ARL) {
- +        if (!ctx.is_egcm()) {
- +            value *floored = create_temp();
- +            emit_alu(ALU_OP1_FLOOR, floored, 0, asrc(args.src[0], 0));
- +            emit_alu(ALU_OP1_FLT_TO_INT, tgsi_dst(SEL_X), 0, asrc(floored));
- +        } else {
- +            emit_alu(ALU_OP1_FLT_TO_INT_FLOOR, tgsi_dst(SEL_X), 0,
- +                    asrc(args.src[0], 0));
- +        }
- +    } else {
- +        assert(!"ti_arl: unexpected opcode");
- +    }
- +    return 0;
- +}
- +
- +/*
- + * SSG / ISSG: sign of src0 per channel, built from two conditional moves:
- + *   sign[ch] = (src >= 0) ? 0 : -1
- + *   dst[ch]  = (src >  0) ? 1 : sign[ch]
- + * SSG uses the float opcodes and float constants; any other opcode routed
- + * here is handled as ISSG with the _INT opcodes and integer constants.
- + */
- +int tgsi_translator::ti_ssg() {
- +    vvec sign;
- +    create_temps(sign, 4);
- +
- +    const bool integer_variant = (info->tgsi_op != TGSI_OPCODE_SSG);
- +
- +    if (integer_variant) { // ISSG
- +        FOREACH_CHAN
- +        {
- +            emit_alu(ALU_OP3_CNDGE_INT, sign[ch], 0, asrc(args.src[0], ch),
- +                     asrc(0u), asrc(-1u));
- +        }
- +        begin_group();
- +        FOREACH_CHAN
- +        {
- +            emit_alu(ALU_OP3_CNDGT_INT, tgsi_dst(ch), 0,
- +                     asrc(args.src[0], ch), asrc(1u), asrc(sign[ch]));
- +        }
- +        end_group();
- +    } else { // SSG
- +        FOREACH_CHAN
- +        {
- +            emit_alu(ALU_OP3_CNDGE, sign[ch], 0, asrc(args.src[0], ch),
- +                     asrc(0.0f), asrc(-1.0f));
- +        }
- +        begin_group();
- +        FOREACH_CHAN
- +        {
- +            emit_alu(ALU_OP3_CNDGT, tgsi_dst(ch), 0, asrc(args.src[0], ch),
- +                     asrc(1.0f), asrc(sign[ch]));
- +        }
- +        end_group();
- +    }
- +    return 0;
- +}
- +
- +/*
- + * CMP and its integer counterpart: per-channel select between src1 and
- + * src2 keyed on src0.
- + *   CMP:       CNDGE    (src0 >= 0 ? src2 : src1), with the clamp modifier
- + *   otherwise: CNDE_INT (src0 == 0 ? src2 : src1), no clamp
- + */
- +int tgsi_translator::ti_cmp() {
- +    const bool float_cmp = (info->tgsi_op == TGSI_OPCODE_CMP);
- +
- +    begin_group();
- +    FOREACH_CHAN
- +    {
- +        if (!float_cmp) {
- +            emit_alu(ALU_OP3_CNDE_INT, tgsi_dst(ch), 0,
- +                     asrc(args.src[0], ch), asrc(args.src[2], ch),
- +                     asrc(args.src[1], ch));
- +        } else {
- +            emit_alu(ALU_OP3_CNDGE, tgsi_dst(ch), clamp,
- +                     asrc(args.src[0], ch), asrc(args.src[2], ch),
- +                     asrc(args.src[1], ch));
- +        }
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +/*
- + * UMAD: dst = src0 * src1 + src2 on integers.
- + * The low 32 bits of each product go to a temp (MULLO_INT); the adds are
- + * then emitted together in one group.
- + */
- +int tgsi_translator::ti_umad() {
- +    vvec product;
- +    create_temps(product, 4);
- +
- +    FOREACH_CHAN
- +    {
- +        emit_alu(ALU_OP2_MULLO_INT, product[ch], 0,
- +                 asrc(args.src[0], ch), asrc(args.src[1], ch));
- +    }
- +
- +    begin_group();
- +    FOREACH_CHAN
- +    {
- +        emit_alu(ALU_OP2_ADD_INT, tgsi_dst(ch), 0,
- +                 asrc(product[ch]), asrc(args.src[2], ch));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +/*
- + * Float to integer conversion: each channel of src0 is first passed
- + * through TRUNC into a temp, then converted with the ISA opcode taken
- + * from the instruction info table (info->isa_op).
- + */
- +int tgsi_translator::ti_f2iu() {
- +    vvec truncated;
- +    create_temps(truncated, 4);
- +
- +    FOREACH_CHAN
- +    {
- +        emit_alu(ALU_OP1_TRUNC, truncated[ch], 0, asrc(args.src[0], ch));
- +    }
- +
- +    begin_group();
- +    FOREACH_CHAN
- +    {
- +        emit_alu(info->isa_op, tgsi_dst(ch), 0, truncated[ch]);
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +/*
- + * INEG: integer negation, emitted per channel as dst = 0 - src0 (SUB_INT).
- + */
- +int tgsi_translator::ti_ineg() {
- +    begin_group();
- +    FOREACH_CHAN
- +    {
- +        emit_alu(ALU_OP2_SUB_INT, tgsi_dst(ch), 0,
- +                 asrc(0u), asrc(args.src[0], ch));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +/*
- + * IABS: integer absolute value.
- + *   negated[ch] = 0 - src0
- + *   dst[ch]     = (src0 >= 0) ? src0 : negated[ch]
- + */
- +int tgsi_translator::ti_iabs() {
- +    vvec negated;
- +    create_temps(negated, 4);
- +
- +    FOREACH_CHAN
- +    {
- +        emit_alu(ALU_OP2_SUB_INT, negated[ch], 0,
- +                 asrc(0u), asrc(args.src[0], ch));
- +    }
- +
- +    begin_group();
- +    FOREACH_CHAN
- +    {
- +        emit_alu(ALU_OP3_CNDGE_INT, tgsi_dst(ch), 0,
- +                 asrc(args.src[0], ch), asrc(args.src[0], ch),
- +                 asrc(negated[ch]));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +/*
- + * Integer division and modulo: IDIV, UDIV, MOD (signed), UMOD.
- + *
- + * For every channel visited by FOREACH_CHAN the unsigned quotient or
- + * remainder of num / den is computed from a reciprocal estimate of den
- + * that is corrected for its rounding error, followed by a +/-1 fix-up of
- + * the quotient (or +/-den fix-up of the remainder).
- + *
- + * In the signed variants num = |src0| and den = |src1|, and the sign is
- + * re-applied at the end:
- + *   - quotient:  negative when the signs of src0 and src1 differ
- + *   - remainder: same sign as src0
- + *
- + * Results are collected in temporaries and copied to the destination in a
- + * single group after all channels are done. Always returns 0.
- + */
- +int tgsi_translator::ti_divmod() {
- +
- +    bool signed_op = false, mod = false;
- +    switch (info->tgsi_op) {
- +    case TGSI_OPCODE_MOD:
- +        mod = true;
- +        /* fall through: MOD is the signed modulo */
- +    case TGSI_OPCODE_IDIV:
- +        signed_op = true;
- +        break;
- +    case TGSI_OPCODE_UMOD:
- +        mod = true;
- +        /* fall through */
- +    case TGSI_OPCODE_UDIV:
- +        break;
- +    default:
- +        assert(!"ti_divmod: unexpected tgsi opcode");
- +    }
- +
- +    // TODO optimize for constant src1 (omit RECIP error correction)
- +
- +    // Scratch values, reused for every channel. The names follow the
- +    // tmpN.c notation used in the step comments below.
- +    value *t0x = create_temp();
- +    value *t0y = create_temp();
- +    value *t0z = create_temp();
- +    value *t0w = create_temp();
- +    value *t1x = create_temp();
- +    value *t1y = create_temp();
- +    value *t1z = create_temp();
- +    value *t1w = create_temp();
- +    value *t2x = create_temp();
- +    value *t2y = create_temp();
- +    value *t2z = create_temp();
- +    value *t3x = create_temp();
- +
- +    vvec dst;
- +    create_temps(dst, 4);
- +
- +    FOREACH_CHAN
- +    {
- +        if (signed_op) {
- +            /* tmp2.x = -src0 */
- +            emit_alu(ALU_OP2_SUB_INT, t2x, 0, asrc(0u), asrc(args.src[0], ch));
- +            /* tmp2.y = -src1 */
- +            emit_alu(ALU_OP2_SUB_INT, t2y, 0, asrc(0u), asrc(args.src[1], ch));
- +            /* tmp2.z sign bit is set if src0 and src1 signs are different */
- +            /* it will be a sign of the quotient */
- +            if (!mod) {
- +                emit_alu(ALU_OP2_XOR_INT, t2z, 0, asrc(args.src[0], ch),
- +                         asrc(args.src[1], ch));
- +            }
- +            /* tmp2.x = |src0| (num) */
- +            emit_alu(ALU_OP3_CNDGE_INT, t2x, 0, asrc(args.src[0], ch),
- +                     asrc(args.src[0], ch), asrc(t2x));
- +            /* tmp2.y = |src1| (den) */
- +            emit_alu(ALU_OP3_CNDGE_INT, t2y, 0, asrc(args.src[1], ch),
- +                     asrc(args.src[1], ch), asrc(t2y));
- +        } else { // unsigned
- +            // copy sources to the same temps as in signed variant just
- +            // to simplify generation of further operations.
- +            // copies will be propagated later anyway.
- +            emit_alu(ALU_OP1_MOV, t2x, 0, asrc(args.src[0], ch));
- +            emit_alu(ALU_OP1_MOV, t2y, 0, asrc(args.src[1], ch));
- +        }
- +
- +        /* 1. tmp0.x = rcp_u (den) (2^32/den + e, e - rounding error) */
- +        if (ctx.is_cayman()) {
- +            /* tmp3.x = u2f(den) */
- +            emit_alu(ALU_OP1_UINT_TO_FLT, t3x, 0, asrc(t2y));
- +            /* tmp0.x = recip(tmp3.x) */
- +            emit_alu(ALU_OP1_RECIP_IEEE, t0x, 0, asrc(t3x));
- +            /* tmp3.x = tmp0.x * float(0x4f800000) */
- +            emit_alu(ALU_OP2_MUL, t3x, 0, asrc(t0x), asrc(0x4f800000u));
- +            /* tmp0.x = f2u (tmp3.x) */
- +            emit_alu(ALU_OP1_FLT_TO_UINT, t0x, 0, asrc(t3x));
- +        } else {
- +            /* tmp0.x = recip_uint den */
- +            emit_alu(ALU_OP1_RECIP_UINT, t0x, 0, asrc(t2y));
- +        }
- +
- +        /* 2. tmp0.z = lo (tmp0.x * den) */
- +        emit_alu(ALU_OP2_MULLO_UINT, t0z, 0, asrc(t0x), asrc(t2y));
- +        /* 3. tmp0.w = -tmp0.z */
- +        emit_alu(ALU_OP2_SUB_INT, t0w, 0, asrc(0u), asrc(t0z));
- +        /* 4. tmp0.y = hi (tmp0.x * den) */
- +        emit_alu(ALU_OP2_MULHI_UINT, t0y, 0, asrc(t0x), asrc(t2y));
- +        /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*den)) */
- +        emit_alu(ALU_OP3_CNDE_INT, t0z, 0, asrc(t0y), asrc(t0w), asrc(t0z));
- +        /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
- +        emit_alu(ALU_OP2_MULHI_UINT, t0w, 0, asrc(t0z), asrc(t0x));
- +        /* 7. tmp1.x = tmp0.x - tmp0.w */
- +        emit_alu(ALU_OP2_SUB_INT, t1x, 0, asrc(t0x), asrc(t0w));
- +        /* 8. tmp1.y = tmp0.x + tmp0.w */
- +        emit_alu(ALU_OP2_ADD_INT, t1y, 0, asrc(t0x), asrc(t0w));
- +        /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
- +        emit_alu(ALU_OP3_CNDE_INT, t0x, 0, asrc(t0y), asrc(t1y), asrc(t1x));
- +        /* 10. tmp0.z = hi(tmp0.x * num) = q */
- +        emit_alu(ALU_OP2_MULHI_UINT, t0z, 0, asrc(t0x), asrc(t2x));
- +        /* 11. tmp0.y = lo (den * tmp0.z) = den*q = num - r */
- +        emit_alu(ALU_OP2_MULLO_UINT, t0y, 0, asrc(t2y), asrc(t0z));
- +        /* 12. tmp0.w = num - tmp0.y = r */
- +        emit_alu(ALU_OP2_SUB_INT, t0w, 0, asrc(t2x), asrc(t0y));
- +        /* 13. tmp1.x = tmp0.w >= den = r >= den */
- +        emit_alu(ALU_OP2_SETGE_UINT, t1x, 0, asrc(t0w), asrc(t2y));
- +        /* 14. tmp1.y = num >= tmp0.y = r >= 0 */
- +        emit_alu(ALU_OP2_SETGE_UINT, t1y, 0, asrc(t2x), asrc(t0y));
- +
- +        if (mod) { /* UMOD */
- +            /* 15. tmp1.z = tmp0.w - den = r - den */
- +            emit_alu(ALU_OP2_SUB_INT, t1z, 0, asrc(t0w), asrc(t2y));
- +            /* 16. tmp1.w = tmp0.w + den = r + den */
- +            emit_alu(ALU_OP2_ADD_INT, t1w, 0, asrc(t0w), asrc(t2y));
- +        } else { /* UDIV */
- +            /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
- +            emit_alu(ALU_OP2_ADD_INT, t1z, 0, asrc(t0z), asrc(1u));
- +            /* 16. tmp1.w = tmp0.z - 1 = q - 1 */
- +            emit_alu(ALU_OP2_ADD_INT, t1w, 0, asrc(t0z), asrc(-1u));
- +        }
- +
- +        /* 17. tmp1.x = tmp1.x & tmp1.y */
- +        emit_alu(ALU_OP2_AND_INT, t1x, 0, asrc(t1x), asrc(t1y));
- +
- +        if (mod) {
- +            /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
- +            emit_alu(ALU_OP3_CNDE_INT, t0z, 0, asrc(t1x), asrc(t0w), asrc(t1z));
- +        } else {
- +            /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
- +            emit_alu(ALU_OP3_CNDE_INT, t0z, 0, asrc(t1x), asrc(t0z), asrc(t1z));
- +        }
- +
- +        if (signed_op) {
- +            /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
- +            emit_alu(ALU_OP3_CNDE_INT, t0z, 0, asrc(t1y), asrc(t1w), asrc(t0z));
- +
- +            /* fix the sign of the result */
- +            /* tmp0.x = -tmp0.z */
- +            emit_alu(ALU_OP2_SUB_INT, t0x, 0, asrc(0u), asrc(t0z));
- +            if (mod) {
- +                /* sign of the remainder is the same as the sign of src0 */
- +                /* dst = src0>=0 ? tmp0.z : tmp0.x */
- +                // The condition must be the original src0: tmp2.x holds
- +                // |src0| at this point and would always select the
- +                // non-negated remainder.
- +                emit_alu(ALU_OP3_CNDGE_INT, dst[ch], 0,
- +                         asrc(args.src[0], ch), asrc(t0z), asrc(t0x));
- +            } else {
- +                /* fix the quotient sign (same as the sign of src0*src1) */
- +                /* dst = tmp2.z>=0 ? tmp0.z : tmp0.x */
- +                emit_alu(ALU_OP3_CNDGE_INT, dst[ch], 0, asrc(t2z),
- +                         asrc(t0z), asrc(t0x));
- +            }
- +        } else { // unsigned
- +            /* 19. dst = tmp1.y==0 ? tmp1.w : tmp0.z */
- +            emit_alu(ALU_OP3_CNDE_INT, dst[ch], 0, asrc(t1y), asrc(t1w),
- +                     asrc(t0z));
- +        }
- +    }
- +
- +    // Write all results at once so that a destination that aliases a
- +    // source cannot disturb the per-channel computations above.
- +    begin_group();
- +    FOREACH_CHAN
- +    {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(ch), 0, asrc(dst[ch]));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +/*
- + * Allocate a fetch node for opcode `op` with four source and four
- + * destination slots and the destination swizzle initialised through
- + * VSWZ_XYZW. The node is returned without being emitted.
- + */
- +fetch_node* tgsi_translator::create_fetch(unsigned op) {
- +    fetch_node *node = sh->create_fetch();
- +
- +    node->bc.set_op(op);
- +    node->src.resize(4);
- +    node->dst.resize(4);
- +    VSWZ_XYZW(node->bc.dst_sel);
- +
- +    return node;
- +}
- +
- +/*
- + * Create (but do not emit) an ALU node for `op`.
- + *
- + * `dst` becomes the node's only destination and `clamp` its clamp bit.
- + * Only as many of s0/s1/s2, with their abs/neg modifiers, are attached as
- + * the opcode's src_count requires; the rest are ignored.
- + */
- +alu_node* tgsi_translator::build_alu(unsigned op, value *dst, int clamp,
- +        value *s0, int s0abs, int s0neg, value *s1,
- +        int s1abs, int s1neg, value *s2, int s2abs,
- +        int s2neg) {
- +
- +    alu_node *node = create_alu(op);
- +    const unsigned nsrc = node->bc.op_ptr->src_count;
- +
- +    node->dst.push_back(dst);
- +    node->bc.clamp = clamp;
- +
- +    if (nsrc >= 1) {
- +        node->src.push_back(s0);
- +        node->bc.src[0].neg = s0neg;
- +        node->bc.src[0].abs = s0abs;
- +    }
- +    if (nsrc >= 2) {
- +        node->src.push_back(s1);
- +        node->bc.src[1].neg = s1neg;
- +        node->bc.src[1].abs = s1abs;
- +    }
- +    if (nsrc == 3) {
- +        node->src.push_back(s2);
- +        node->bc.src[2].neg = s2neg;
- +        node->bc.src[2].abs = s2abs;
- +    }
- +    return node;
- +}
- +
- +/*
- + * Map a TGSI register file to the value kind used for it by the
- + * translator. Unknown files assert and yield VLK_INVALID.
- + */
- +inline value_kind tgsi_translator::file_to_value_kind(unsigned file) {
- +    value_kind kind = VLK_INVALID;
- +
- +    switch (file) {
- +    case TGSI_FILE_INPUT:
- +        kind = VLK_TGSI_INPUT;
- +        break;
- +    case TGSI_FILE_OUTPUT:
- +        kind = VLK_TGSI_OUTPUT;
- +        break;
- +    case TGSI_FILE_TEMPORARY:
- +        kind = VLK_TGSI_TEMP;
- +        break;
- +    case TGSI_FILE_ADDRESS:
- +        kind = VLK_TGSI_ADDR;
- +        break;
- +    case TGSI_FILE_IMMEDIATE:
- +        kind = VLK_CONST;
- +        break;
- +    case TGSI_FILE_CONSTANT:
- +        kind = VLK_KCACHE;
- +        break;
- +    default:
- +        assert(!"unexpected tgsi file");
- +        break;
- +    }
- +    return kind;
- +}
- +
- +/* Wrap an existing value together with explicit abs/neg modifiers. */
- +inline alu_src tgsi_translator::asrc(value *v, int abs, int neg) {
- +    alu_src operand(v, abs, neg);
- +    return operand;
- +}
- +
- +/* Operand referring to the shader's constant value for literal `l`. */
- +inline alu_src tgsi_translator::asrc(literal l, int abs, int neg) {
- +    value *constant = sh->get_const_value(l);
- +    return alu_src(constant, abs, neg);
- +}
- +
- +/* Channel `chan` of a TGSI argument, carrying the argument's own modifiers. */
- +inline alu_src tgsi_translator::asrc(tgsi_arg& ta, int chan) {
- +    value *v = get_arg_value(ta, chan);
- +    return alu_src(v, ta.abs, ta.neg);
- +}
- +
- +/*
- + * Channel `chan` of a TGSI argument with extra modifiers layered on top of
- + * the argument's own: a non-zero `abs` forces abs on and clears the
- + * argument's negation; a non-zero `neg` then inverts whatever negation
- + * is left.
- + */
- +inline alu_src tgsi_translator::asrc(tgsi_arg& ta, int chan, int abs, int neg) {
- +    const int eff_abs = abs ? 1 : ta.abs;
- +    const int base_neg = abs ? 0 : ta.neg;
- +    const int eff_neg = neg ? !base_neg : base_neg;
- +
- +    return alu_src(get_arg_value(ta, chan), eff_abs, eff_neg);
- +}
- +
- +/* Operand for a float constant. */
- +inline alu_src tgsi_translator::asrc(float f, int abs, int neg) {
- +    return asrc(literal(f), abs, neg);
- +}
- +
- +/* Operand for an unsigned 32-bit constant. */
- +inline alu_src tgsi_translator::asrc(uint32_t u, int abs, int neg) {
- +    return asrc(literal(u), abs, neg);
- +}
- +
- +/*
- + * LIT:
- + *   dst.x = 1.0
- + *   dst.y = max(src0.x, 0.0)
- + *   dst.z = exp(MUL_LIT(log_clamped(max(src0.y, 0.0)), src0.w, src0.x))
- + *   dst.w = 1.0
- + * Only channels enabled in write_mask are written; the Z helper chain is
- + * skipped entirely when Z is not written.
- + */
- +int tgsi_translator::ti_lit() {
- +
- +    value *acc = create_temp();
- +    value *logv = create_temp();
- +
- +    const bool write_x = (write_mask & (1 << SEL_X)) != 0;
- +    const bool write_y = (write_mask & (1 << SEL_Y)) != 0;
- +    const bool write_z = (write_mask & (1 << SEL_Z)) != 0;
- +    const bool write_w = (write_mask & (1 << SEL_W)) != 0;
- +
- +    if (write_z) {
- +        emit_alu(ALU_OP2_MAX, acc, 0, asrc(args.src[0], SEL_Y), asrc(0.0f));
- +        emit_alu(ALU_OP1_LOG_CLAMPED, logv, 0, asrc(acc));
- +        emit_alu(ALU_OP3_MUL_LIT, acc, 0, asrc(logv),
- +                 asrc(args.src[0], SEL_W), asrc(args.src[0], SEL_X));
- +    }
- +
- +    begin_group();
- +    if (write_x) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0, asrc(1.0f));
- +    }
- +    if (write_y) {
- +        emit_alu(ALU_OP2_MAX, tgsi_dst(SEL_Y), clamp,
- +                 asrc(args.src[0], SEL_X), asrc(0.0f));
- +    }
- +    if (write_z) {
- +        emit_alu(ALU_OP1_EXP_IEEE, tgsi_dst(SEL_Z), clamp, asrc(acc));
- +    }
- +    if (write_w) {
- +        emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_W), 0, asrc(1.0f));
- +    }
- +    end_group();
- +    return 0;
- +}
- +
- +/*
- + * Build and emit one ALU instruction.
- + *
- + * On Cayman, opcodes whose slot flags are exactly AF_S or include
- + * AF_CM_EXPAND are emitted as a packed node of four copies, one per slot.
- + * Only the copy whose slot index equals the destination's channel
- + * (slot 0 when there is no destination) receives `dst`; the other copies
- + * get a NULL destination. Everything else is emitted as a single node.
- + */
- +int tgsi_translator::emit_alu(unsigned op, value* dst, int clamp, value* s0,
- +        int s0abs, int s0neg, value* s1, int s1abs,
- +        int s1neg, value* s2, int s2abs, int s2neg) {
- +
- +    const unsigned slots = ctx.alu_slots(op);
- +    assert(slots);
- +
- +    const bool replicate = ctx.is_cayman()
- +            && (slots == AF_S || (slots & AF_CM_EXPAND));
- +
- +    if (!replicate) {
- +        alu_node *single = build_alu(op, dst, clamp, s0, s0abs, s0neg,
- +                                     s1, s1abs, s1neg, s2, s2abs, s2neg);
- +        emit_node(single);
- +        return 0;
- +    }
- +
- +    const int dst_slot = dst ? dst->select.chan() : 0;
- +    alu_packed_node *packed = sh->create_alu_packed();
- +
- +    for (int slot = 0; slot < 4; ++slot) {
- +        value *slot_dst = (slot == dst_slot) ? dst : NULL;
- +        alu_node *copy = build_alu(op, slot_dst, clamp, s0, s0abs, s0neg,
- +                                   s1, s1abs, s1neg, s2, s2abs, s2neg);
- +        copy->bc.slot = slot;
- +        packed->push_back(copy);
- +    }
- +    emit_node(packed);
- +    return 0;
- +}
- +
- +/*
- + * Look up the value object for (kind, index, chan).
- + *
- + * Immediates are read from `literals`, four entries per index. Kcache
- + * values are always taken from bank 0. Unsupported kinds assert and
- + * return NULL.
- + */
- +value* tgsi_translator::get_tgsi_value(value_kind kind, unsigned index,
- +        unsigned chan) {
- +    value *result = NULL;
- +
- +    switch (kind) {
- +    case VLK_REG:
- +        result = sh->get_gpr_value(true, index, chan, false);
- +        break;
- +    case VLK_CONST:
- +        result = sh->get_const_value(literals[(index << 2) + chan]);
- +        break;
- +    case VLK_KCACHE:
- +        result = sh->get_kcache_value(0, index, chan);
- +        break;
- +    case VLK_TGSI_INPUT:
- +    case VLK_TGSI_OUTPUT:
- +    case VLK_TGSI_TEMP:
- +    case VLK_TGSI_ADDR:
- +        result = sh->get_value(kind, sel_chan(index, chan));
- +        break;
- +    default:
- +        assert(!"unexpected value kind");
- +        break;
- +    }
- +    return result;
- +}
- +
- +/*
- + * Publish the translator's results into the pipe shader (ps->shader):
- + * the input/output descriptors, their counts, and the assorted flags
- + * gathered during translation. Always returns 0.
- + */
- +int tgsi_translator::update_pipe_shader() {
- +    for (int in = 0; in < ninput; ++in) {
- +        memcpy(&ps->shader.input[in], &input[in].d, sizeof(r600_shader_io));
- +    }
- +
- +    for (int out = 0; out < noutput; ++out) {
- +        memcpy(&ps->shader.output[out], &output[out].d,
- +               sizeof(r600_shader_io));
- +    }
- +
- +    ps->shader.ninput = ninput;
- +    ps->shader.noutput = noutput;
- +
- +    // XXX this seems unused in the driver after some changes,
- +    // probably needs to be simply removed
- +    ps->shader.nr_ps_max_color_exports = nr_ps_max_color_exports;
- +
- +    ps->shader.nr_ps_color_exports = nr_ps_color_exports;
- +    ps->shader.clip_dist_write = clip_dist_write;
- +    ps->shader.fs_write_all = fs_write_all;
- +    ps->shader.processor_type = tgsi_proc;
- +    ps->shader.uses_kill = uses_kill;
- +    ps->shader.vs_out_misc_write = vs_out_misc_write;
- +    ps->shader.vs_out_point_size = vs_out_point_size;
- +    ps->shader.uses_tex_buffers = uses_tex_buffers;
- +    ps->shader.has_txq_cube_array_z_comp = has_txq_cube_array_z_comp;
- +    ps->shader.two_side = two_side;
- +
- +    return 0;
- +}
- +
- +// Returns the raw 32-bit immediate stored in `literals` at position sc - 1.
- +// NOTE(review): the "- 1" suggests sel_chan ids are 1-based - confirm
- +// against the sel_chan definition. No bounds check is performed.
- +uint32_t tgsi_translator::get_immediate(sel_chan sc) {
- + return literals[sc - 1];
- +}
- +
- +/*
- + * TXQ on a buffer texture: moves one value from the
- + * R600_BUFFER_INFO_CONST_BUFFER kcache bank into dst.x. The resource
- + * index is the selector of src[1].
- + *   ctx.is_egcm(): word id >> 2, channel id & 3
- + *   otherwise:     word id << 1, channel 1
- + *
- + * NOTE(review): for the non-egcm layout ti_vtx_fetch reads its extra W
- + * mask from word 1 + (id << 1); confirm that word id << 1 (without the
- + * +1) is the intended location here.
- + */
- +int tgsi_translator::ti_buffer_txq() {
- +    const int id = args.src[1].sel;
- +    int kc_sel;
- +    int kc_chan;
- +
- +    if (ctx.is_egcm()) {
- +        kc_sel = id >> 2;
- +        kc_chan = id & 3;
- +    } else {
- +        kc_sel = id << 1;
- +        kc_chan = 1;
- +    }
- +
- +    emit_alu(ALU_OP1_MOV, tgsi_dst(SEL_X), 0,
- +             asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER,
- +                                       kc_sel, kc_chan)));
- +    return 0;
- +}
- +
- +/*
- + * TXF on a buffer texture, implemented as a vertex fetch.
- + *
- + * The fetch reads from resource src[1].sel + R600_MAX_CONST_BUFFERS using
- + * src[0] as the address and writes only the channels enabled in
- + * write_mask. When ctx.is_egcm() is false the fetched channels are then
- + * ANDed with per-resource masks read from R600_BUFFER_INFO_CONST_BUFFER,
- + * and W gets one additional AND with channel 0 of the following word.
- + */
- +int tgsi_translator::ti_vtx_fetch() {
- +
- +    const int id = args.src[1].sel;
- +
- +    fetch_node *vtx = create_fetch(FETCH_OP_VFETCH);
- +    vtx->bc.resource_id = id + R600_MAX_CONST_BUFFERS;
- +    vtx->bc.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
- +    vtx->bc.mega_fetch_count = 16;
- +    vtx->bc.use_const_fields = 1;
- +    vtx->bc.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
- +
- +    for (int chan = 0; chan < 4; ++chan) {
- +        const bool enabled = (write_mask & (1 << chan)) != 0;
- +        const unsigned sel = enabled ? chan : SEL_MASK;
- +
- +        vtx->bc.dst_sel[chan] = sel;
- +        if (sel != SEL_MASK) {
- +            vtx->dst[chan] = get_arg_value(0, chan);
- +        }
- +    }
- +
- +    vtx->src = get_vector_values(args.src[0].kind, args.src[0].sel,
- +                                 args.src[0].swz);
- +
- +    emit_node(vtx);
- +
- +    if (!ctx.is_egcm()) {
- +        FOREACH_CHAN
- +        {
- +            emit_alu(ALU_OP2_AND_INT, vtx->dst[ch], 0, asrc(vtx->dst[ch]),
- +                     asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER,
- +                                               id << 1, ch)));
- +        }
- +
- +        if (write_mask & (1 << SEL_W)) {
- +            emit_alu(ALU_OP2_AND_INT, vtx->dst[SEL_W], 0,
- +                     asrc(vtx->dst[SEL_W]),
- +                     asrc(sh->get_kcache_value(R600_BUFFER_INFO_CONST_BUFFER,
- +                                               1 + (id << 1), 0)));
- +        }
- +    }
- +    return 0;
- +}
- +
- +/*
- + * Translate a TGSI texture instruction into fetch (and helper ALU) nodes.
- + *
- + * Outline:
- + *   1. buffer textures are handed to ti_buffer_txq() / ti_vtx_fetch();
- + *   2. the coordinate vector is prepared: TXF offsets are read from the
- + *      immediates, TXP performs the perspective divide, cube targets are
- + *      converted with the CUBE instruction;
- + *   3. for compressed MSAA reads the sample index is remapped via LDFPTR;
- + *   4. a cube-array TXQ that writes Z takes that channel from
- + *      R600_TXQ_CONST_BUFFER instead of from the fetch;
- + *   5. the fetch node is set up (shadow opcode variants, sampler and
- + *      resource ids, destination mask, coordinate types, unused sources)
- + *      and emitted.
- + *
- + * Always returns 0, or the result of the buffer-texture helper.
- + */
- +int tgsi_translator::ti_tex() {
- +    unsigned tgsi_op = info->tgsi_op;
- +    unsigned texture = inst->Texture.Texture;
- +    unsigned sampler_src_reg = (tgsi_op == TGSI_OPCODE_TXQ_LZ) ? 0 : 1;
- +    int8_t offset[3] = { };
- +    int opcode, i;
- +    vvec src;
- +
- +    // True only when THIS instruction is a cube-array TXQ writing Z. The
- +    // member has_txq_cube_array_z_comp is a whole-shader flag that stays
- +    // set once raised, so it must not drive per-instruction code below.
- +    bool txq_cube_array_z = false;
- +
- +    bool read_compressed_msaa = ps->shader.bc.has_compressed_msaa_texturing
- +            && tgsi_op == TGSI_OPCODE_TXF
- +            && (texture == TGSI_TEXTURE_2D_MSAA
- +                || texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
- +
- +    if (tgsi_op == TGSI_OPCODE_TXQ
- +            && ((texture == TGSI_TEXTURE_CUBE_ARRAY
- +                 || texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) {
- +        if (write_mask & (1 << SEL_Z)) {
- +            has_txq_cube_array_z_comp = true;
- +            txq_cube_array_z = true;
- +        }
- +    }
- +
- +    if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2
- +            || inst->Instruction.Opcode == TGSI_OPCODE_TXB2
- +            || inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
- +        sampler_src_reg = 2;
- +
- +    if (texture == TGSI_TEXTURE_BUFFER) {
- +        if (tgsi_op == TGSI_OPCODE_TXQ) {
- +            uses_tex_buffers = true;
- +            return ti_buffer_txq();
- +        } else if (tgsi_op == TGSI_OPCODE_TXF) {
- +            if (!ctx.is_egcm())
- +                uses_tex_buffers = true;
- +            return ti_vtx_fetch();
- +        }
- +    }
- +
- +    if (tgsi_op == TGSI_OPCODE_TXF) {
- +        /* get offset values */
- +        if (inst->Texture.NumOffsets) {
- +            assert(inst->Texture.NumOffsets == 1);
- +            // `literals` holds four entries per immediate (see the
- +            // VLK_CONST case of get_tgsi_value), so the immediate index
- +            // has to be scaled before the swizzle is added.
- +            const unsigned base = inst->TexOffsets[0].Index << 2;
- +            offset[0] = literals[base + inst->TexOffsets[0].SwizzleX] << 1;
- +            offset[1] = literals[base + inst->TexOffsets[0].SwizzleY] << 1;
- +            offset[2] = literals[base + inst->TexOffsets[0].SwizzleZ] << 1;
- +        }
- +    } else if (tgsi_op == TGSI_OPCODE_TXP) {
- +        /* Add perspective divide */
- +        value *t = create_temp();
- +        vvec t2;
- +        create_temps(t2, 4);
- +
- +        emit_alu(ALU_OP1_RECIP_IEEE, t, 0, asrc(args.src[0], SEL_W));
- +        for (i = 0; i < 3; i++)
- +            emit_alu(ALU_OP2_MUL, t2[i], 0, asrc(t), asrc(args.src[0], i));
- +        emit_alu(ALU_OP1_MOV, t2[SEL_W], 0, asrc(1.0f));
- +        src = t2;
- +    }
- +
- +    if ((texture == TGSI_TEXTURE_CUBE || texture == TGSI_TEXTURE_CUBE_ARRAY
- +            || texture == TGSI_TEXTURE_SHADOWCUBE
- +            || texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
- +            && tgsi_op != TGSI_OPCODE_TXQ && tgsi_op != TGSI_OPCODE_TXQ_LZ) {
- +
- +        vvec tv;
- +        static const int cube_swizzle[] = { SEL_Z, SEL_Z, SEL_X, SEL_Y };
- +        alu_packed_node *p = sh->create_alu_packed();
- +
- +        create_temps(tv, 4);
- +        // The four CUBE slots are emitted as a single packed node.
- +        for (i = 0; i < 4; ++i) {
- +            alu_node *a = build_alu(ALU_OP2_CUBE, tv[i], 0,
- +                    asrc(args.src[0], cube_swizzle[i]),
- +                    asrc(args.src[0], cube_swizzle[3 - i]));
- +            a->bc.slot = i;
- +            p->push_back(a);
- +        }
- +        emit_node(p);
- +
- +        // tv.z = 1 / |tv.z|;  tv.x = tv.x * tv.z + 1.5;  same for tv.y
- +        emit_alu(ALU_OP1_RECIP_IEEE, tv[SEL_Z], 0, asrc(tv[SEL_Z], 1));
- +        emit_alu(ALU_OP3_MULADD, tv[SEL_X], 0, asrc(tv[SEL_X]), asrc(tv[SEL_Z]),
- +                 asrc(1.5f));
- +        emit_alu(ALU_OP3_MULADD, tv[SEL_Y], 0, asrc(tv[SEL_Y]), asrc(tv[SEL_Z]),
- +                 asrc(1.5f));
- +
- +        /* write initial compare value into Z component
- +           - W src 0 for shadow cube
- +           - X src 1 for shadow cube array */
- +        if (texture == TGSI_TEXTURE_SHADOWCUBE)
- +            emit_alu(ALU_OP1_MOV, tv[SEL_Z], 0, asrc(args.src[0], SEL_W));
- +        else if (texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
- +            emit_alu(ALU_OP1_MOV, tv[SEL_Z], 0, asrc(args.src[1], SEL_X));
- +
- +        if (texture == TGSI_TEXTURE_CUBE_ARRAY
- +                || texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- +            if (ctx.is_egcm()) {
- +                emit_alu(ALU_OP3_MULADD, tv[SEL_W], 0, asrc(args.src[0], SEL_W),
- +                         asrc(8.0f), asrc(tv[SEL_W]));
- +            } else {
- +                fetch_node *f = create_fetch(FETCH_OP_SET_CUBEMAP_INDEX);
- +                f->bc.sampler_id = args.src[sampler_src_reg].sel;
- +                f->bc.resource_id = f->bc.sampler_id + R600_MAX_CONST_BUFFERS;
- +
- +                FILLV4(f->src, get_arg_value(1, SEL_W));
- +                FILLV4(f->bc.coord_type, 1);
- +
- +                emit_node(f);
- +            }
- +        }
- +
- +        /* for cube forms of lod and bias we need to route things */
- +        // XXX just copy target value pointer to src?
- +        if (tgsi_op == TGSI_OPCODE_TXB || tgsi_op == TGSI_OPCODE_TXL)
- +            emit_alu(ALU_OP1_MOV, tv[SEL_Z], 0, asrc(args.src[0], SEL_W));
- +        else if (tgsi_op == TGSI_OPCODE_TXB2 || tgsi_op == TGSI_OPCODE_TXL2)
- +            emit_alu(ALU_OP1_MOV, tv[SEL_Z], 0, asrc(args.src[1], SEL_X));
- +
- +        src = tv;
- +    }
- +
- +    if (src.empty()) {
- +        src = get_vector_values(args.src[0].kind, args.src[0].sel,
- +                                args.src[0].swz);
- +    }
- +
- +    /* Obtain the sample index for reading a compressed MSAA color texture.
- +     * To read the FMASK, we use the ldfptr instruction, which tells us
- +     * where the samples are stored.
- +     * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210,
- +     * which is the identity mapping. Each nibble says which physical sample
- +     * should be fetched to get that sample.
- +     *
- +     * src.w contains the sample index. The value used for the fetch is
- +     *   (ldfptr() >> (src.w * 4)) & 0xF
- +     * Then fetch the texel with the remapped index.
- +     */
- +    if (read_compressed_msaa) {
- +        fetch_node *f = create_fetch(FETCH_OP_LD);
- +
- +        f->bc.inst_mod = 1; /* LDFPTR */
- +        f->bc.sampler_id = args.src[sampler_src_reg].sel;
- +        f->bc.resource_id = f->bc.sampler_id + R600_MAX_CONST_BUFFERS;
- +
- +        value* tw = create_temp();
- +
- +        f->src = src;
- +        f->dst[3] = tw;
- +
- +        VSWZ_INIT(f->bc.dst_sel, SEL_MASK, SEL_MASK, SEL_MASK, SEL_X);
- +
- +        f->bc.offset[0] = offset[0];
- +        f->bc.offset[1] = offset[1];
- +        f->bc.offset[2] = offset[2];
- +
- +        emit_node(f);
- +
- +        // Compute the remapped index into a fresh temp: src[3] is the
- +        // instruction's own source value here and must not be used as
- +        // an ALU destination.
- +        value *tx = create_temp();
- +        value *sample = create_temp();
- +        emit_alu(ALU_OP2_MULLO_INT, tx, 0, asrc(src[3]), asrc(4u));
- +        emit_alu(ALU_OP2_LSHR_INT, sample, 0, asrc(tw), asrc(tx));
- +        emit_alu(ALU_OP2_AND_INT, sample, 0, asrc(sample), asrc(0xFu));
- +        src[3] = sample;
- +    }
- +
- +    /* does this instruction want a num layers from TXQ for a cube array? */
- +    if (txq_cube_array_z) {
- +        int id = args.src[sampler_src_reg].sel;
- +
- +        emit_alu(
- +                ALU_OP1_MOV, tgsi_dst(SEL_Z), 0,
- +                asrc(sh->get_kcache_value(R600_TXQ_CONST_BUFFER, id >> 2, id & 3)));
- +
- +        /* disable writemask from texture instruction */
- +        write_mask &= ~(1 << SEL_Z);
- +    }
- +
- +    // Shadow targets use the depth-compare (_C) variant of the sample op.
- +    opcode = info->isa_op;
- +    if (texture == TGSI_TEXTURE_SHADOW1D || texture == TGSI_TEXTURE_SHADOW2D
- +            || texture == TGSI_TEXTURE_SHADOWRECT
- +            || texture == TGSI_TEXTURE_SHADOWCUBE
- +            || texture == TGSI_TEXTURE_SHADOW1D_ARRAY
- +            || texture == TGSI_TEXTURE_SHADOW2D_ARRAY
- +            || texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- +        switch (opcode) {
- +        case FETCH_OP_SAMPLE:
- +            opcode = FETCH_OP_SAMPLE_C;
- +            break;
- +        case FETCH_OP_SAMPLE_L:
- +            opcode = FETCH_OP_SAMPLE_C_L;
- +            break;
- +        case FETCH_OP_SAMPLE_LB:
- +            opcode = FETCH_OP_SAMPLE_C_LB;
- +            break;
- +        case FETCH_OP_SAMPLE_G:
- +            opcode = FETCH_OP_SAMPLE_C_G;
- +            break;
- +        }
- +    }
- +
- +    fetch_node *f = create_fetch(opcode);
- +    f->src = src;
- +
- +    if (tgsi_op == TGSI_OPCODE_TXD) {
- +        // Append the vectors of src[2] and then src[1] after the
- +        // coordinates; the sampler is taken from src[3].
- +        vvec d;
- +        for (i = 2; i > 0; --i) {
- +            d = get_vector_values(args.src[i].kind, args.src[i].sel,
- +                                  args.src[i].swz);
- +            f->src.insert(f->src.end(), d.begin(), d.end());
- +        }
- +        sampler_src_reg = 3;
- +    }
- +
- +    f->bc.sampler_id = args.src[sampler_src_reg].sel;
- +    f->bc.resource_id = f->bc.sampler_id + R600_MAX_CONST_BUFFERS;
- +
- +    for (i = 0; i < 4; ++i) {
- +        unsigned s = (write_mask & (1 << i)) ? i : SEL_MASK;
- +        f->bc.dst_sel[i] = s;
- +        if (s != SEL_MASK)
- +            f->dst[i] = get_arg_value(0, i);
- +    }
- +
- +    if (tgsi_op == TGSI_OPCODE_TXQ_LZ) {
- +        FILLV4(f->src, sh->get_const_value(literal(0)));
- +    }
- +
- +    if (texture == TGSI_TEXTURE_CUBE || texture == TGSI_TEXTURE_SHADOWCUBE
- +            || texture == TGSI_TEXTURE_CUBE_ARRAY
- +            || texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- +        f->src[0] = src[SEL_Y];
- +        f->src[1] = src[SEL_X];
- +        f->src[2] = src[SEL_W];
- +        f->src[3] = src[SEL_Z]; /* route Z compare or Lod value into W */
- +    }
- +
- +    if (texture != TGSI_TEXTURE_RECT && texture != TGSI_TEXTURE_SHADOWRECT) {
- +        f->bc.coord_type[0] = f->bc.coord_type[1] = 1;
- +    }
- +    f->bc.coord_type[2] = f->bc.coord_type[3] = 1;
- +
- +    f->bc.offset[0] = offset[0];
- +    f->bc.offset[1] = offset[1];
- +    f->bc.offset[2] = offset[2];
- +
- +    /* Put the depth for comparison in W.
- +     * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
- +     * Some instructions expect the depth in Z. */
- +    if ((texture == TGSI_TEXTURE_SHADOW1D || texture == TGSI_TEXTURE_SHADOW2D
- +            || texture == TGSI_TEXTURE_SHADOWRECT
- +            || texture == TGSI_TEXTURE_SHADOW1D_ARRAY)
- +            && opcode != FETCH_OP_SAMPLE_C_L && opcode != FETCH_OP_SAMPLE_C_LB) {
- +        f->src[SEL_W] = f->src[SEL_Z];
- +    }
- +
- +    if (texture == TGSI_TEXTURE_1D_ARRAY
- +            || texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
- +        if (opcode == FETCH_OP_SAMPLE_C_L || opcode == FETCH_OP_SAMPLE_C_LB) {
- +            /* the array index is read from Y */
- +            f->bc.coord_type[SEL_Y] = 0;
- +        } else {
- +            /* the array index is read from Z */
- +            f->bc.coord_type[SEL_Z] = 0;
- +            f->src[SEL_Z] = f->src[SEL_Y];
- +        }
- +    } else if (texture == TGSI_TEXTURE_2D_ARRAY
- +            || texture == TGSI_TEXTURE_SHADOW2D_ARRAY
- +            || ((texture == TGSI_TEXTURE_CUBE_ARRAY
- +                 || texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
- +                && ctx.is_egcm()))
- +        /* the array index is read from Z */
- +        f->bc.coord_type[SEL_Z] = 0;
- +
- +    /* mask unused source components */
- +    if (opcode == FETCH_OP_SAMPLE) {
- +        switch (texture) {
- +        case TGSI_TEXTURE_2D:
- +        case TGSI_TEXTURE_RECT:
- +            f->src[SEL_Z] = NULL;
- +            f->src[SEL_W] = NULL;
- +            break;
- +        case TGSI_TEXTURE_1D_ARRAY:
- +            f->src[SEL_Y] = NULL;
- +            f->src[SEL_W] = NULL;
- +            break;
- +        case TGSI_TEXTURE_1D:
- +            f->src[SEL_Y] = NULL;
- +            f->src[SEL_Z] = NULL;
- +            f->src[SEL_W] = NULL;
- +            break;
- +        }
- +    }
- +
- +    emit_node(f);
- +
- +    /* add shadow ambient support - gallium doesn't do it yet */
- +    return 0;
- +}
- +
- +// Opens an IF construct.
- +// Emits the ALU op from the info table (info->isa_op) on src0.x and 0.0f
- +// with SV_EXEC_MASK as its destination, then builds the nesting
- +//   region -> depart (d_false) -> if -> depart (d_true)
- +// pushes the region on if_stack and makes d_true the current container,
- +// so the following instructions land in the "true" branch.
- +int tgsi_translator::ti_if() {
- + alu_node *a = build_alu(info->isa_op, sh->get_special_value(SV_EXEC_MASK),
- + 0, asrc(args.src[0], 0), asrc(0.0f));
- +
- + // Prepend two NULL destinations so that the exec mask ends up as dst[2].
- + a->dst.insert(a->dst.begin(), 2, (value*)NULL);
- + emit_node(a);
- +
- + region_node *r = sh->create_region();
- + depart_node *d_true = sh->create_depart(r);
- + depart_node *d_false = sh->create_depart(r);
- + if_node *i = sh->create_if();
- +
- + i->cond = sh->get_special_value(SV_EXEC_MASK);
- + r->push_back(d_false);
- + d_false->push_back(i);
- + i->push_back(d_true);
- + emit_node(r);
- + if_stack.push(r);
- + current = d_true;
- + return 0;
- +}
- +
- +// Starts the ELSE branch of the innermost open IF.
- +// Locates the if node as the first child of the first child (a depart)
- +// of the region on top of if_stack, inserts a new container right after
- +// it and makes that container current. Asserts that the if node has no
- +// following sibling yet.
- +int tgsi_translator::ti_else() {
- + assert(!if_stack.empty());
- + region_node *r = if_stack.top();
- + depart_node *d_false = static_cast<depart_node*>(r->first);
- + assert(d_false && d_false->is_depart());
- + if_node *i = static_cast<if_node*>(d_false->first);
- + assert(i && i->is_if());
- + assert(!i->next);
- + container_node *c = sh->create_container();
- +
- + i->insert_after(c);
- + current = c;
- + return 0;
- +}
- +
- +// Closes the innermost open IF.
- +// If a node follows the if node (the container added by ti_else) it must
- +// be the current container; expand() is called on it.
- +// NOTE(review): expand() presumably splices the container's children into
- +// its parent - confirm against container_node.
- +// Afterwards emission continues in the parent of the IF region and the
- +// region is popped from if_stack.
- +int tgsi_translator::ti_endif() {
- + assert(!if_stack.empty());
- + region_node *r = if_stack.top();
- + depart_node *d = static_cast<depart_node*>(r->first);
- + assert(d && d->is_depart());
- + if_node *i = static_cast<if_node*>(d->first);
- + assert(i && i->is_if());
- + container_node *c = static_cast<container_node*>(i->next);
- + assert(!c || (current == c && c->is_container()));
- +
- + if (c)
- + c->expand();
- +
- + current = r->parent;
- + if_stack.pop();
- + return 0;
- +}
- +
- +// Opens a loop: creates a region containing a single repeat node, emits
- +// the region, pushes it on loop_stack and makes the repeat node the
- +// current container so the loop body is emitted into it.
- +int tgsi_translator::ti_begin_loop() {
- + region_node *r = sh->create_region();
- + repeat_node *d = sh->create_repeat(r);
- +
- + r->push_back(d);
- + emit_node(r);
- + loop_stack.push(r);
- + current = d;
- + return 0;
- +}
- +
- +// Handles a loop control instruction inside the innermost open loop.
- +// CF_OP_LOOP_CONTINUE creates a repeat node targeting the loop region,
- +// any other opcode routed here creates a depart node. Everything that is
- +// currently in `current` is moved into the new node, the node is emitted
- +// into `current`, and sh->simplify_dep_rep() is run on it.
- +int tgsi_translator::ti_loop_op() {
- + assert(!loop_stack.empty());
- + region_node *r = loop_stack.top();
- + container_node *rd;
- +
- + if (info->isa_op == CF_OP_LOOP_CONTINUE)
- + rd = sh->create_repeat(r);
- + else
- + rd = sh->create_depart(r);
- + if (!current->empty())
- + rd->move(current->begin(), current->end());
- + emit_node(rd);
- + sh->simplify_dep_rep(rd);
- + return 0;
- +}
- +
- +// Closes the innermost open loop: emission continues in the parent of
- +// the loop region, and the region is popped from loop_stack.
- +int tgsi_translator::ti_end_loop() {
- + assert(!loop_stack.empty());
- + region_node *r = loop_stack.top();
- +
- + current = r->parent;
- + loop_stack.pop();
- + return 0;
- +}
- +
- +/*
- + * Copy the four channels of `ta` into fresh temporaries and rewrite the
- + * argument to refer to those copies: its relative-addressing flag is
- + * cleared, `values` holds the temps and its kind becomes VLK_TEMP.
- + */
- +int tgsi_translator::split_src_arg(tgsi_arg &ta) {
- +    vvec copies;
- +    create_temps(copies, 4);
- +
- +    for (int chan = 0; chan < 4; ++chan) {
- +        value *original = get_arg_value(ta, chan);
- +        emit_alu(ALU_OP1_MOV, copies[chan], 0, asrc(original));
- +    }
- +
- +    ta.rel = 0;
- +    ta.values = copies;
- +    ta.kind = VLK_TEMP;
- +    return 0;
- +}
- +
- +/*
- + * Load a relatively addressed constant through a vertex fetch.
- + *
- + * The fetch index is the X channel of the address register
- + * ta.rel_addr_index plus ta.sel; the fetch reads resource ta.kc_bank and
- + * applies the argument's swizzle as its destination select. On return
- + * `ta` refers to the fetched temporaries (kind VLK_TEMP, rel cleared).
- + */
- +int tgsi_translator::fetch_rel_const(tgsi_arg& ta) {
- +    value* index = create_temp();
- +    value* addr = get_tgsi_value(VLK_TGSI_ADDR, ta.rel_addr_index, 0);
- +    emit_alu(ALU_OP2_ADD_INT, index, 0, asrc(addr),
- +             asrc(static_cast<unsigned>(ta.sel)));
- +
- +    vvec fetched;
- +    create_temps(fetched, 4);
- +
- +    fetch_node *vtx = create_fetch(FETCH_OP_VFETCH);
- +    vtx->bc.resource_id = ta.kc_bank;
- +    vtx->bc.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
- +    vtx->bc.mega_fetch_count = 16;
- +    vtx->bc.data_format = 0x23; // FMT_32_32_32_32_FLOAT;
- +    vtx->bc.num_format_all = 2; /* NUM_FORMAT_SCALED */
- +    vtx->bc.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
- +    vtx->bc.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
- +    vtx->bc.endian_swap = r600_endian_swap(32);
- +    vtx->src[0] = index;
- +    vtx->dst = fetched;
- +
- +    for (int chan = 0; chan < 4; ++chan) {
- +        vtx->bc.dst_sel[chan] = ta.swz[chan];
- +    }
- +
- +    emit_node(vtx);
- +
- +    ta.values = fetched;
- +    ta.rel = 0;
- +    ta.kind = VLK_TEMP;
- +    return 0;
- +}
- +
- +} // namespace r600_sb
- diff --git a/src/gallium/drivers/r600/sb/sb_tgsi.h b/src/gallium/drivers/r600/sb/sb_tgsi.h
- new file mode 100644
- index 0000000..bbfb115
- --- /dev/null
- +++ b/src/gallium/drivers/r600/sb/sb_tgsi.h
- @@ -0,0 +1,331 @@
- +/*
- + * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
- + *
- + * Permission is hereby granted, free of charge, to any person obtaining a
- + * copy of this software and associated documentation files (the "Software"),
- + * to deal in the Software without restriction, including without limitation
- + * on the rights to use, copy, modify, merge, publish, distribute, sub
- + * license, and/or sell copies of the Software, and to permit persons to whom
- + * the Software is furnished to do so, subject to the following conditions:
- + *
- + * The above copyright notice and this permission notice (including the next
- + * paragraph) shall be included in all copies or substantial portions of the
- + * Software.
- + *
- + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- + * USE OR OTHER DEALINGS IN THE SOFTWARE.
- + *
- + * Authors:
- + * Vadim Girlin
- + */
- +
- +
- +#ifndef SB_TGSI_TRANSLATOR_H_
- +#define SB_TGSI_TRANSLATOR_H_
- +
- +extern "C" {
- +#include "tgsi/tgsi_info.h"
- +#include "tgsi/tgsi_parse.h"
- +#include "tgsi/tgsi_scan.h"
- +}
- +
- +namespace r600_sb {
- +
- +/*
- + * An ALU source operand: a value plus its abs and neg source modifiers.
- + * Implicitly constructible from a bare value pointer (no modifiers).
- + */
- +struct alu_src {
- +    value *v;   // the operand value, may be NULL
- +    int abs;    // non-zero: absolute-value modifier
- +    int neg;    // non-zero: negate modifier
- +
- +    alu_src(value *val = NULL, int abs_mod = 0, int neg_mod = 0)
- +        : v(val), abs(abs_mod), neg(neg_mod) {}
- +};
- +
- +/**
- + * Per-shader translation state for TGSI input.
- + *
- + * The object keeps the TGSI token stream and parse context, tables of
- + * shader inputs/outputs, a set of flags/counters and the decoded operands
- + * of the instruction currently being handled.  translate() is the only
- + * public member function besides the constructor and returns a shader*.
- + * NOTE(review): presumably translate() builds the sb IR for `ps` from its
- + * TGSI tokens - confirm against sb_tgsi.cpp.
- + */
- +class tgsi_translator {
- +
- +	// Capacity of the input[] and output[] tables below.
- +	static const int max_io = 40;
- +
- +	// One entry of the input[]/output[] tables: an r600_shader_io record
- +	// plus two TGSI-side fields.
- +	struct shader_io {
- +		r600_shader_io d;
- +
- +		unsigned tgsi_index;
- +		unsigned use_mask;
- +	};
- +
- +	// Operand record; tgsi_args holds one as the destination and an array
- +	// of them as sources.
- +	// NOTE(review): individual field meanings are inferred from their
- +	// names only - confirm against the code that fills them in.
- +	struct tgsi_arg {
- +		int file;
- +		value_kind kind;
- +		int sel;
- +		int rel;
- +		int abs;
- +		int neg;
- +		int rel_addr_index;
- +		int rel_addr_chan;
- +		int rel_array_id;
- +		int kc_bank;
- +		unsigned swz[4];
- +		vvec values;
- +		bool dst;
- +	};
- +
- +	// Destination plus up to TGSI_FULL_MAX_SRC_REGISTERS sources; nsrc is
- +	// presumably the number of src[] entries in use - TODO confirm.
- +	struct tgsi_args {
- +		tgsi_arg dst;
- +		tgsi_arg src[TGSI_FULL_MAX_SRC_REGISTERS];
- +		int nsrc;
- +	};
- +
- +/*	struct cf_stack_elem {
- +		region_node *r;
- +	};
- +*/
- +	// Pointer to a member handler that takes no arguments and returns int
- +	// (the signature shared by the ti_*() functions declared below).
- +	typedef int (tgsi_translator::*tgsi_inst_func)();
- +
- +	// Bit flags stored in tgsi_inst_info::flags.
- +	enum tgsi_inst_flags {
- +		// presumably: swap source operands 0 and 1 for ALU ops - confirm
- +		TIF_ALU_SWAPSRC01 = (1 << 0)
- +
- +	};
- +
- +	// One row of tgsi_info_table: opcode name, TGSI opcode, ISA opcode,
- +	// handler member function and tgsi_inst_flags bits.
- +	struct tgsi_inst_info {
- +		const char *name;
- +		unsigned tgsi_op;
- +		int isa_op;
- +		tgsi_inst_func func;
- +		unsigned flags;
- +	};
- +
- +	// Table with TGSI_OPCODE_LAST rows (presumably indexed by TGSI opcode;
- +	// defined outside this header).
- +	static const tgsi_inst_info tgsi_info_table[TGSI_OPCODE_LAST];
- +	static alu_src null_alu_src;
- +
- +	// NOTE(review): this names a *function type* returning int*, not a
- +	// pointer to a function returning int, and nothing in this class
- +	// refers to it - confirm the intent before relying on it.
- +	typedef int *emit_func();
- +
- +	sb_context &ctx;
- +	shader *sh;
- +	unsigned shader_id;
- +
- +	r600_pipe_shader *ps;
- +	r600_shader_key key;
- +
- +	// Input/output tables and the matching counters.
- +	shader_io input[max_io];
- +	shader_io output[max_io];
- +
- +	int ninput;
- +	int noutput;
- +
- +	// TGSI token stream, parser state, and the instruction / table row
- +	// currently being processed (both start out as null pointers).
- +	tgsi_token* tokens;
- +	tgsi_parse_context parse;
- +	tgsi_full_instruction *inst;
- +	const tgsi_inst_info *info;
- +
- +	// face_input and fragcoord_input start at -1; the remaining members
- +	// of this group start at 0 (see the constructor).
- +	int face_input;
- +	int fragcoord_input;
- +	int colors_used;
- +	int clip_vertex_write;
- +	int cv_output;
- +	int nr_ps_max_color_exports;
- +	int nr_ps_color_exports;
- +	int vs_out_misc_write;
- +	int vs_out_point_size;
- +	int uses_tex_buffers;
- +	int has_txq_cube_array_z_comp;
- +
- +	// XXX probably unused now
- +	unsigned indirect_vlk;
- +
- +	// Both start at -1 (see the constructor).
- +	int instanceid_index;
- +	int vertexid_index;
- +
- +	boolean two_side;
- +
- +	unsigned clip_dist_write;
- +	unsigned fs_write_all;
- +	unsigned uses_kill;
- +	unsigned tgsi_proc;
- +	unsigned interp_mask;
- +	unsigned file_offset[TGSI_FILE_COUNT];
- +
- +	// Container that receives emitted nodes (presumably - confirm in
- +	// emit_node()), plus stacks of open if/loop regions.
- +	container_node *current;
- +	std::stack<region_node*> if_stack;
- +	std::stack<region_node*> loop_stack;
- +
- +	unsigned write_mask;
- +	int clamp;
- +
- +	// Decoded operands of the instruction being handled.
- +	tgsi_args args;
- +
- +	std::vector<uint32_t> literals;
- +
- +public:
- +
- +	/**
- +	 * Stores the context reference, pipe shader, shader key and shader id
- +	 * and value-initializes all remaining state.  face_input,
- +	 * fragcoord_input, instanceid_index and vertexid_index start at -1.
- +	 * inst and info are value-initialized as well, so that no pointer
- +	 * member is left indeterminate after construction.
- +	 */
- +	tgsi_translator(sb_context &ctx, r600_pipe_shader *ps, r600_shader_key key,
- +	                unsigned shader_id)
- +		: ctx(ctx), sh(), shader_id(shader_id), ps(ps), key(key),
- +		  input(), output(), ninput(), noutput(),
- +		  tokens(), parse(), inst(), info(),
- +		  face_input(-1), fragcoord_input(-1), colors_used(),
- +		  clip_vertex_write(), cv_output(), nr_ps_max_color_exports(),
- +		  nr_ps_color_exports(),
- +		  vs_out_misc_write(), vs_out_point_size(), uses_tex_buffers(),
- +		  has_txq_cube_array_z_comp(), indirect_vlk(),
- +		  instanceid_index(-1), vertexid_index(-1), two_side(),
- +		  clip_dist_write(), fs_write_all(), uses_kill(),
- +		  tgsi_proc(), interp_mask(),
- +		  file_offset(), current(),
- +		  if_stack(), loop_stack(), write_mask(), clamp(), args() {}
- +
- +	// Public entry point; returns a shader*.
- +	shader* translate();
- +
- +private:
- +
- +	int spi_sid(int name, int sid);
- +
- +	// Token-stream passes and per-token handlers.
- +	int parse_declarations();
- +	int parse_instructions();
- +
- +	int parse_property();
- +	int parse_declaration();
- +	int parse_immediate();
- +	int parse_instruction();
- +
- +	int split_src_arg(tgsi_arg &ta);
- +
- +	// Shader input handling.
- +	int emit_inputs();
- +	int get_ij(shader_io &in);
- +	alu_packed_node* build_interp(shader_io& in, unsigned type);
- +	alu_group_node* build_interp_flat(shader_io& in);
- +
- +	// Shader output / export handling.
- +	int emit_exports();
- +	int emit_fake_export(unsigned type);
- +	int emit_export(shader_io &o, unsigned type, unsigned base, unsigned *swz,
- +	                unsigned tgsi_index);
- +
- +	int update_pipe_shader();
- +
- +	value* get_arg_value(unsigned index, unsigned chan);
- +	value* get_arg_value(tgsi_arg &ta, unsigned chan);
- +
- +	value* get_tgsi_value(value_kind kind, unsigned index, unsigned chan);
- +
- +	// Shorthand for get_arg_value(0, chan).
- +	value* tgsi_dst(unsigned chan) { return get_arg_value(0, chan); }
- +	value_kind file_to_value_kind(unsigned file);
- +
- +	alu_node* create_alu(unsigned op);
- +
- +	alu_node* build_alu(unsigned op, int chan, int dstchan = -1);
- +
- +	// Full form: destination, clamp and three (value, abs, neg) sources.
- +	alu_node* build_alu(unsigned op, value *dst, int clamp,
- +	                    value *s0, int s0abs, int s0neg,
- +	                    value *s1, int s1abs, int s1neg,
- +	                    value *s2, int s2abs, int s2neg);
- +
- +	// The alu_src overloads below unpack their operands and forward to
- +	// the full form, passing NULL/0/0 for every source that is absent.
- +	alu_node* build_alu(unsigned op, value *dst, int clamp,
- +	                    const alu_src& s0) {
- +		return build_alu(op, dst, clamp, s0.v, s0.abs, s0.neg, NULL, 0, 0,
- +		                 NULL, 0, 0);
- +	}
- +	alu_node* build_alu(unsigned op, value *dst, int clamp,
- +	                    const alu_src& s0, const alu_src& s1) {
- +		return build_alu(op, dst, clamp, s0.v, s0.abs, s0.neg,
- +		                 s1.v, s1.abs, s1.neg, NULL, 0, 0);
- +	}
- +	alu_node* build_alu(unsigned op, value *dst, int clamp,
- +	                    const alu_src& s0, const alu_src& s1,
- +	                    const alu_src& s2) {
- +		return build_alu(op, dst, clamp, s0.v, s0.abs, s0.neg,
- +		                 s1.v, s1.abs, s1.neg, s2.v, s2.abs, s2.neg);
- +	}
- +
- +	int emit_alu(unsigned op, int chan, int dstchan = -1);
- +
- +	// Full form, same parameter layout as the full build_alu() but
- +	// returning int.
- +	int emit_alu(unsigned op, value *dst, int clamp,
- +	             value *s0, int s0abs, int s0neg,
- +	             value *s1, int s1abs, int s1neg,
- +	             value *s2, int s2abs, int s2neg);
- +
- +	// alu_src overloads: forward to the full emit_alu(), passing NULL/0/0
- +	// for every source that is absent.
- +	int emit_alu(unsigned op, value *dst, int clamp,
- +	             const alu_src& s0) {
- +		return emit_alu(op, dst, clamp, s0.v, s0.abs, s0.neg, NULL, 0, 0,
- +		                NULL, 0, 0);
- +	}
- +	int emit_alu(unsigned op, value *dst, int clamp,
- +	             const alu_src& s0, const alu_src& s1) {
- +		return emit_alu(op, dst, clamp, s0.v, s0.abs, s0.neg,
- +		                s1.v, s1.abs, s1.neg, NULL, 0, 0);
- +	}
- +	int emit_alu(unsigned op, value *dst, int clamp,
- +	             const alu_src& s0, const alu_src& s1,
- +	             const alu_src& s2) {
- +		return emit_alu(op, dst, clamp, s0.v, s0.abs, s0.neg,
- +		                s1.v, s1.abs, s1.neg, s2.v, s2.abs, s2.neg);
- +	}
- +
- +	void emit_node(node *n);
- +	void begin_group();
- +	void end_group();
- +	vvec get_vector_values(value_kind kind, unsigned tgsi_index,
- +	                       unsigned *swz = NULL);
- +
- +	// Instruction handlers; all share the tgsi_inst_func signature.
- +	int ti_unsupported();
- +	int ti_alu();
- +	int ti_dot();
- +	int ti_repl();
- +	int ti_lit();
- +	int ti_trig();
- +	int ti_scs();
- +	value* prepare_trig(alu_src s);
- +
- +	int ti_exp();
- +	int ti_log();
- +	int ti_dst();
- +	int ti_lrp();
- +	int ti_pow();
- +	int ti_replicate(value *t);
- +	int ti_xpd();
- +	int ti_kill();
- +	int ti_arl();
- +	int ti_ssg();
- +	int ti_cmp();
- +	int ti_umad();
- +	int ti_f2iu();
- +	int ti_ineg();
- +	int ti_iabs();
- +	int ti_divmod();
- +
- +	int ti_tex();
- +	int ti_buffer_txq();
- +	int ti_vtx_fetch();
- +
- +	int ti_if();
- +	int ti_else();
- +	int ti_endif();
- +
- +	int ti_begin_loop();
- +	int ti_loop_op();
- +	int ti_end_loop();
- +
- +	// Factories returning an alu_src from different kinds of operand.
- +	alu_src asrc(value *v, int abs = 0, int neg = 0);
- +	alu_src asrc(literal l, int abs = 0, int neg = 0);
- +	alu_src asrc(float f, int abs = 0, int neg = 0);
- +	alu_src asrc(uint32_t u, int abs = 0, int neg = 0);
- +	alu_src asrc(tgsi_arg& ta, int chan);
- +	alu_src asrc(tgsi_arg& ta, int chan, int abs, int neg);
- +
- +	// Forwards to sh->create_temp_value(chan).
- +	value* create_temp(int chan = 0) { return sh->create_temp_value(chan); }
- +
- +	// Resizes temps to n elements and assigns each one the result of
- +	// create_temp() (default channel).
- +	void create_temps(vvec &temps, int n) {
- +		temps.resize(n);
- +		for (int i = 0; i < n; ++i)
- +			temps[i] = create_temp();
- +	}
- +
- +	uint32_t get_immediate(sel_chan sc);
- +
- +	fetch_node* create_fetch(unsigned op);
- +
- +	int fetch_rel_const(tgsi_arg& ta);
- +};
- +
- +} // namespace r600_sb
- +
- +#endif /* SB_TGSI_TRANSLATOR_H_ */
- diff --git a/src/gallium/drivers/r600/sb/sb_valtable.cpp b/src/gallium/drivers/r600/sb/sb_valtable.cpp
- index 00aee66..ad2e78b 100644
- --- a/src/gallium/drivers/r600/sb/sb_valtable.cpp
- +++ b/src/gallium/drivers/r600/sb/sb_valtable.cpp
- @@ -61,13 +61,55 @@ sb_ostream& operator << (sb_ostream &o, value &v) {
- }
- case VLK_REG:
- - o << "R" << v.select.sel() << "."
- + if (v.rel) {
- + o << "AREG" << v.select;
- + o << "[";
- + o << *v.rel;
- + o << "]";
- + o << "_" << v.uid;
- + } else
- + o << "R" << v.select.sel() << "."
- + << chans[v.select.chan()];
- + break;
- + case VLK_TGSI_INPUT:
- + if (v.rel) {
- + o << "AIN" << v.select;
- + o << "[";
- + o << *v.rel;
- + o << "]";
- + o << "_" << v.uid;
- + } else
- + o << "IN" << v.select.sel() << "."
- + << chans[v.select.chan()];
- + break;
- + case VLK_TGSI_OUTPUT:
- + if (v.rel) {
- + o << "AOUT" << v.select;
- + o << "[";
- + o << *v.rel;
- + o << "]";
- + o << "_" << v.uid;
- + } else
- + o << "OUT" << v.select.sel() << "."
- + << chans[v.select.chan()];
- + break;
- + case VLK_TGSI_TEMP:
- + if (v.rel) {
- + o << "ATEMP" << v.select;
- + o << "[";
- + o << *v.rel;
- + o << "]";
- + o << "_" << v.uid;
- + } else
- + o << "TEMP" << v.select.sel() << "."
- + << chans[v.select.chan()];
- + break;
- + case VLK_TGSI_ADDR:
- + o << "ADDR" << v.select.sel() << "."
- << chans[v.select.chan()];
- -
- break;
- - case VLK_KCACHE: {
- + case VLK_KCACHE:
- o << "C" << v.select.sel() << "." << chans[v.select.chan()];
- - }
- break;
- case VLK_CONST:
- o << v.literal_value.f << "|";
- @@ -80,16 +122,6 @@ sb_ostream& operator << (sb_ostream &o, value &v) {
- case VLK_TEMP:
- o << "t" << v.select.sel() - shader::temp_regid_offset;
- break;
- - case VLK_REL_REG:
- -
- - o << "A" << v.select;
- - o << "[";
- - o << *v.rel;
- - o << "]";
- -
- - o << "_" << v.uid;
- -
- - break;
- case VLK_UNDEF:
- o << "undef";
- break;
- @@ -113,7 +145,7 @@ sb_ostream& operator << (sb_ostream &o, value &v) {
- sel_chan g;
- - if (v.is_rel()) {
- + if (v.array) {
- g = v.array->gpr;
- } else {
- g = v.gpr;
- @@ -542,7 +574,8 @@ bool ra_constraint::check() {
- return true;
- }
- -bool gpr_array::is_dead() {
- +bool rel_array::is_dead() {
- + // XXX maybe do something here?
- return false;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement