From 417e6f47abc3021765b62d482a61bdffaf5a3f09 Mon Sep 17 00:00:00 2001 From: Yury Gribov Date: Mon, 4 Mar 2024 13:24:08 +0300 Subject: [PATCH] This commit illustrates the possibility of adding support for superscalar architectures in Unison. --- src/solvers/gecode/models/completemodel.cpp | 10 +++ src/solvers/gecode/models/globalmodel.cpp | 6 +- src/solvers/gecode/models/localmodel.cpp | 14 +++ src/solvers/gecode/models/model.cpp | 90 +++++++++++++++++-- src/solvers/gecode/models/model.hpp | 17 ++++ src/solvers/gecode/models/simplemodel.cpp | 1 + src/unison/src/Unison/Tools/Export.hs | 6 +- .../Unison/Tools/Export/BundleOperations.hs | 13 +-- 8 files changed, 141 insertions(+), 16 deletions(-) diff --git a/src/solvers/gecode/models/completemodel.cpp b/src/solvers/gecode/models/completemodel.cpp index 16c1edf9..31b3d30c 100644 --- a/src/solvers/gecode/models/completemodel.cpp +++ b/src/solvers/gecode/models/completemodel.cpp @@ -84,6 +84,7 @@ CompleteModel::CompleteModel(Parameters * p_input, ModelOptions * p_options, v_r = int_var_array(T().size(), -1, input->RA.size() - 1); v_i = int_var_array(O().size(), 0, input->I.size() - 1); v_c = int_var_array(O().size(), 0, max_of(input->maxc)); + v_ff = int_var_array(O().size(), 0, W * max_of(input->maxc)); if (!P().empty()) { v_y = int_var_array(P().size(), 0, input->T.size() - 1); } @@ -91,9 +92,13 @@ CompleteModel::CompleteModel(Parameters * p_input, ModelOptions * p_options, v_ry = int_var_array(P().size(), -1, input->RA.size() - 1); v_a = bool_var_array(O().size(), 0, 1); v_ls = int_var_array(T().size(), 0, max_of(input->maxc)); + v_ls_ff = int_var_array(T().size(), 0, W * max_of(input->maxc)); v_ld = int_var_array(T().size(), 0, max_of(input->maxc)); + v_ld_ff = int_var_array(T().size(), 0, W * max_of(input->maxc)); v_le = int_var_array(T().size(), 0, max_of(input->maxc) + maybe_max_of(0, input->minlive)); + v_le_ff = int_var_array(T().size(), 0, + W * (max_of(input->maxc) + maybe_max_of(0, input->minlive))); v_al = 
bool_var_array(T().size() * input->RS.size(), 0, 1); v_u = bool_var_array(input->nu, 0, 1); v_us = int_var_array(T().size(), 0, O().size()); @@ -370,6 +375,10 @@ string CompleteModel::solution_to_json() const { for (operation o : input->O) cs.push_back(a(o).val() ? c(o).val() : -1); + vector<int> ffs; + for (operation o : input->O) + ffs.push_back(a(o).val() ? ff(o).val() : -1); + vector ys; for (operand p : input->P) ys.push_back(input->temps[p][y(p).val()]); @@ -378,6 +387,7 @@ string CompleteModel::solution_to_json() const { pOs << "\"registers\":" << to_json(rs) << ","; pOs << "\"instructions\":" << to_json(is) << ","; pOs << "\"cycles\":" << to_json(cs) << ","; + pOs << "\"fetches\":" << to_json(ffs) << ","; pOs << "\"temporaries\":" << to_json(ys); pOs << "}"; diff --git a/src/solvers/gecode/models/globalmodel.cpp b/src/solvers/gecode/models/globalmodel.cpp index e719c39f..9f9d88e7 100644 --- a/src/solvers/gecode/models/globalmodel.cpp +++ b/src/solvers/gecode/models/globalmodel.cpp @@ -709,6 +709,8 @@ void GlobalModel::post_complete_branchers(unsigned int s) { branch(*this, v_c, INT_VAR_NONE(), INT_VAL_MIN(), &schedulable, &print_global_cycle_decision); + branch(*this, v_ff, INT_VAR_NONE(), INT_VAL_MIN()); + branch(*this, v_r, INT_VAR_NONE(), INT_VAL_RND(r), &global_assignable, &print_global_register_decision); @@ -786,8 +788,10 @@ void GlobalModel::apply_solution(LocalModel * ls) { constraint(i(o) == ls->i(o)); for (operation o : input->ops[b]) - if (!ls->is_inactive(o)) + if (!ls->is_inactive(o)) { constraint(c(o) == ls->c(o)); + constraint(ff(o) == ls->ff(o)); + } for (operand p : input->ope[b]) constraint(y(p) == ls->y(p)); diff --git a/src/solvers/gecode/models/localmodel.cpp b/src/solvers/gecode/models/localmodel.cpp index 7b8604c0..92466545 100644 --- a/src/solvers/gecode/models/localmodel.cpp +++ b/src/solvers/gecode/models/localmodel.cpp @@ -86,6 +86,7 @@ LocalModel::LocalModel(Parameters * p_input, ModelOptions * p_options, v_r = 
int_var_array(T().size(), -1, input->RA.size() - 1); v_i = int_var_array(O().size(), 0, input->I.size() - 1); v_c = int_var_array(O().size(), 0, input->maxc[b]); + v_ff = int_var_array(O().size(), 0, W * input->maxc[b]); if (!P().empty()) { v_y = int_var_array(P().size(), 0, input->T.size() - 1); } @@ -93,9 +94,13 @@ LocalModel::LocalModel(Parameters * p_input, ModelOptions * p_options, v_ry = int_var_array(P().size(), -1, input->RA.size() - 1); v_a = bool_var_array(O().size(), 0, 1); v_ls = int_var_array(T().size(), 0, input->maxc[b]); + v_ls_ff = int_var_array(T().size(), 0, W * input->maxc[b]); v_ld = int_var_array(T().size(), 0, input->maxc[b]); + v_ld_ff = int_var_array(T().size(), 0, W * input->maxc[b]); v_le = int_var_array(T().size(), 0, input->maxc[b] + maybe_max_of(0, input->minlive)); + v_le_ff = int_var_array(T().size(), 0, + W * (input->maxc[b] + maybe_max_of(0, input->minlive))); v_al = bool_var_array(T().size() * input->RS.size(), 0, 1); v_u = bool_var_array(input->bnu[b], 0, 1); v_us = int_var_array(T().size(), 0, O().size()); @@ -291,6 +296,7 @@ void LocalModel::post_aggressive_branchers(void) { branch(*this, &LocalModel::post_before_scheduling_constraints_in_space); branch_on_pressure_scheduling(*this, v_c); + branch(*this, v_ff, INT_VAR_NONE(), INT_VAL_MIN()); branch(*this, v_r, INT_VAR_SIZE_MIN(), INT_VAL_MIN(), &assignable, &print_register_decision); @@ -314,6 +320,7 @@ void LocalModel::post_trivial_branchers(void) { branch(*this, v_c, INT_VAR_MIN_MIN(), INT_VAL_MIN(), &schedulable, &print_cycle_decision); + branch(*this, v_ff, INT_VAR_NONE(), INT_VAL_MIN()); branch(*this, v_r, INT_VAR_SIZE_MIN(), INT_VAL_MIN(), &assignable, &print_register_decision); @@ -340,6 +347,7 @@ void LocalModel::post_minimum_cost_branchers(void) { branch(*this, v_c, INT_VAR_MIN_MIN(), INT_VAL_MIN(), &schedulable, &print_cycle_decision); + branch(*this, v_ff, INT_VAR_NONE(), INT_VAL_MIN()); branch(*this, v_r, INT_VAR_SIZE_MIN(), INT_VAL_MIN(), &assignable, 
&print_register_decision); @@ -363,6 +371,7 @@ void LocalModel::post_fail_first_branchers(void) { branch(*this, v_c, INT_VAR_SIZE_MIN(), INT_VAL_MIN(), &schedulable, &print_cycle_decision); + branch(*this, v_ff, INT_VAR_NONE(), INT_VAL_MIN()); branch(*this, v_r, INT_VAR_SIZE_MIN(), INT_VAL_MIN(), &assignable, &print_register_decision); @@ -376,6 +385,7 @@ void LocalModel::post_conservative_branchers(void) { branch(*this, &LocalModel::post_before_scheduling_constraints_in_space); branch_on_pressure_scheduling(*this, v_c); + branch(*this, v_ff, INT_VAR_NONE(), INT_VAL_MIN()); branch(*this, v_r, INT_VAR_SIZE_MIN(), INT_VAL_MIN(), &assignable, &print_register_decision); @@ -451,6 +461,7 @@ void LocalModel::apply_solution(const GlobalModel * gs) { for (operation o : input->ops[b]) { copy_domain(*this, gs->i(o), i(o)); copy_domain(*this, gs->c(o), c(o)); + copy_domain(*this, gs->ff(o), ff(o)); copy_domain(*this, gs->a(o), a(o)); } @@ -468,6 +479,9 @@ void LocalModel::apply_solution(const GlobalModel * gs) { copy_domain(*this, gs->ls(t), ls(t)); copy_domain(*this, gs->ld(t), ld(t)); copy_domain(*this, gs->le(t), le(t)); + copy_domain(*this, gs->ls_ff(t), ls_ff(t)); + copy_domain(*this, gs->ld_ff(t), ld_ff(t)); + copy_domain(*this, gs->le_ff(t), le_ff(t)); } for (unsigned int n = 0; n < input->N; n++) { diff --git a/src/solvers/gecode/models/model.cpp b/src/solvers/gecode/models/model.cpp index 330d5b4b..41406950 100644 --- a/src/solvers/gecode/models/model.cpp +++ b/src/solvers/gecode/models/model.cpp @@ -454,6 +454,7 @@ Model::Model(Model& m) : v_r.update(*this, m.v_r); v_i.update(*this, m.v_i); v_c.update(*this, m.v_c); + v_ff.update(*this, m.v_ff); v_y.update(*this, m.v_y); v_x.update(*this, m.v_x); v_ry.update(*this, m.v_ry); @@ -461,6 +462,9 @@ Model::Model(Model& m) : v_ls.update(*this, m.v_ls); v_ld.update(*this, m.v_ld); v_le.update(*this, m.v_le); + v_ls_ff.update(*this, m.v_ls_ff); + v_ld_ff.update(*this, m.v_ld_ff); + v_le_ff.update(*this, m.v_le_ff); 
v_al.update(*this, m.v_al); v_u.update(*this, m.v_u); v_us.update(*this, m.v_us); @@ -497,19 +501,27 @@ void Model::post_issue_cycle_domains(block b) { for (operation o : input->ops[b]) { constraint(c(o) <= bmaxc); constraint(c(o) <= c(input->out[b])); + constraint(ff(o) <= W * bmaxc); + constraint(ff(o) <= ff(input->out[b])); } for (temporary t : input->tmp[b]) { constraint(ls(t) <= bmaxc); - constraint(le(t) <= (bmaxc + input->minlive[t])); + constraint(le(t) <= bmaxc + input->minlive[t]); + + constraint(ls_ff(t) <= W * bmaxc); + constraint(le_ff(t) <= W * (bmaxc + input->minlive[t])); } // in-delimiters always start in cycle 0 constraint(c(input->in[b]) == 0); + constraint(ff(input->in[b]) == 0); // the rest of operations start at least in cycle 1 - for (operation o : input->ops[b]) + for (operation o : input->ops[b]) { if (o != input->in[b]) constraint(c(o) >= 1); + if (o != input->in[b]) constraint(ff(o) >= W); + } } @@ -564,8 +576,10 @@ void Model::post_live_start_definition(block b) { // The live range of a temporary starts at the issue cycle of its definer: - for (temporary t : input->tmp[b]) + for (temporary t : input->tmp[b]) { constraint(ls(t) == c(input->oper[input->definer[t]])); + constraint(ls_ff(t) == ff(input->oper[input->definer[t]])); + } } @@ -574,8 +588,10 @@ void Model::post_live_duration_definition(block b) { // The live range of a temporary t is as long as the distance between its live // end and its live start: - for (temporary t : input->tmp[b]) + for (temporary t : input->tmp[b]) { constraint(ld(t) == (le(t) - ls(t))); + constraint(ld_ff(t) == (le_ff(t) - ls_ff(t))); + } } @@ -590,6 +606,12 @@ void Model::post_live_end_definition(block b) { uc << var(ite(u(p, t), c(input->oper[p]), 0)); uc << var(ls(t) + input->minlive[t]); constraint(le(t) == max(uc)); + + IntVarArgs uc_ff; + for (operand p : input->users[t]) + uc_ff << var(ite(u(p, t), ff(input->oper[p]), 0)); + uc_ff << var(ls_ff(t) + W * input->minlive[t]); + constraint(le_ff(t) == 
max(uc_ff)); } } @@ -727,6 +749,7 @@ void Model::post_basic_model_constraints(block b) { post_prescheduling_constraints(b); post_bypassing_constraints(b); post_adhoc_constraints(b); + post_fetch_constraints(b); } @@ -813,15 +836,15 @@ void Model::post_disjoint_live_ranges_constraints(block b) { IntVarArgs bld, bw, bre, br = temps_to_var_args(v_r, input->tmp[b]), - bls = temps_to_var_args(v_ls, input->tmp[b]), - ble = temps_to_var_args(v_le, input->tmp[b]); + bls = temps_to_var_args(v_ls_ff, input->tmp[b]), + ble = temps_to_var_args(v_le_ff, input->tmp[b]); BoolVarArgs bm; for (temporary t : input->tmp[b]) { - bld << ld(t); + bld << ld_ff(t); bw << var(input->width[t]); bre << var(r(t) + input->width[t]); - bm << var(l(t) && (ld(t) > 0)); + bm << var(l(t) && (ld_ff(t) > 0)); } nooverlap(*this, br, bw, bre, bls, bld, ble, bm, ipl); @@ -1138,6 +1161,51 @@ void Model::post_adhoc_constraints(block b) { } +void Model::post_fetch_constraints(block b) { + for (operation o : input->ops[b]) { + constraint(a(o) >> (c(o) * W <= ff(o))); + constraint(a(o) >> (ff(o) < (c(o) + 1) * W)); + } + + BoolVarArgs acts; + IntVarArgs vals; + for(operation o : input->ops[b]) { + acts << a(o); + vals << ff(o); + } + Gecode::distinct(*this, acts, vals, IPL_DOM); + + // The first branch found in the block must be fetched after all other operations of the block (out-delimiters and virtual operations excepted) + for (operation o : input->ops[b]) { + if (input->type[o] == BRANCH) { + for (operation o2 : input->ops[b]) { + if (o2 == o) + continue; + if (input->type[o2] == OUT) + continue; + // Virtual operations (define, kill) have special constraints and + // may sometimes be scheduled after branches. + // They are not present in the generated MIR, so they won't cause + // verification errors. 
+ if (input->type[o2] == DEFINE || input->type[o2] == KILL) + continue; + constraint(ff(o2) < ff(o)); + } + break; + } + } + + // A call must be fetched after every other operation issued in its cycle (last operation in its bundle) + for (operation o : input->ops[b]) { + if (input->type[o] == CALL) { + for (operation o2 : input->ops[b]) { + if (o2 != o) + constraint((c(o2) == c(o)) >> (ff(o2) < ff(o))); + } + } + } +} + void Model::post_improved_model_constraints(block b) { if (!options->disable_improving()) { @@ -2301,6 +2369,11 @@ void Model::print(ostream & pOs, block b) const { pOs << endl << endl; + for (operation o : os) + pOs << left << "ff" << setw(tColWidth) << ff(o) << " "; + + pOs << endl << endl; + for (operand p : ps) pOs << left << "p" << setw(tColWidth) << p << " "; @@ -2341,6 +2414,7 @@ void Model::compare(const Space& sp, std::ostream& pOs) const { pOs << "r : " << compare("r", "t", temp_index, v_r, m.v_r) << endl; pOs << "i : " << compare("i", "o", instr_index, v_i, m.v_i) << endl; pOs << "c : " << compare("c", "o", instr_index, v_c, m.v_c) << endl; + pOs << "ff : " << compare("ff", "o", instr_index, v_ff, m.v_ff) << endl; pOs << "y : " << compare("y", "p", opr_index, v_y, m.v_y) << endl; pOs << endl; diff --git a/src/solvers/gecode/models/model.hpp b/src/solvers/gecode/models/model.hpp index 627ff91d..85522a2f 100644 --- a/src/solvers/gecode/models/model.hpp +++ b/src/solvers/gecode/models/model.hpp @@ -67,6 +67,10 @@ class Model : public IntLexMinimizeSpace { public: + // Must be larger than maximum issue size due to pseudo-insns (kill, low, etc.) 
+ // TODO: this should of course come from model.json + static const int W = 8; + Parameters * input; ModelOptions * options; IntPropLevel ipl; @@ -85,6 +89,7 @@ class Model : public IntLexMinimizeSpace { // c[o]: issue cycle of operation o relative to the beginning of its block IntVarArray v_c; + IntVarArray v_ff; // y[p]: temporary that is connected to operand p IntVarArray v_y; @@ -102,12 +107,15 @@ class Model : public IntLexMinimizeSpace { // ls[t]: start of live range of temporary t IntVarArray v_ls; + IntVarArray v_ls_ff; // ld[t]: duration of live range of temporary t IntVarArray v_ld; + IntVarArray v_ld_ff; // le[t]: end of live range of temporary t IntVarArray v_le; + IntVarArray v_le_ff; // al[t][rs]: whether temporary t is allocated to register space rs // (some of the register atoms of t overlap with rs) @@ -216,6 +224,8 @@ class Model : public IntLexMinimizeSpace { IntVar c(operation o) const { return v_c[instr(o)]; } + IntVar ff(operation o) const { return v_ff[instr(o)]; } + IntVar y(operand p) const { return v_y[opr(p)]; } virtual BoolVar x(operand p) const = 0; @@ -232,6 +242,12 @@ class Model : public IntLexMinimizeSpace { IntVar le(temporary t) const { return v_le[temp(t)]; } + IntVar ls_ff(temporary t) const { return v_ls_ff[temp(t)]; } + + IntVar ld_ff(temporary t) const { return v_ld_ff[temp(t)]; } + + IntVar le_ff(temporary t) const { return v_le_ff[temp(t)]; } + BoolVar al(register_space rs, temporary t) const { return v_al[temp(t) * input->RS.size() + rs]; } @@ -374,6 +390,7 @@ class Model : public IntLexMinimizeSpace { void post_prescheduling_constraints(block b); void post_bypassing_constraints(block b); void post_adhoc_constraints(block b); + void post_fetch_constraints(block b); void post_improved_model_constraints(block b); void post_null_register_constraints(block b); diff --git a/src/solvers/gecode/models/simplemodel.cpp b/src/solvers/gecode/models/simplemodel.cpp index f5096cc6..54f33138 100644 --- 
a/src/solvers/gecode/models/simplemodel.cpp +++ b/src/solvers/gecode/models/simplemodel.cpp @@ -78,4 +78,5 @@ void SimpleModel::post_trivial_branchers(void) { branch(*this, v_r, INT_VAR_NONE(), INT_VAL_MIN()); // TODO: do not assign cycles to inactive operations branch(*this, v_c, INT_VAR_NONE(), INT_VAL_MIN()); + branch(*this, v_ff, INT_VAR_NONE(), INT_VAL_MIN()); } diff --git a/src/unison/src/Unison/Tools/Export.hs b/src/unison/src/Unison/Tools/Export.hs index 406d9bf5..28fe3d14 100644 --- a/src/unison/src/Unison/Tools/Export.hs +++ b/src/unison/src/Unison/Tools/Export.hs @@ -86,12 +86,14 @@ parseSolution json = Nothing -> error ("error parsing JSON input") Just (Object s) -> s cycles = sol HM.! "cycles" + fetches = sol HM.! "fetches" instructions = sol HM.! "instructions" registers = sol HM.! "registers" temporaries = sol HM.! "temporaries" has_sol = sol HM.! "has_solution" in if (solutionFromJson has_sol :: Bool) then Just (solutionFromJson cycles :: [Integer], + solutionFromJson fetches :: [Integer], solutionFromJson instructions :: [InstructionId], solutionFromJson registers :: [RegisterAtom], solutionFromJson temporaries :: [TemporaryId]) @@ -102,7 +104,7 @@ solutionFromJson object = Error e -> error ("error converting JSON input:\n" ++ show e) Success s -> s -uniTransformations (cycles, instructions, registers, temporaries) +uniTransformations (cycles, fetches, instructions, registers, temporaries) (removeReds, keepNops, tight) = [(assignRegisters tight registers, "assignRegisters", True), (selectTemporaries temporaries, "selectTemporaries", True), @@ -113,7 +115,7 @@ uniTransformations (cycles, instructions, registers, temporaries) (runTargetTransforms ExportPostOffs, "runTargetTransforms", True), (lowerFrameSize, "lowerFrameSize", True), (directFrame, "directFrame", True), - (bundleOperations cycles, "bundleOperations", True), + (bundleOperations cycles fetches, "bundleOperations", True), (removeRedundancies, "removeRedundancies", removeReds), 
(runTargetTransforms ExportPreLow, "runTargetTransforms", True), (lowerFrameIndices, "lowerFrameIndices", True), diff --git a/src/unison/src/Unison/Tools/Export/BundleOperations.hs b/src/unison/src/Unison/Tools/Export/BundleOperations.hs index f0cb4f16..854f961f 100644 --- a/src/unison/src/Unison/Tools/Export/BundleOperations.hs +++ b/src/unison/src/Unison/Tools/Export/BundleOperations.hs @@ -13,23 +13,26 @@ module Unison.Tools.Export.BundleOperations (bundleOperations) where import qualified Data.Map as M import qualified Data.Set as S +import qualified Data.List as L import Unison -bundleOperations cycles f @ Function {fCode = code} _ = +bundleOperations cycles fetches f @ Function {fCode = code} _ = let i2c = M.fromList (zip (flatten code) cycles) - bcode = map (toBundleBlock i2c) code + i2f = M.fromList (zip (flatten code) fetches) + bcode = map (toBundleBlock i2c i2f) code in f {fCode = bcode} -toBundleBlock i2c b @ Block {bCode = code} = +toBundleBlock i2c i2f b @ Block {bCode = code} = let code' = filter isActive code is = S.fromList code' i2c' = M.filterWithKey (\i _ -> S.member i is) i2c c2is = foldr mapAppend M.empty [(c, [i]) | (i, c) <- M.toList i2c'] cs = zip [0 .. fst (M.findMax c2is)] (repeat []) c2is' = M.toList $ foldr mapAppend c2is cs - bcode = map (Bundle . snd) c2is' + sortByFetch is = L.sortBy (\i1 i2 -> compare (i2f M.! i1) (i2f M.! i2)) is + bcode = map (Bundle . sortByFetch . snd) c2is' in b {bCode = bcode} isActive o | oInstructions o == [mkNullInstruction] = False -isActive _ = True \ No newline at end of file +isActive _ = True