| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 This is an optimized C++ implemention of the Relooper algorithm originally | |
| 3 developed as part of Emscripten. This implementation includes optimizations | |
| 4 added since the original academic paper [1] was published about it, and is | |
| 5 written in an LLVM-friendly way with the goal of inclusion in upstream | |
| 6 LLVM. | |
| 7 | |
| 8 [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In Proceedings
of the ACM international conference companion on Object oriented programming sy
stems languages and applications companion (SPLASH '11). ACM, New York, NY, USA,
301-312. DOI=10.1145/2048147.2048224 http://doi.acm.org/10.1145/2048147.2048224 | |
| 9 */ | |
| 10 | |
| 11 #include <assert.h> | |
| 12 #include <stdio.h> | |
| 13 #include <stdarg.h> | |
| 14 | |
| 15 #ifdef __cplusplus | |
| 16 | |
| 17 #include <map> | |
| 18 #include <deque> | |
| 19 #include <set> | |
| 20 | |
| 21 struct Block; | |
| 22 struct Shape; | |
| 23 | |
| 24 // Info about a branching from one block to another | |
| 25 struct Branch { | |
| 26 enum FlowType { | |
| 27 Direct = 0, // We will directly reach the right location through other mea
ns, no need for continue or break | |
| 28 Break = 1, | |
| 29 Continue = 2, | |
| 30 Nested = 3 // This code is directly reached, but we must be careful to en
sure it is nested in an if - it is not reached | |
| 31 // unconditionally, other code paths exist alongside it that w
e need to make sure do not intertwine | |
| 32 }; | |
| 33 Shape *Ancestor; // If not NULL, this shape is the relevant one for purposes o
f getting to the target block. We break or continue on it | |
| 34 Branch::FlowType Type; // If Ancestor is not NULL, this says whether to break
or continue | |
| 35 bool Labeled; // If a break or continue, whether we need to use a label | |
| 36 const char *Condition; // The condition for which we branch. For example, "my_
var == 1". Conditions are checked one by one. One of the conditions should have
NULL as the condition, in which case it is the default | |
| 37 const char *Code; // If provided, code that is run right before the branch is
taken. This is useful for phis | |
| 38 | |
| 39 Branch(const char *ConditionInit, const char *CodeInit=NULL); | |
| 40 ~Branch(); | |
| 41 | |
| 42 // Prints out the branch | |
| 43 void Render(Block *Target, bool SetLabel); | |
| 44 }; | |
| 45 | |
| 46 typedef std::set<Block*> BlockSet; | |
| 47 typedef std::map<Block*, Branch*> BlockBranchMap; | |
| 48 | |
| 49 // Represents a basic block of code - some instructions that end with a | |
| 50 // control flow modifier (a branch, return or throw). | |
| 51 struct Block { | |
| 52 // Branches become processed after we finish the shape relevant to them. For e
xample, | |
| 53 // when we recreate a loop, branches to the loop start become continues and ar
e now | |
| 54 // processed. When we calculate what shape to generate from a set of blocks, w
e ignore | |
| 55 // processed branches. | |
| 56 // Blocks own the Branch objects they use, and destroy them when done. | |
| 57 BlockBranchMap BranchesOut; | |
| 58 BlockSet BranchesIn; | |
| 59 BlockBranchMap ProcessedBranchesOut; | |
| 60 BlockSet ProcessedBranchesIn; | |
| 61 Shape *Parent; // The shape we are directly inside | |
| 62 int Id; // A unique identifier, defined when added to relooper. Note that this
uniquely identifies a *logical* block - if we split it, the two instances have
the same content *and* the same Id | |
| 63 const char *Code; // The string representation of the code in this block. Owni
ng pointer (we copy the input) | |
| 64 const char *BranchVar; // A variable whose value determines where we go; if th
is is not NULL, emit a switch on that variable | |
| 65 bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching
us requires setting the label variable | |
| 66 | |
| 67 Block(const char *CodeInit, const char *BranchVarInit); | |
| 68 ~Block(); | |
| 69 | |
| 70 void AddBranchTo(Block *Target, const char *Condition, const char *Code=NULL); | |
| 71 | |
| 72 // Prints out the instructions code and branchings | |
| 73 void Render(bool InLoop); | |
| 74 }; | |
| 75 | |
| 76 // Represents a structured control flow shape, one of | |
| 77 // | |
| 78 // Simple: No control flow at all, just instructions. If several | |
| 79 // blocks, then | |
| 80 // | |
| 81 // Multiple: A shape with more than one entry. If the next block to | |
| 82 // be entered is among them, we run it and continue to | |
| 83 // the next shape, otherwise we continue immediately to the | |
| 84 // next shape. | |
| 85 // | |
| 86 // Loop: An infinite loop. | |
| 87 // | |
| 88 // Emulated: Control flow is managed by a switch in a loop. This | |
| 89 // is necessary in some cases, for example when control | |
| 90 // flow is not known until runtime (indirect branches, | |
| 91 // setjmp returns, etc.) | |
| 92 // | |
| 93 | |
| 94 struct SimpleShape; | |
| 95 struct LabeledShape; | |
| 96 struct MultipleShape; | |
| 97 struct LoopShape; | |
| 98 struct EmulatedShape; | |
| 99 | |
| 100 struct Shape { | |
| 101 int Id; // A unique identifier. Used to identify loops, labels are Lx where x
is the Id. Defined when added to relooper | |
| 102 Shape *Next; // The shape that will appear in the code right after this one | |
| 103 Shape *Natural; // The shape that control flow gets to naturally (if there is
Next, then this is Next) | |
| 104 | |
| 105 enum ShapeType { | |
| 106 Simple, | |
| 107 Multiple, | |
| 108 Loop, | |
| 109 Emulated | |
| 110 }; | |
| 111 ShapeType Type; | |
| 112 | |
| 113 Shape(ShapeType TypeInit) : Id(-1), Next(NULL), Type(TypeInit) {} | |
| 114 virtual ~Shape() {} | |
| 115 | |
| 116 virtual void Render(bool InLoop) = 0; | |
| 117 | |
| 118 static SimpleShape *IsSimple(Shape *It) { return It && It->Type == Simple ? (S
impleShape*)It : NULL; } | |
| 119 static MultipleShape *IsMultiple(Shape *It) { return It && It->Type == Multipl
e ? (MultipleShape*)It : NULL; } | |
| 120 static LoopShape *IsLoop(Shape *It) { return It && It->Type == Loop ? (LoopSha
pe*)It : NULL; } | |
| 121 static LabeledShape *IsLabeled(Shape *It) { return IsMultiple(It) || IsLoop(It
) ? (LabeledShape*)It : NULL; } | |
| 122 static EmulatedShape *IsEmulated(Shape *It) { return It && It->Type == Emulate
d ? (EmulatedShape*)It : NULL; } | |
| 123 }; | |
| 124 | |
| 125 struct SimpleShape : public Shape { | |
| 126 Block *Inner; | |
| 127 | |
| 128 SimpleShape() : Shape(Simple), Inner(NULL) {} | |
| 129 void Render(bool InLoop) { | |
| 130 Inner->Render(InLoop); | |
| 131 if (Next) Next->Render(InLoop); | |
| 132 } | |
| 133 }; | |
| 134 | |
| 135 // A shape that may be implemented with a labeled loop. | |
| 136 struct LabeledShape : public Shape { | |
| 137 bool Labeled; // If we have a loop, whether it needs to be labeled | |
| 138 | |
| 139 LabeledShape(ShapeType TypeInit) : Shape(TypeInit), Labeled(false) {} | |
| 140 }; | |
| 141 | |
| 142 // Blocks with the same id were split and are identical, so we just care about i
ds in Multiple entries | |
| 143 typedef std::map<int, Shape*> IdShapeMap; | |
| 144 | |
| 145 struct MultipleShape : public LabeledShape { | |
| 146 IdShapeMap InnerMap; // entry block ID -> shape | |
| 147 int Breaks; // If we have branches on us, we need a loop (or a switch). This i
s a counter of requirements, | |
| 148 // if we optimize it to 0, the loop is unneeded | |
| 149 bool UseSwitch; // Whether to switch on label as opposed to an if-else chain | |
| 150 | |
| 151 MultipleShape() : LabeledShape(Multiple), Breaks(0), UseSwitch(false) {} | |
| 152 | |
| 153 void RenderLoopPrefix(); | |
| 154 void RenderLoopPostfix(); | |
| 155 | |
| 156 void Render(bool InLoop); | |
| 157 }; | |
| 158 | |
| 159 struct LoopShape : public LabeledShape { | |
| 160 Shape *Inner; | |
| 161 | |
| 162 LoopShape() : LabeledShape(Loop), Inner(NULL) {} | |
| 163 void Render(bool InLoop); | |
| 164 }; | |
| 165 | |
| 166 // TODO EmulatedShape is only partially functional. Currently it can be used for
the | |
| 167 // entire set of blocks being relooped, but not subsets. | |
| 168 struct EmulatedShape : public LabeledShape { | |
| 169 Block *Entry; | |
| 170 BlockSet Blocks; | |
| 171 | |
| 172 EmulatedShape() : LabeledShape(Emulated) { Labeled = true; } | |
| 173 void Render(bool InLoop); | |
| 174 }; | |
| 175 | |
| 176 // Implements the relooper algorithm for a function's blocks. | |
| 177 // | |
| 178 // Usage: | |
| 179 // 1. Instantiate this struct. | |
| 180 // 2. Call AddBlock with the blocks you have. Each should already | |
| 181 // have its branchings in specified (the branchings out will | |
| 182 // be calculated by the relooper). | |
| 183 // 3. Call Render(). | |
| 184 // | |
| 185 // Implementation details: The Relooper instance has | |
| 186 // ownership of the blocks and shapes, and frees them when done. | |
| 187 struct Relooper { | |
| 188 std::deque<Block*> Blocks; | |
| 189 std::deque<Shape*> Shapes; | |
| 190 Shape *Root; | |
| 191 bool Emulate; | |
| 192 bool MinSize; | |
| 193 int BlockIdCounter; | |
| 194 int ShapeIdCounter; | |
| 195 | |
| 196 Relooper(); | |
| 197 ~Relooper(); | |
| 198 | |
| 199 void AddBlock(Block *New, int Id=-1); | |
| 200 | |
| 201 // Calculates the shapes | |
| 202 void Calculate(Block *Entry); | |
| 203 | |
| 204 // Renders the result. | |
| 205 void Render(); | |
| 206 | |
| 207 // Sets the global buffer all printing goes to. Must call this or MakeOutputBu
ffer. | |
| 208 // XXX: this is deprecated, see MakeOutputBuffer | |
| 209 static void SetOutputBuffer(char *Buffer, int Size); | |
| 210 | |
| 211 // Creates an internal output buffer. Must call this or SetOutputBuffer. Size
is | |
| 212 // a hint for the initial size of the buffer, it can be resized later one dema
nd. | |
| 213 // For that reason this is more recommended than SetOutputBuffer. | |
| 214 static void MakeOutputBuffer(int Size); | |
| 215 | |
| 216 static char *GetOutputBuffer(); | |
| 217 | |
| 218 // Sets asm.js mode on or off (default is off) | |
| 219 static void SetAsmJSMode(int On); | |
| 220 | |
| 221 // Sets whether we must emulate everything with switch-loop code | |
| 222 void SetEmulate(int E) { Emulate = E; } | |
| 223 | |
| 224 // Sets us to try to minimize size | |
| 225 void SetMinSize(bool MinSize_) { MinSize = MinSize_; } | |
| 226 }; | |
| 227 | |
| 228 typedef std::map<Block*, BlockSet> BlockBlockSetMap; | |
| 229 | |
| 230 #if DEBUG | |
| 231 struct Debugging { | |
| 232 static void Dump(BlockSet &Blocks, const char *prefix=NULL); | |
| 233 static void Dump(Shape *S, const char *prefix=NULL); | |
| 234 }; | |
| 235 #endif | |
| 236 | |
| 237 #endif // __cplusplus | |
| 238 | |
| 239 // C API - useful for binding to other languages | |
| 240 | |
| 241 #ifdef _WIN32 | |
| 242 #ifdef RELOOPERDLL_EXPORTS | |
| 243 #define RELOOPERDLL_API __declspec(dllexport) | |
| 244 #else | |
| 245 #define RELOOPERDLL_API __declspec(dllimport) | |
| 246 #endif | |
| 247 #else | |
| 248 #define RELOOPERDLL_API | |
| 249 #endif | |
| 250 | |
| 251 #ifdef __cplusplus | |
| 252 extern "C" { | |
| 253 #endif | |
| 254 | |
| 255 RELOOPERDLL_API void rl_set_output_buffer(char *buffer, int size); | |
| 256 RELOOPERDLL_API void rl_make_output_buffer(int size); | |
| 257 RELOOPERDLL_API void rl_set_asm_js_mode(int on); | |
| 258 RELOOPERDLL_API void *rl_new_block(const char *text, const char *branch_var); | |
| 259 RELOOPERDLL_API void rl_delete_block(void *block); | |
| 260 RELOOPERDLL_API void rl_block_add_branch_to(void *from, void *to, const char *c
ondition, const char *code); | |
| 261 RELOOPERDLL_API void *rl_new_relooper(); | |
| 262 RELOOPERDLL_API void rl_delete_relooper(void *relooper); | |
| 263 RELOOPERDLL_API void rl_relooper_add_block(void *relooper, void *block); | |
| 264 RELOOPERDLL_API void rl_relooper_calculate(void *relooper, void *entry); | |
| 265 RELOOPERDLL_API void rl_relooper_render(void *relooper); | |
| 266 | |
| 267 #ifdef __cplusplus | |
| 268 } | |
| 269 #endif | |
| 270 | |
| OLD | NEW |