Mercurial > hg > truffle
comparison src/share/vm/opto/superword.hpp @ 0:a61af66fc99e jdk7-b24
Initial load
author | duke |
---|---|
date | Sat, 01 Dec 2007 00:00:00 +0000 |
parents | |
children | 98cb887364d3 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a61af66fc99e |
---|---|
1 /* | |
2 * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved. | |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
20 * CA 95054 USA or visit www.sun.com if you need additional information or | |
21 * have any questions. | |
22 */ | |
23 | |
24 // | |
25 // S U P E R W O R D T R A N S F O R M | |
26 // | |
27 // SuperWords are short, fixed length vectors. | |
28 // | |
29 // Algorithm from: | |
30 // | |
31 // Exploiting SuperWord Level Parallelism with | |
32 // Multimedia Instruction Sets | |
33 // by | |
34 // Samuel Larsen and Saman Amarasighe | |
35 // MIT Laboratory for Computer Science | |
36 // date | |
37 // May 2000 | |
38 // published in | |
39 // ACM SIGPLAN Notices | |
40 // Proceedings of ACM PLDI '00, Volume 35 Issue 5 | |
41 // | |
42 // Definition 3.1 A Pack is an n-tuple, <s1, ...,sn>, where | |
43 // s1,...,sn are independent isomorphic statements in a basic | |
44 // block. | |
45 // | |
46 // Definition 3.2 A PackSet is a set of Packs. | |
47 // | |
48 // Definition 3.3 A Pair is a Pack of size two, where the | |
49 // first statement is considered the left element, and the | |
50 // second statement is considered the right element. | |
51 | |
52 class SWPointer; | |
53 class OrderedPair; | |
54 | |
55 // ========================= Dependence Graph ===================== | |
56 | |
57 class DepMem; | |
58 | |
59 //------------------------------DepEdge--------------------------- | |
60 // An edge in the dependence graph. The edges incident to a dependence | |
61 // node are threaded through _next_in for incoming edges and _next_out | |
62 // for outgoing edges. | |
63 class DepEdge : public ResourceObj { | |
64 protected: | |
65 DepMem* _pred; | |
66 DepMem* _succ; | |
67 DepEdge* _next_in; // list of in edges, null terminated | |
68 DepEdge* _next_out; // list of out edges, null terminated | |
69 | |
70 public: | |
71 DepEdge(DepMem* pred, DepMem* succ, DepEdge* next_in, DepEdge* next_out) : | |
72 _pred(pred), _succ(succ), _next_in(next_in), _next_out(next_out) {} | |
73 | |
74 DepEdge* next_in() { return _next_in; } | |
75 DepEdge* next_out() { return _next_out; } | |
76 DepMem* pred() { return _pred; } | |
77 DepMem* succ() { return _succ; } | |
78 | |
79 void print(); | |
80 }; | |
81 | |
82 //------------------------------DepMem--------------------------- | |
83 // A node in the dependence graph. _in_head starts the threaded list of | |
84 // incoming edges, and _out_head starts the list of outgoing edges. | |
85 class DepMem : public ResourceObj { | |
86 protected: | |
87 Node* _node; // Corresponding ideal node | |
88 DepEdge* _in_head; // Head of list of in edges, null terminated | |
89 DepEdge* _out_head; // Head of list of out edges, null terminated | |
90 | |
91 public: | |
92 DepMem(Node* node) : _node(node), _in_head(NULL), _out_head(NULL) {} | |
93 | |
94 Node* node() { return _node; } | |
95 DepEdge* in_head() { return _in_head; } | |
96 DepEdge* out_head() { return _out_head; } | |
97 void set_in_head(DepEdge* hd) { _in_head = hd; } | |
98 void set_out_head(DepEdge* hd) { _out_head = hd; } | |
99 | |
100 int in_cnt(); // Incoming edge count | |
101 int out_cnt(); // Outgoing edge count | |
102 | |
103 void print(); | |
104 }; | |
105 | |
106 //------------------------------DepGraph--------------------------- | |
107 class DepGraph VALUE_OBJ_CLASS_SPEC { | |
108 protected: | |
109 Arena* _arena; | |
110 GrowableArray<DepMem*> _map; | |
111 DepMem* _root; | |
112 DepMem* _tail; | |
113 | |
114 public: | |
115 DepGraph(Arena* a) : _arena(a), _map(a, 8, 0, NULL) { | |
116 _root = new (_arena) DepMem(NULL); | |
117 _tail = new (_arena) DepMem(NULL); | |
118 } | |
119 | |
120 DepMem* root() { return _root; } | |
121 DepMem* tail() { return _tail; } | |
122 | |
123 // Return dependence node corresponding to an ideal node | |
124 DepMem* dep(Node* node) { return _map.at(node->_idx); } | |
125 | |
126 // Make a new dependence graph node for an ideal node. | |
127 DepMem* make_node(Node* node); | |
128 | |
129 // Make a new dependence graph edge dprec->dsucc | |
130 DepEdge* make_edge(DepMem* dpred, DepMem* dsucc); | |
131 | |
132 DepEdge* make_edge(Node* pred, Node* succ) { return make_edge(dep(pred), dep(succ)); } | |
133 DepEdge* make_edge(DepMem* pred, Node* succ) { return make_edge(pred, dep(succ)); } | |
134 DepEdge* make_edge(Node* pred, DepMem* succ) { return make_edge(dep(pred), succ); } | |
135 | |
136 void init() { _map.clear(); } // initialize | |
137 | |
138 void print(Node* n) { dep(n)->print(); } | |
139 void print(DepMem* d) { d->print(); } | |
140 }; | |
141 | |
142 //------------------------------DepPreds--------------------------- | |
143 // Iterator over predecessors in the dependence graph and | |
144 // non-memory-graph inputs of ideal nodes. | |
145 class DepPreds : public StackObj { | |
146 private: | |
147 Node* _n; | |
148 int _next_idx, _end_idx; | |
149 DepEdge* _dep_next; | |
150 Node* _current; | |
151 bool _done; | |
152 | |
153 public: | |
154 DepPreds(Node* n, DepGraph& dg); | |
155 Node* current() { return _current; } | |
156 bool done() { return _done; } | |
157 void next(); | |
158 }; | |
159 | |
160 //------------------------------DepSuccs--------------------------- | |
161 // Iterator over successors in the dependence graph and | |
162 // non-memory-graph outputs of ideal nodes. | |
163 class DepSuccs : public StackObj { | |
164 private: | |
165 Node* _n; | |
166 int _next_idx, _end_idx; | |
167 DepEdge* _dep_next; | |
168 Node* _current; | |
169 bool _done; | |
170 | |
171 public: | |
172 DepSuccs(Node* n, DepGraph& dg); | |
173 Node* current() { return _current; } | |
174 bool done() { return _done; } | |
175 void next(); | |
176 }; | |
177 | |
178 | |
179 // ========================= SuperWord ===================== | |
180 | |
181 // -----------------------------SWNodeInfo--------------------------------- | |
182 // Per node info needed by SuperWord | |
183 class SWNodeInfo VALUE_OBJ_CLASS_SPEC { | |
184 public: | |
185 int _alignment; // memory alignment for a node | |
186 int _depth; // Max expression (DAG) depth from block start | |
187 const Type* _velt_type; // vector element type | |
188 Node_List* _my_pack; // pack containing this node | |
189 | |
190 SWNodeInfo() : _alignment(-1), _depth(0), _velt_type(NULL), _my_pack(NULL) {} | |
191 static const SWNodeInfo initial; | |
192 }; | |
193 | |
194 // -----------------------------SuperWord--------------------------------- | |
195 // Transforms scalar operations into packed (superword) operations. | |
196 class SuperWord : public ResourceObj { | |
197 private: | |
198 PhaseIdealLoop* _phase; | |
199 Arena* _arena; | |
200 PhaseIterGVN &_igvn; | |
201 | |
202 enum consts { top_align = -1, bottom_align = -666 }; | |
203 | |
204 GrowableArray<Node_List*> _packset; // Packs for the current block | |
205 | |
206 GrowableArray<int> _bb_idx; // Map from Node _idx to index within block | |
207 | |
208 GrowableArray<Node*> _block; // Nodes in current block | |
209 GrowableArray<Node*> _data_entry; // Nodes with all inputs from outside | |
210 GrowableArray<Node*> _mem_slice_head; // Memory slice head nodes | |
211 GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes | |
212 | |
213 GrowableArray<SWNodeInfo> _node_info; // Info needed per node | |
214 | |
215 MemNode* _align_to_ref; // Memory reference that pre-loop will align to | |
216 | |
217 GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs | |
218 | |
219 DepGraph _dg; // Dependence graph | |
220 | |
221 // Scratch pads | |
222 VectorSet _visited; // Visited set | |
223 VectorSet _post_visited; // Post-visited set | |
224 Node_Stack _n_idx_list; // List of (node,index) pairs | |
225 GrowableArray<Node*> _nlist; // List of nodes | |
226 GrowableArray<Node*> _stk; // Stack of nodes | |
227 | |
228 public: | |
229 SuperWord(PhaseIdealLoop* phase); | |
230 | |
231 void transform_loop(IdealLoopTree* lpt); | |
232 | |
233 // Accessors for SWPointer | |
234 PhaseIdealLoop* phase() { return _phase; } | |
235 IdealLoopTree* lpt() { return _lpt; } | |
236 PhiNode* iv() { return _iv; } | |
237 | |
238 private: | |
239 IdealLoopTree* _lpt; // Current loop tree node | |
240 LoopNode* _lp; // Current LoopNode | |
241 Node* _bb; // Current basic block | |
242 PhiNode* _iv; // Induction var | |
243 | |
244 // Accessors | |
245 Arena* arena() { return _arena; } | |
246 | |
247 Node* bb() { return _bb; } | |
248 void set_bb(Node* bb) { _bb = bb; } | |
249 | |
250 void set_lpt(IdealLoopTree* lpt) { _lpt = lpt; } | |
251 | |
252 LoopNode* lp() { return _lp; } | |
253 void set_lp(LoopNode* lp) { _lp = lp; | |
254 _iv = lp->as_CountedLoop()->phi()->as_Phi(); } | |
255 int iv_stride() { return lp()->as_CountedLoop()->stride_con(); } | |
256 | |
257 int vector_width_in_bytes() { return Matcher::vector_width_in_bytes(); } | |
258 | |
259 MemNode* align_to_ref() { return _align_to_ref; } | |
260 void set_align_to_ref(MemNode* m) { _align_to_ref = m; } | |
261 | |
262 Node* ctrl(Node* n) const { return _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; } | |
263 | |
264 // block accessors | |
265 bool in_bb(Node* n) { return n != NULL && n->outcnt() > 0 && ctrl(n) == _bb; } | |
266 int bb_idx(Node* n) { assert(in_bb(n), "must be"); return _bb_idx.at(n->_idx); } | |
267 void set_bb_idx(Node* n, int i) { _bb_idx.at_put_grow(n->_idx, i); } | |
268 | |
269 // visited set accessors | |
270 void visited_clear() { _visited.Clear(); } | |
271 void visited_set(Node* n) { return _visited.set(bb_idx(n)); } | |
272 int visited_test(Node* n) { return _visited.test(bb_idx(n)); } | |
273 int visited_test_set(Node* n) { return _visited.test_set(bb_idx(n)); } | |
274 void post_visited_clear() { _post_visited.Clear(); } | |
275 void post_visited_set(Node* n) { return _post_visited.set(bb_idx(n)); } | |
276 int post_visited_test(Node* n) { return _post_visited.test(bb_idx(n)); } | |
277 | |
278 // Ensure node_info contains element "i" | |
279 void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); } | |
280 | |
281 // memory alignment for a node | |
282 int alignment(Node* n) { return _node_info.adr_at(bb_idx(n))->_alignment; } | |
283 void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; } | |
284 | |
285 // Max expression (DAG) depth from beginning of the block for each node | |
286 int depth(Node* n) { return _node_info.adr_at(bb_idx(n))->_depth; } | |
287 void set_depth(Node* n, int d) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_depth = d; } | |
288 | |
289 // vector element type | |
290 const Type* velt_type(Node* n) { return _node_info.adr_at(bb_idx(n))->_velt_type; } | |
291 void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; } | |
292 | |
293 // my_pack | |
294 Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; } | |
295 void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; } | |
296 | |
297 // methods | |
298 | |
299 // Extract the superword level parallelism | |
300 void SLP_extract(); | |
301 // Find the adjacent memory references and create pack pairs for them. | |
302 void find_adjacent_refs(); | |
303 // Find a memory reference to align the loop induction variable to. | |
304 void find_align_to_ref(Node_List &memops); | |
305 // Can the preloop align the reference to position zero in the vector? | |
306 bool ref_is_alignable(SWPointer& p); | |
307 // Construct dependency graph. | |
308 void dependence_graph(); | |
309 // Return a memory slice (node list) in predecessor order starting at "start" | |
310 void mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds); | |
311 // Can s1 and s2 be in a pack with s1 immediately preceeding s2 and s1 aligned at "align" | |
312 bool stmts_can_pack(Node* s1, Node* s2, int align); | |
313 // Does s exist in a pack at position pos? | |
314 bool exists_at(Node* s, uint pos); | |
315 // Is s1 immediately before s2 in memory? | |
316 bool are_adjacent_refs(Node* s1, Node* s2); | |
317 // Are s1 and s2 similar? | |
318 bool isomorphic(Node* s1, Node* s2); | |
319 // Is there no data path from s1 to s2 or s2 to s1? | |
320 bool independent(Node* s1, Node* s2); | |
321 // Helper for independent | |
322 bool independent_path(Node* shallow, Node* deep, uint dp=0); | |
323 void set_alignment(Node* s1, Node* s2, int align); | |
324 int data_size(Node* s); | |
325 // Extend packset by following use->def and def->use links from pack members. | |
326 void extend_packlist(); | |
327 // Extend the packset by visiting operand definitions of nodes in pack p | |
328 bool follow_use_defs(Node_List* p); | |
329 // Extend the packset by visiting uses of nodes in pack p | |
330 bool follow_def_uses(Node_List* p); | |
331 // Estimate the savings from executing s1 and s2 as a pack | |
332 int est_savings(Node* s1, Node* s2); | |
333 int adjacent_profit(Node* s1, Node* s2); | |
334 int pack_cost(int ct); | |
335 int unpack_cost(int ct); | |
336 // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last | |
337 void combine_packs(); | |
338 // Construct the map from nodes to packs. | |
339 void construct_my_pack_map(); | |
340 // Remove packs that are not implemented or not profitable. | |
341 void filter_packs(); | |
342 // Adjust the memory graph for the packed operations | |
343 void schedule(); | |
344 // Within a pack, move stores down to the last executed store, | |
345 // and move loads up to the first executed load. | |
346 void co_locate_pack(Node_List* p); | |
347 // Convert packs into vector node operations | |
348 void output(); | |
349 // Create a vector operand for the nodes in pack p for operand: in(opd_idx) | |
350 VectorNode* vector_opd(Node_List* p, int opd_idx); | |
351 // Can code be generated for pack p? | |
352 bool implemented(Node_List* p); | |
353 // For pack p, are all operands and all uses (with in the block) vector? | |
354 bool profitable(Node_List* p); | |
355 // If a use of pack p is not a vector use, then replace the use with an extract operation. | |
356 void insert_extracts(Node_List* p); | |
357 // Is use->in(u_idx) a vector use? | |
358 bool is_vector_use(Node* use, int u_idx); | |
359 // Construct reverse postorder list of block members | |
360 void construct_bb(); | |
361 // Initialize per node info | |
362 void initialize_bb(); | |
363 // Insert n into block after pos | |
364 void bb_insert_after(Node* n, int pos); | |
365 // Compute max depth for expressions from beginning of block | |
366 void compute_max_depth(); | |
367 // Compute necessary vector element type for expressions | |
368 void compute_vector_element_type(); | |
369 // Are s1 and s2 in a pack pair and ordered as s1,s2? | |
370 bool in_packset(Node* s1, Node* s2); | |
371 // Is s in pack p? | |
372 Node_List* in_pack(Node* s, Node_List* p); | |
373 // Remove the pack at position pos in the packset | |
374 void remove_pack_at(int pos); | |
375 // Return the node executed first in pack p. | |
376 Node* executed_first(Node_List* p); | |
377 // Return the node executed last in pack p. | |
378 Node* executed_last(Node_List* p); | |
379 // Alignment within a vector memory reference | |
380 int memory_alignment(MemNode* s, int iv_adjust_in_bytes); | |
381 // (Start, end] half-open range defining which operands are vector | |
382 void vector_opd_range(Node* n, uint* start, uint* end); | |
383 // Smallest type containing range of values | |
384 static const Type* container_type(const Type* t); | |
385 // Adjust pre-loop limit so that in main loop, a load/store reference | |
386 // to align_to_ref will be a position zero in the vector. | |
387 void align_initial_loop_index(MemNode* align_to_ref); | |
388 // Find pre loop end from main loop. Returns null if none. | |
389 CountedLoopEndNode* get_pre_loop_end(CountedLoopNode *cl); | |
390 // Is the use of d1 in u1 at the same operand position as d2 in u2? | |
391 bool opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2); | |
392 void init(); | |
393 | |
394 // print methods | |
395 void print_packset(); | |
396 void print_pack(Node_List* p); | |
397 void print_bb(); | |
398 void print_stmt(Node* s); | |
399 char* blank(uint depth); | |
400 }; | |
401 | |
402 | |
403 //------------------------------SWPointer--------------------------- | |
404 // Information about an address for dependence checking and vector alignment | |
405 class SWPointer VALUE_OBJ_CLASS_SPEC { | |
406 protected: | |
407 MemNode* _mem; // My memory reference node | |
408 SuperWord* _slp; // SuperWord class | |
409 | |
410 Node* _base; // NULL if unsafe nonheap reference | |
411 Node* _adr; // address pointer | |
412 jint _scale; // multipler for iv (in bytes), 0 if no loop iv | |
413 jint _offset; // constant offset (in bytes) | |
414 Node* _invar; // invariant offset (in bytes), NULL if none | |
415 bool _negate_invar; // if true then use: (0 - _invar) | |
416 | |
417 PhaseIdealLoop* phase() { return _slp->phase(); } | |
418 IdealLoopTree* lpt() { return _slp->lpt(); } | |
419 PhiNode* iv() { return _slp->iv(); } // Induction var | |
420 | |
421 bool invariant(Node* n) { | |
422 Node *n_c = phase()->get_ctrl(n); | |
423 return !lpt()->is_member(phase()->get_loop(n_c)); | |
424 } | |
425 | |
426 // Match: k*iv + offset | |
427 bool scaled_iv_plus_offset(Node* n); | |
428 // Match: k*iv where k is a constant that's not zero | |
429 bool scaled_iv(Node* n); | |
430 // Match: offset is (k [+/- invariant]) | |
431 bool offset_plus_k(Node* n, bool negate = false); | |
432 | |
433 public: | |
434 enum CMP { | |
435 Less = 1, | |
436 Greater = 2, | |
437 Equal = 4, | |
438 NotEqual = (Less | Greater), | |
439 NotComparable = (Less | Greater | Equal) | |
440 }; | |
441 | |
442 SWPointer(MemNode* mem, SuperWord* slp); | |
443 // Following is used to create a temporary object during | |
444 // the pattern match of an address expression. | |
445 SWPointer(SWPointer* p); | |
446 | |
447 bool valid() { return _adr != NULL; } | |
448 bool has_iv() { return _scale != 0; } | |
449 | |
450 Node* base() { return _base; } | |
451 Node* adr() { return _adr; } | |
452 int scale_in_bytes() { return _scale; } | |
453 Node* invar() { return _invar; } | |
454 bool negate_invar() { return _negate_invar; } | |
455 int offset_in_bytes() { return _offset; } | |
456 int memory_size() { return _mem->memory_size(); } | |
457 | |
458 // Comparable? | |
459 int cmp(SWPointer& q) { | |
460 if (valid() && q.valid() && | |
461 (_adr == q._adr || _base == _adr && q._base == q._adr) && | |
462 _scale == q._scale && | |
463 _invar == q._invar && | |
464 _negate_invar == q._negate_invar) { | |
465 bool overlap = q._offset < _offset + memory_size() && | |
466 _offset < q._offset + q.memory_size(); | |
467 return overlap ? Equal : (_offset < q._offset ? Less : Greater); | |
468 } else { | |
469 return NotComparable; | |
470 } | |
471 } | |
472 | |
473 bool not_equal(SWPointer& q) { return not_equal(cmp(q)); } | |
474 bool equal(SWPointer& q) { return equal(cmp(q)); } | |
475 bool comparable(SWPointer& q) { return comparable(cmp(q)); } | |
476 static bool not_equal(int cmp) { return cmp <= NotEqual; } | |
477 static bool equal(int cmp) { return cmp == Equal; } | |
478 static bool comparable(int cmp) { return cmp < NotComparable; } | |
479 | |
480 void print(); | |
481 }; | |
482 | |
483 | |
484 //------------------------------OrderedPair--------------------------- | |
485 // Ordered pair of Node*. | |
486 class OrderedPair VALUE_OBJ_CLASS_SPEC { | |
487 protected: | |
488 Node* _p1; | |
489 Node* _p2; | |
490 public: | |
491 OrderedPair() : _p1(NULL), _p2(NULL) {} | |
492 OrderedPair(Node* p1, Node* p2) { | |
493 if (p1->_idx < p2->_idx) { | |
494 _p1 = p1; _p2 = p2; | |
495 } else { | |
496 _p1 = p2; _p2 = p1; | |
497 } | |
498 } | |
499 | |
500 bool operator==(const OrderedPair &rhs) { | |
501 return _p1 == rhs._p1 && _p2 == rhs._p2; | |
502 } | |
503 void print() { tty->print(" (%d, %d)", _p1->_idx, _p2->_idx); } | |
504 | |
505 static const OrderedPair initial; | |
506 }; |