Mercurial > hg > truffle
comparison src/share/vm/opto/superword.cpp @ 6179:8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256-bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
author: kvn
date: Fri, 15 Jun 2012 01:25:19 -0700
parent changeset: 5e990493719e
child changeset: 6f8f439e247d
comparison legend: equal | deleted | inserted | replaced
compared revisions: 6146:eba1d5bce9e8 (left/parent) | 6179:8c92982cbbc4 (right/this changeset)
1 /* | 1 /* |
2 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. | 2 * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | 4 * |
5 * This code is free software; you can redistribute it and/or modify it | 5 * This code is free software; you can redistribute it and/or modify it |
6 * under the terms of the GNU General Public License version 2 only, as | 6 * under the terms of the GNU General Public License version 2 only, as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
65 _iv(NULL) // induction var | 65 _iv(NULL) // induction var |
66 {} | 66 {} |
67 | 67 |
68 //------------------------------transform_loop--------------------------- | 68 //------------------------------transform_loop--------------------------- |
69 void SuperWord::transform_loop(IdealLoopTree* lpt) { | 69 void SuperWord::transform_loop(IdealLoopTree* lpt) { |
70 assert(UseSuperWord, "should be"); | |
71 // Do vectors exist on this architecture? | |
72 if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; | |
73 | |
70 assert(lpt->_head->is_CountedLoop(), "must be"); | 74 assert(lpt->_head->is_CountedLoop(), "must be"); |
71 CountedLoopNode *cl = lpt->_head->as_CountedLoop(); | 75 CountedLoopNode *cl = lpt->_head->as_CountedLoop(); |
72 | 76 |
73 if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop | 77 if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop |
74 | 78 |
87 CountedLoopEndNode* pre_end = get_pre_loop_end(cl); | 91 CountedLoopEndNode* pre_end = get_pre_loop_end(cl); |
88 if (pre_end == NULL) return; | 92 if (pre_end == NULL) return; |
89 Node *pre_opaq1 = pre_end->limit(); | 93 Node *pre_opaq1 = pre_end->limit(); |
90 if (pre_opaq1->Opcode() != Op_Opaque1) return; | 94 if (pre_opaq1->Opcode() != Op_Opaque1) return; |
91 | 95 |
92 // Do vectors exist on this architecture? | |
93 if (vector_width_in_bytes() == 0) return; | |
94 | |
95 init(); // initialize data structures | 96 init(); // initialize data structures |
96 | 97 |
97 set_lpt(lpt); | 98 set_lpt(lpt); |
98 set_lp(cl); | 99 set_lp(cl); |
99 | 100 |
100 // For now, define one block which is the entire loop body | 101 // For now, define one block which is the entire loop body |
101 set_bb(cl); | 102 set_bb(cl); |
102 | 103 |
103 assert(_packset.length() == 0, "packset must be empty"); | 104 assert(_packset.length() == 0, "packset must be empty"); |
104 SLP_extract(); | 105 SLP_extract(); |
105 } | 106 } |
175 void SuperWord::find_adjacent_refs() { | 176 void SuperWord::find_adjacent_refs() { |
176 // Get list of memory operations | 177 // Get list of memory operations |
177 Node_List memops; | 178 Node_List memops; |
178 for (int i = 0; i < _block.length(); i++) { | 179 for (int i = 0; i < _block.length(); i++) { |
179 Node* n = _block.at(i); | 180 Node* n = _block.at(i); |
180 if (n->is_Mem() && in_bb(n) && | 181 if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) && |
181 is_java_primitive(n->as_Mem()->memory_type())) { | 182 is_java_primitive(n->as_Mem()->memory_type())) { |
182 int align = memory_alignment(n->as_Mem(), 0); | 183 int align = memory_alignment(n->as_Mem(), 0); |
183 if (align != bottom_align) { | 184 if (align != bottom_align) { |
184 memops.push(n); | 185 memops.push(n); |
185 } | 186 } |
186 } | 187 } |
187 } | 188 } |
188 if (memops.size() == 0) return; | 189 |
189 | 190 Node_List align_to_refs; |
190 // Find a memory reference to align to. The pre-loop trip count | 191 int best_iv_adjustment = 0; |
191 // is modified to align this reference to a vector-aligned address | 192 MemNode* best_align_to_mem_ref = NULL; |
192 find_align_to_ref(memops); | 193 |
193 if (align_to_ref() == NULL) return; | 194 while (memops.size() != 0) { |
194 | 195 // Find a memory reference to align to. |
195 SWPointer align_to_ref_p(align_to_ref(), this); | 196 MemNode* mem_ref = find_align_to_ref(memops); |
196 int offset = align_to_ref_p.offset_in_bytes(); | 197 if (mem_ref == NULL) break; |
197 int scale = align_to_ref_p.scale_in_bytes(); | 198 align_to_refs.push(mem_ref); |
198 int vw = vector_width_in_bytes(); | 199 int iv_adjustment = get_iv_adjustment(mem_ref); |
199 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; | 200 |
200 int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; | 201 if (best_align_to_mem_ref == NULL) { |
201 | 202 // Set memory reference which is the best from all memory operations |
202 #ifndef PRODUCT | 203 // to be used for alignment. The pre-loop trip count is modified to align |
203 if (TraceSuperWord) | 204 // this reference to a vector-aligned address. |
204 tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d scale = %d iv_stride = %d", | 205 best_align_to_mem_ref = mem_ref; |
205 offset, iv_adjustment, align_to_ref_p.memory_size(), align_to_ref_p.scale_in_bytes(), iv_stride()); | 206 best_iv_adjustment = iv_adjustment; |
206 #endif | 207 } |
207 | 208 |
208 // Set alignment relative to "align_to_ref" | 209 SWPointer align_to_ref_p(mem_ref, this); |
209 for (int i = memops.size() - 1; i >= 0; i--) { | 210 // Set alignment relative to "align_to_ref" for all related memory operations. |
210 MemNode* s = memops.at(i)->as_Mem(); | 211 for (int i = memops.size() - 1; i >= 0; i--) { |
211 SWPointer p2(s, this); | 212 MemNode* s = memops.at(i)->as_Mem(); |
212 if (p2.comparable(align_to_ref_p)) { | 213 if (isomorphic(s, mem_ref)) { |
213 int align = memory_alignment(s, iv_adjustment); | 214 SWPointer p2(s, this); |
214 set_alignment(s, align); | 215 if (p2.comparable(align_to_ref_p)) { |
215 } else { | 216 int align = memory_alignment(s, iv_adjustment); |
216 memops.remove(i); | 217 set_alignment(s, align); |
217 } | 218 } |
218 } | 219 } |
219 | 220 } |
220 // Create initial pack pairs of memory operations | 221 |
221 for (uint i = 0; i < memops.size(); i++) { | 222 // Create initial pack pairs of memory operations for which |
222 Node* s1 = memops.at(i); | 223 // alignment is set and vectors will be aligned. |
223 for (uint j = 0; j < memops.size(); j++) { | 224 bool create_pack = true; |
224 Node* s2 = memops.at(j); | 225 if (memory_alignment(mem_ref, best_iv_adjustment) != 0) { |
225 if (s1 != s2 && are_adjacent_refs(s1, s2)) { | 226 if (same_velt_type(mem_ref, best_align_to_mem_ref)) { |
227 // Can't allow vectorization of unaligned memory accesses with the | |
228 // same type since it could be overlapped accesses to the same array. | |
229 create_pack = false; | |
230 } else { | |
231 // Allow independent (different type) unaligned memory operations | |
232 // if HW supports them. | |
233 if (!Matcher::misaligned_vectors_ok()) { | |
234 create_pack = false; | |
235 } else { | |
236 // Check if packs of the same memory type but | |
237 // with a different alignment were created before. | |
238 for (uint i = 0; i < align_to_refs.size(); i++) { | |
239 MemNode* mr = align_to_refs.at(i)->as_Mem(); | |
240 if (same_velt_type(mr, mem_ref) && | |
241 memory_alignment(mr, iv_adjustment) != 0) | |
242 create_pack = false; | |
243 } | |
244 } | |
245 } | |
246 } | |
247 if (create_pack) { | |
248 for (uint i = 0; i < memops.size(); i++) { | |
249 Node* s1 = memops.at(i); | |
226 int align = alignment(s1); | 250 int align = alignment(s1); |
227 if (stmts_can_pack(s1, s2, align)) { | 251 if (align == top_align) continue; |
228 Node_List* pair = new Node_List(); | 252 for (uint j = 0; j < memops.size(); j++) { |
229 pair->push(s1); | 253 Node* s2 = memops.at(j); |
230 pair->push(s2); | 254 if (alignment(s2) == top_align) continue; |
231 _packset.append(pair); | 255 if (s1 != s2 && are_adjacent_refs(s1, s2)) { |
232 } | 256 if (stmts_can_pack(s1, s2, align)) { |
233 } | 257 Node_List* pair = new Node_List(); |
234 } | 258 pair->push(s1); |
235 } | 259 pair->push(s2); |
260 _packset.append(pair); | |
261 } | |
262 } | |
263 } | |
264 } | |
265 } else { // Don't create unaligned pack | |
266 // First, remove remaining memory ops of the same type from the list. | |
267 for (int i = memops.size() - 1; i >= 0; i--) { | |
268 MemNode* s = memops.at(i)->as_Mem(); | |
269 if (same_velt_type(s, mem_ref)) { | |
270 memops.remove(i); | |
271 } | |
272 } | |
273 | |
274 // Second, remove already constructed packs of the same type. | |
275 for (int i = _packset.length() - 1; i >= 0; i--) { | |
276 Node_List* p = _packset.at(i); | |
277 MemNode* s = p->at(0)->as_Mem(); | |
278 if (same_velt_type(s, mem_ref)) { | |
279 remove_pack_at(i); | |
280 } | |
281 } | |
282 | |
283 // If needed find the best memory reference for loop alignment again. | |
284 if (same_velt_type(mem_ref, best_align_to_mem_ref)) { | |
285 // Put memory ops from remaining packs back on memops list for | |
286 // the best alignment search. | |
287 uint orig_msize = memops.size(); | |
288 for (int i = 0; i < _packset.length(); i++) { | |
289 Node_List* p = _packset.at(i); | |
290 MemNode* s = p->at(0)->as_Mem(); | |
291 assert(!same_velt_type(s, mem_ref), "sanity"); | |
292 memops.push(s); | |
293 } | |
294 MemNode* best_align_to_mem_ref = find_align_to_ref(memops); | |
295 if (best_align_to_mem_ref == NULL) break; | |
296 best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); | |
297 // Restore list. | |
298 while (memops.size() > orig_msize) | |
299 (void)memops.pop(); | |
300 } | |
301 } // unaligned memory accesses | |
302 | |
303 // Remove used mem nodes. | |
304 for (int i = memops.size() - 1; i >= 0; i--) { | |
305 MemNode* m = memops.at(i)->as_Mem(); | |
306 if (alignment(m) != top_align) { | |
307 memops.remove(i); | |
308 } | |
309 } | |
310 | |
311 } // while (memops.size() != 0 | |
312 set_align_to_ref(best_align_to_mem_ref); | |
236 | 313 |
237 #ifndef PRODUCT | 314 #ifndef PRODUCT |
238 if (TraceSuperWord) { | 315 if (TraceSuperWord) { |
239 tty->print_cr("\nAfter find_adjacent_refs"); | 316 tty->print_cr("\nAfter find_adjacent_refs"); |
240 print_packset(); | 317 print_packset(); |
244 | 321 |
245 //------------------------------find_align_to_ref--------------------------- | 322 //------------------------------find_align_to_ref--------------------------- |
246 // Find a memory reference to align the loop induction variable to. | 323 // Find a memory reference to align the loop induction variable to. |
247 // Looks first at stores then at loads, looking for a memory reference | 324 // Looks first at stores then at loads, looking for a memory reference |
248 // with the largest number of references similar to it. | 325 // with the largest number of references similar to it. |
249 void SuperWord::find_align_to_ref(Node_List &memops) { | 326 MemNode* SuperWord::find_align_to_ref(Node_List &memops) { |
250 GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0); | 327 GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0); |
251 | 328 |
252 // Count number of comparable memory ops | 329 // Count number of comparable memory ops |
253 for (uint i = 0; i < memops.size(); i++) { | 330 for (uint i = 0; i < memops.size(); i++) { |
254 MemNode* s1 = memops.at(i)->as_Mem(); | 331 MemNode* s1 = memops.at(i)->as_Mem(); |
268 } | 345 } |
269 } | 346 } |
270 } | 347 } |
271 } | 348 } |
272 | 349 |
273 // Find Store (or Load) with the greatest number of "comparable" references | 350 // Find Store (or Load) with the greatest number of "comparable" references, |
351 // biggest vector size, smallest data size and smallest iv offset. | |
274 int max_ct = 0; | 352 int max_ct = 0; |
353 int max_vw = 0; | |
275 int max_idx = -1; | 354 int max_idx = -1; |
276 int min_size = max_jint; | 355 int min_size = max_jint; |
277 int min_iv_offset = max_jint; | 356 int min_iv_offset = max_jint; |
278 for (uint j = 0; j < memops.size(); j++) { | 357 for (uint j = 0; j < memops.size(); j++) { |
279 MemNode* s = memops.at(j)->as_Mem(); | 358 MemNode* s = memops.at(j)->as_Mem(); |
280 if (s->is_Store()) { | 359 if (s->is_Store()) { |
360 int vw = vector_width_in_bytes(velt_basic_type(s)); | |
361 assert(vw > 1, "sanity"); | |
281 SWPointer p(s, this); | 362 SWPointer p(s, this); |
282 if (cmp_ct.at(j) > max_ct || | 363 if (cmp_ct.at(j) > max_ct || |
283 cmp_ct.at(j) == max_ct && (data_size(s) < min_size || | 364 cmp_ct.at(j) == max_ct && |
284 data_size(s) == min_size && | 365 (vw > max_vw || |
285 p.offset_in_bytes() < min_iv_offset)) { | 366 vw == max_vw && |
367 (data_size(s) < min_size || | |
368 data_size(s) == min_size && | |
369 (p.offset_in_bytes() < min_iv_offset)))) { | |
286 max_ct = cmp_ct.at(j); | 370 max_ct = cmp_ct.at(j); |
371 max_vw = vw; | |
287 max_idx = j; | 372 max_idx = j; |
288 min_size = data_size(s); | 373 min_size = data_size(s); |
289 min_iv_offset = p.offset_in_bytes(); | 374 min_iv_offset = p.offset_in_bytes(); |
290 } | 375 } |
291 } | 376 } |
293 // If no stores, look at loads | 378 // If no stores, look at loads |
294 if (max_ct == 0) { | 379 if (max_ct == 0) { |
295 for (uint j = 0; j < memops.size(); j++) { | 380 for (uint j = 0; j < memops.size(); j++) { |
296 MemNode* s = memops.at(j)->as_Mem(); | 381 MemNode* s = memops.at(j)->as_Mem(); |
297 if (s->is_Load()) { | 382 if (s->is_Load()) { |
383 int vw = vector_width_in_bytes(velt_basic_type(s)); | |
384 assert(vw > 1, "sanity"); | |
298 SWPointer p(s, this); | 385 SWPointer p(s, this); |
299 if (cmp_ct.at(j) > max_ct || | 386 if (cmp_ct.at(j) > max_ct || |
300 cmp_ct.at(j) == max_ct && (data_size(s) < min_size || | 387 cmp_ct.at(j) == max_ct && |
301 data_size(s) == min_size && | 388 (vw > max_vw || |
302 p.offset_in_bytes() < min_iv_offset)) { | 389 vw == max_vw && |
390 (data_size(s) < min_size || | |
391 data_size(s) == min_size && | |
392 (p.offset_in_bytes() < min_iv_offset)))) { | |
303 max_ct = cmp_ct.at(j); | 393 max_ct = cmp_ct.at(j); |
394 max_vw = vw; | |
304 max_idx = j; | 395 max_idx = j; |
305 min_size = data_size(s); | 396 min_size = data_size(s); |
306 min_iv_offset = p.offset_in_bytes(); | 397 min_iv_offset = p.offset_in_bytes(); |
307 } | 398 } |
308 } | 399 } |
309 } | 400 } |
310 } | 401 } |
311 | 402 |
312 if (max_ct > 0) | 403 #ifdef ASSERT |
313 set_align_to_ref(memops.at(max_idx)->as_Mem()); | |
314 | |
315 #ifndef PRODUCT | |
316 if (TraceSuperWord && Verbose) { | 404 if (TraceSuperWord && Verbose) { |
317 tty->print_cr("\nVector memops after find_align_to_refs"); | 405 tty->print_cr("\nVector memops after find_align_to_refs"); |
318 for (uint i = 0; i < memops.size(); i++) { | 406 for (uint i = 0; i < memops.size(); i++) { |
319 MemNode* s = memops.at(i)->as_Mem(); | 407 MemNode* s = memops.at(i)->as_Mem(); |
320 s->dump(); | 408 s->dump(); |
321 } | 409 } |
322 } | 410 } |
323 #endif | 411 #endif |
412 | |
413 if (max_ct > 0) { | |
414 #ifdef ASSERT | |
415 if (TraceSuperWord) { | |
416 tty->print("\nVector align to node: "); | |
417 memops.at(max_idx)->as_Mem()->dump(); | |
418 } | |
419 #endif | |
420 return memops.at(max_idx)->as_Mem(); | |
421 } | |
422 return NULL; | |
324 } | 423 } |
325 | 424 |
326 //------------------------------ref_is_alignable--------------------------- | 425 //------------------------------ref_is_alignable--------------------------- |
327 // Can the preloop align the reference to position zero in the vector? | 426 // Can the preloop align the reference to position zero in the vector? |
328 bool SuperWord::ref_is_alignable(SWPointer& p) { | 427 bool SuperWord::ref_is_alignable(SWPointer& p) { |
339 if (ABS(span) == p.memory_size()) | 438 if (ABS(span) == p.memory_size()) |
340 return true; | 439 return true; |
341 | 440 |
342 // If initial offset from start of object is computable, | 441 // If initial offset from start of object is computable, |
343 // compute alignment within the vector. | 442 // compute alignment within the vector. |
344 int vw = vector_width_in_bytes(); | 443 BasicType bt = velt_basic_type(p.mem()); |
444 int vw = vector_width_in_bytes(bt); | |
445 assert(vw > 1, "sanity"); | |
345 if (vw % span == 0) { | 446 if (vw % span == 0) { |
346 Node* init_nd = pre_end->init_trip(); | 447 Node* init_nd = pre_end->init_trip(); |
347 if (init_nd->is_Con() && p.invar() == NULL) { | 448 if (init_nd->is_Con() && p.invar() == NULL) { |
348 int init = init_nd->bottom_type()->is_int()->get_con(); | 449 int init = init_nd->bottom_type()->is_int()->get_con(); |
349 | 450 |
357 return (init_offset % vw) % -span == 0; | 458 return (init_offset % vw) % -span == 0; |
358 } | 459 } |
359 } | 460 } |
360 } | 461 } |
361 return false; | 462 return false; |
463 } | |
464 | |
465 //---------------------------get_iv_adjustment--------------------------- | |
466 // Calculate loop's iv adjustment for this memory ops. | |
467 int SuperWord::get_iv_adjustment(MemNode* mem_ref) { | |
468 SWPointer align_to_ref_p(mem_ref, this); | |
469 int offset = align_to_ref_p.offset_in_bytes(); | |
470 int scale = align_to_ref_p.scale_in_bytes(); | |
471 BasicType bt = velt_basic_type(mem_ref); | |
472 int vw = vector_width_in_bytes(bt); | |
473 assert(vw > 1, "sanity"); | |
474 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; | |
475 int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; | |
476 | |
477 #ifndef PRODUCT | |
478 if (TraceSuperWord) | |
479 tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d", | |
480 offset, iv_adjustment, align_to_ref_p.memory_size(), scale, iv_stride(), vw); | |
481 #endif | |
482 return iv_adjustment; | |
362 } | 483 } |
363 | 484 |
364 //---------------------------dependence_graph--------------------------- | 485 //---------------------------dependence_graph--------------------------- |
365 // Construct dependency graph. | 486 // Construct dependency graph. |
366 // Add dependence edges to load/store nodes for memory dependence | 487 // Add dependence edges to load/store nodes for memory dependence |
486 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and | 607 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and |
487 // s1 aligned at "align" | 608 // s1 aligned at "align" |
488 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { | 609 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { |
489 | 610 |
490 // Do not use superword for non-primitives | 611 // Do not use superword for non-primitives |
491 if((s1->is_Mem() && !is_java_primitive(s1->as_Mem()->memory_type())) || | 612 BasicType bt1 = velt_basic_type(s1); |
492 (s2->is_Mem() && !is_java_primitive(s2->as_Mem()->memory_type()))) | 613 BasicType bt2 = velt_basic_type(s2); |
614 if(!is_java_primitive(bt1) || !is_java_primitive(bt2)) | |
493 return false; | 615 return false; |
616 if (Matcher::max_vector_size(bt1) < 2) { | |
617 return false; // No vectors for this type | |
618 } | |
494 | 619 |
495 if (isomorphic(s1, s2)) { | 620 if (isomorphic(s1, s2)) { |
496 if (independent(s1, s2)) { | 621 if (independent(s1, s2)) { |
497 if (!exists_at(s1, 0) && !exists_at(s2, 1)) { | 622 if (!exists_at(s1, 0) && !exists_at(s2, 1)) { |
498 if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { | 623 if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { |
550 // Are s1 and s2 similar? | 675 // Are s1 and s2 similar? |
551 bool SuperWord::isomorphic(Node* s1, Node* s2) { | 676 bool SuperWord::isomorphic(Node* s1, Node* s2) { |
552 if (s1->Opcode() != s2->Opcode()) return false; | 677 if (s1->Opcode() != s2->Opcode()) return false; |
553 if (s1->req() != s2->req()) return false; | 678 if (s1->req() != s2->req()) return false; |
554 if (s1->in(0) != s2->in(0)) return false; | 679 if (s1->in(0) != s2->in(0)) return false; |
555 if (velt_type(s1) != velt_type(s2)) return false; | 680 if (!same_velt_type(s1, s2)) return false; |
556 return true; | 681 return true; |
557 } | 682 } |
558 | 683 |
559 //------------------------------independent--------------------------- | 684 //------------------------------independent--------------------------- |
560 // Is there no data path from s1 to s2 or s2 to s1? | 685 // Is there no data path from s1 to s2 or s2 to s1? |
593 } | 718 } |
594 | 719 |
595 //------------------------------set_alignment--------------------------- | 720 //------------------------------set_alignment--------------------------- |
596 void SuperWord::set_alignment(Node* s1, Node* s2, int align) { | 721 void SuperWord::set_alignment(Node* s1, Node* s2, int align) { |
597 set_alignment(s1, align); | 722 set_alignment(s1, align); |
598 set_alignment(s2, align + data_size(s1)); | 723 if (align == top_align || align == bottom_align) { |
724 set_alignment(s2, align); | |
725 } else { | |
726 set_alignment(s2, align + data_size(s1)); | |
727 } | |
599 } | 728 } |
600 | 729 |
601 //------------------------------data_size--------------------------- | 730 //------------------------------data_size--------------------------- |
602 int SuperWord::data_size(Node* s) { | 731 int SuperWord::data_size(Node* s) { |
603 const Type* t = velt_type(s); | 732 int bsize = type2aelembytes(velt_basic_type(s)); |
604 BasicType bt = t->array_element_basic_type(); | |
605 int bsize = type2aelembytes(bt); | |
606 assert(bsize != 0, "valid size"); | 733 assert(bsize != 0, "valid size"); |
607 return bsize; | 734 return bsize; |
608 } | 735 } |
609 | 736 |
610 //------------------------------extend_packlist--------------------------- | 737 //------------------------------extend_packlist--------------------------- |
629 } | 756 } |
630 | 757 |
631 //------------------------------follow_use_defs--------------------------- | 758 //------------------------------follow_use_defs--------------------------- |
632 // Extend the packset by visiting operand definitions of nodes in pack p | 759 // Extend the packset by visiting operand definitions of nodes in pack p |
633 bool SuperWord::follow_use_defs(Node_List* p) { | 760 bool SuperWord::follow_use_defs(Node_List* p) { |
761 assert(p->size() == 2, "just checking"); | |
634 Node* s1 = p->at(0); | 762 Node* s1 = p->at(0); |
635 Node* s2 = p->at(1); | 763 Node* s2 = p->at(1); |
636 assert(p->size() == 2, "just checking"); | |
637 assert(s1->req() == s2->req(), "just checking"); | 764 assert(s1->req() == s2->req(), "just checking"); |
638 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); | 765 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
639 | 766 |
640 if (s1->is_Load()) return false; | 767 if (s1->is_Load()) return false; |
641 | 768 |
716 uint i2 = 0; | 843 uint i2 = 0; |
717 do { | 844 do { |
718 for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; | 845 for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; |
719 for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; | 846 for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; |
720 if (i1 != i2) { | 847 if (i1 != i2) { |
721 return false; | 848 if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) { |
849 // Further analysis relies on operands position matching. | |
850 u2->swap_edges(i1, i2); | |
851 } else { | |
852 return false; | |
853 } | |
722 } | 854 } |
723 } while (i1 < ct); | 855 } while (i1 < ct); |
724 return true; | 856 return true; |
725 } | 857 } |
726 | 858 |
727 //------------------------------est_savings--------------------------- | 859 //------------------------------est_savings--------------------------- |
728 // Estimate the savings from executing s1 and s2 as a pack | 860 // Estimate the savings from executing s1 and s2 as a pack |
729 int SuperWord::est_savings(Node* s1, Node* s2) { | 861 int SuperWord::est_savings(Node* s1, Node* s2) { |
730 int save = 2 - 1; // 2 operations per instruction in packed form | 862 int save_in = 2 - 1; // 2 operations per instruction in packed form |
731 | 863 |
732 // inputs | 864 // inputs |
733 for (uint i = 1; i < s1->req(); i++) { | 865 for (uint i = 1; i < s1->req(); i++) { |
734 Node* x1 = s1->in(i); | 866 Node* x1 = s1->in(i); |
735 Node* x2 = s2->in(i); | 867 Node* x2 = s2->in(i); |
736 if (x1 != x2) { | 868 if (x1 != x2) { |
737 if (are_adjacent_refs(x1, x2)) { | 869 if (are_adjacent_refs(x1, x2)) { |
738 save += adjacent_profit(x1, x2); | 870 save_in += adjacent_profit(x1, x2); |
739 } else if (!in_packset(x1, x2)) { | 871 } else if (!in_packset(x1, x2)) { |
740 save -= pack_cost(2); | 872 save_in -= pack_cost(2); |
741 } else { | 873 } else { |
742 save += unpack_cost(2); | 874 save_in += unpack_cost(2); |
743 } | 875 } |
744 } | 876 } |
745 } | 877 } |
746 | 878 |
747 // uses of result | 879 // uses of result |
748 uint ct = 0; | 880 uint ct = 0; |
881 int save_use = 0; | |
749 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { | 882 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
750 Node* s1_use = s1->fast_out(i); | 883 Node* s1_use = s1->fast_out(i); |
751 for (int j = 0; j < _packset.length(); j++) { | 884 for (int j = 0; j < _packset.length(); j++) { |
752 Node_List* p = _packset.at(j); | 885 Node_List* p = _packset.at(j); |
753 if (p->at(0) == s1_use) { | 886 if (p->at(0) == s1_use) { |
754 for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { | 887 for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { |
755 Node* s2_use = s2->fast_out(k); | 888 Node* s2_use = s2->fast_out(k); |
756 if (p->at(p->size()-1) == s2_use) { | 889 if (p->at(p->size()-1) == s2_use) { |
757 ct++; | 890 ct++; |
758 if (are_adjacent_refs(s1_use, s2_use)) { | 891 if (are_adjacent_refs(s1_use, s2_use)) { |
759 save += adjacent_profit(s1_use, s2_use); | 892 save_use += adjacent_profit(s1_use, s2_use); |
760 } | 893 } |
761 } | 894 } |
762 } | 895 } |
763 } | 896 } |
764 } | 897 } |
765 } | 898 } |
766 | 899 |
767 if (ct < s1->outcnt()) save += unpack_cost(1); | 900 if (ct < s1->outcnt()) save_use += unpack_cost(1); |
768 if (ct < s2->outcnt()) save += unpack_cost(1); | 901 if (ct < s2->outcnt()) save_use += unpack_cost(1); |
769 | 902 |
770 return save; | 903 return MAX2(save_in, save_use); |
771 } | 904 } |
772 | 905 |
773 //------------------------------costs--------------------------- | 906 //------------------------------costs--------------------------- |
774 int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; } | 907 int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; } |
775 int SuperWord::pack_cost(int ct) { return ct; } | 908 int SuperWord::pack_cost(int ct) { return ct; } |
776 int SuperWord::unpack_cost(int ct) { return ct; } | 909 int SuperWord::unpack_cost(int ct) { return ct; } |
777 | 910 |
778 //------------------------------combine_packs--------------------------- | 911 //------------------------------combine_packs--------------------------- |
779 // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last | 912 // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last |
780 void SuperWord::combine_packs() { | 913 void SuperWord::combine_packs() { |
781 bool changed; | 914 bool changed = true; |
782 do { | 915 // Combine packs regardless max vector size. |
916 while (changed) { | |
783 changed = false; | 917 changed = false; |
784 for (int i = 0; i < _packset.length(); i++) { | 918 for (int i = 0; i < _packset.length(); i++) { |
785 Node_List* p1 = _packset.at(i); | 919 Node_List* p1 = _packset.at(i); |
786 if (p1 == NULL) continue; | 920 if (p1 == NULL) continue; |
787 for (int j = 0; j < _packset.length(); j++) { | 921 for (int j = 0; j < _packset.length(); j++) { |
788 Node_List* p2 = _packset.at(j); | 922 Node_List* p2 = _packset.at(j); |
789 if (p2 == NULL) continue; | 923 if (p2 == NULL) continue; |
924 if (i == j) continue; | |
790 if (p1->at(p1->size()-1) == p2->at(0)) { | 925 if (p1->at(p1->size()-1) == p2->at(0)) { |
791 for (uint k = 1; k < p2->size(); k++) { | 926 for (uint k = 1; k < p2->size(); k++) { |
792 p1->push(p2->at(k)); | 927 p1->push(p2->at(k)); |
793 } | 928 } |
794 _packset.at_put(j, NULL); | 929 _packset.at_put(j, NULL); |
795 changed = true; | 930 changed = true; |
796 } | 931 } |
797 } | 932 } |
798 } | 933 } |
799 } while (changed); | 934 } |
800 | 935 |
936 // Split packs which have size greater then max vector size. | |
937 for (int i = 0; i < _packset.length(); i++) { | |
938 Node_List* p1 = _packset.at(i); | |
939 if (p1 != NULL) { | |
940 BasicType bt = velt_basic_type(p1->at(0)); | |
941 uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector | |
942 assert(is_power_of_2(max_vlen), "sanity"); | |
943 uint psize = p1->size(); | |
944 if (!is_power_of_2(psize)) { | |
945 // Skip pack which can't be vector. | |
946 // case1: for(...) { a[i] = i; } elements values are different (i+x) | |
947 // case2: for(...) { a[i] = b[i+1]; } can't align both, load and store | |
948 _packset.at_put(i, NULL); | |
949 continue; | |
950 } | |
951 if (psize > max_vlen) { | |
952 Node_List* pack = new Node_List(); | |
953 for (uint j = 0; j < psize; j++) { | |
954 pack->push(p1->at(j)); | |
955 if (pack->size() >= max_vlen) { | |
956 assert(is_power_of_2(pack->size()), "sanity"); | |
957 _packset.append(pack); | |
958 pack = new Node_List(); | |
959 } | |
960 } | |
961 _packset.at_put(i, NULL); | |
962 } | |
963 } | |
964 } | |
965 | |
966 // Compress list. | |
801 for (int i = _packset.length() - 1; i >= 0; i--) { | 967 for (int i = _packset.length() - 1; i >= 0; i--) { |
802 Node_List* p1 = _packset.at(i); | 968 Node_List* p1 = _packset.at(i); |
803 if (p1 == NULL) { | 969 if (p1 == NULL) { |
804 _packset.remove_at(i); | 970 _packset.remove_at(i); |
805 } | 971 } |
878 | 1044 |
879 //------------------------------implemented--------------------------- | 1045 //------------------------------implemented--------------------------- |
880 // Can code be generated for pack p? | 1046 // Can code be generated for pack p? |
881 bool SuperWord::implemented(Node_List* p) { | 1047 bool SuperWord::implemented(Node_List* p) { |
882 Node* p0 = p->at(0); | 1048 Node* p0 = p->at(0); |
883 int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0)); | 1049 return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0)); |
884 return vopc > 0 && Matcher::has_match_rule(vopc); | |
885 } | 1050 } |
886 | 1051 |
887 //------------------------------profitable--------------------------- | 1052 //------------------------------profitable--------------------------- |
888 // For pack p, are all operands and all uses (with in the block) vector? | 1053 // For pack p, are all operands and all uses (with in the block) vector? |
889 bool SuperWord::profitable(Node_List* p) { | 1054 bool SuperWord::profitable(Node_List* p) { |
937 co_locate_pack(_packset.at(i)); | 1102 co_locate_pack(_packset.at(i)); |
938 } | 1103 } |
939 } | 1104 } |
940 | 1105 |
941 //-------------------------------remove_and_insert------------------- | 1106 //-------------------------------remove_and_insert------------------- |
942 //remove "current" from its current position in the memory graph and insert | 1107 // Remove "current" from its current position in the memory graph and insert |
943 //it after the appropriate insertion point (lip or uip) | 1108 // it after the appropriate insertion point (lip or uip). |
944 void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, | 1109 void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, |
945 Node *uip, Unique_Node_List &sched_before) { | 1110 Node *uip, Unique_Node_List &sched_before) { |
946 Node* my_mem = current->in(MemNode::Memory); | 1111 Node* my_mem = current->in(MemNode::Memory); |
947 _igvn.rehash_node_delayed(current); | 1112 bool sched_up = sched_before.member(current); |
948 _igvn.hash_delete(my_mem); | 1113 |
949 | 1114 // remove current_store from its current position in the memmory graph |
950 //remove current_store from its current position in the memmory graph | |
951 for (DUIterator i = current->outs(); current->has_out(i); i++) { | 1115 for (DUIterator i = current->outs(); current->has_out(i); i++) { |
952 Node* use = current->out(i); | 1116 Node* use = current->out(i); |
953 if (use->is_Mem()) { | 1117 if (use->is_Mem()) { |
954 assert(use->in(MemNode::Memory) == current, "must be"); | 1118 assert(use->in(MemNode::Memory) == current, "must be"); |
955 _igvn.rehash_node_delayed(use); | |
956 if (use == prev) { // connect prev to my_mem | 1119 if (use == prev) { // connect prev to my_mem |
957 use->set_req(MemNode::Memory, my_mem); | 1120 _igvn.replace_input_of(use, MemNode::Memory, my_mem); |
1121 --i; //deleted this edge; rescan position | |
958 } else if (sched_before.member(use)) { | 1122 } else if (sched_before.member(use)) { |
959 _igvn.hash_delete(uip); | 1123 if (!sched_up) { // Will be moved together with current |
960 use->set_req(MemNode::Memory, uip); | 1124 _igvn.replace_input_of(use, MemNode::Memory, uip); |
1125 --i; //deleted this edge; rescan position | |
1126 } | |
961 } else { | 1127 } else { |
962 _igvn.hash_delete(lip); | 1128 if (sched_up) { // Will be moved together with current |
963 use->set_req(MemNode::Memory, lip); | 1129 _igvn.replace_input_of(use, MemNode::Memory, lip); |
964 } | 1130 --i; //deleted this edge; rescan position |
965 --i; //deleted this edge; rescan position | 1131 } |
966 } | 1132 } |
967 } | 1133 } |
968 | 1134 } |
969 bool sched_up = sched_before.member(current); | 1135 |
970 Node *insert_pt = sched_up ? uip : lip; | 1136 Node *insert_pt = sched_up ? uip : lip; |
971 _igvn.hash_delete(insert_pt); | |
972 | 1137 |
973 // all uses of insert_pt's memory state should use current's instead | 1138 // all uses of insert_pt's memory state should use current's instead |
974 for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) { | 1139 for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) { |
975 Node* use = insert_pt->out(i); | 1140 Node* use = insert_pt->out(i); |
976 if (use->is_Mem()) { | 1141 if (use->is_Mem()) { |
986 --i; | 1151 --i; |
987 } | 1152 } |
988 } | 1153 } |
989 | 1154 |
990 //connect current to insert_pt | 1155 //connect current to insert_pt |
991 current->set_req(MemNode::Memory, insert_pt); | 1156 _igvn.replace_input_of(current, MemNode::Memory, insert_pt); |
992 } | 1157 } |
993 | 1158 |
994 //------------------------------co_locate_pack---------------------------------- | 1159 //------------------------------co_locate_pack---------------------------------- |
995 // To schedule a store pack, we need to move any sandwiched memory ops either before | 1160 // To schedule a store pack, we need to move any sandwiched memory ops either before |
996 // or after the pack, based upon dependence information: | 1161 // or after the pack, based upon dependence information: |
1023 for (DUIterator i = current->outs(); current->has_out(i); i++) { | 1188 for (DUIterator i = current->outs(); current->has_out(i); i++) { |
1024 Node* use = current->out(i); | 1189 Node* use = current->out(i); |
1025 if (use->is_Mem() && use != previous) | 1190 if (use->is_Mem() && use != previous) |
1026 memops.push(use); | 1191 memops.push(use); |
1027 } | 1192 } |
1028 if(current == first) break; | 1193 if (current == first) break; |
1029 previous = current; | 1194 previous = current; |
1030 current = current->in(MemNode::Memory)->as_Mem(); | 1195 current = current->in(MemNode::Memory)->as_Mem(); |
1031 } | 1196 } |
1032 | 1197 |
1033 // determine which memory operations should be scheduled before the pack | 1198 // determine which memory operations should be scheduled before the pack |
1036 if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) { | 1201 if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) { |
1037 for (uint j = 0; j< i; j++) { | 1202 for (uint j = 0; j< i; j++) { |
1038 Node *s2 = memops.at(j); | 1203 Node *s2 = memops.at(j); |
1039 if (!independent(s1, s2)) { | 1204 if (!independent(s1, s2)) { |
1040 if (in_pack(s2, pk) || schedule_before_pack.member(s2)) { | 1205 if (in_pack(s2, pk) || schedule_before_pack.member(s2)) { |
1041 schedule_before_pack.push(s1); //s1 must be scheduled before | 1206 schedule_before_pack.push(s1); // s1 must be scheduled before |
1042 Node_List* mem_pk = my_pack(s1); | 1207 Node_List* mem_pk = my_pack(s1); |
1043 if (mem_pk != NULL) { | 1208 if (mem_pk != NULL) { |
1044 for (uint ii = 0; ii < mem_pk->size(); ii++) { | 1209 for (uint ii = 0; ii < mem_pk->size(); ii++) { |
1045 Node* s = mem_pk->at(ii); // follow partner | 1210 Node* s = mem_pk->at(ii); // follow partner |
1046 if (memops.member(s) && !schedule_before_pack.member(s)) | 1211 if (memops.member(s) && !schedule_before_pack.member(s)) |
1047 schedule_before_pack.push(s); | 1212 schedule_before_pack.push(s); |
1048 } | 1213 } |
1049 } | 1214 } |
1215 break; | |
1050 } | 1216 } |
1051 } | 1217 } |
1052 } | 1218 } |
1053 } | 1219 } |
1054 } | 1220 } |
1055 | 1221 |
1222 Node* upper_insert_pt = first->in(MemNode::Memory); | |
1223 // Following code moves loads connected to upper_insert_pt below aliased stores. | |
1224 // Collect such loads here and reconnect them back to upper_insert_pt later. | |
1225 memops.clear(); | |
1226 for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) { | |
1227 Node* use = upper_insert_pt->out(i); | |
1228 if (!use->is_Store()) | |
1229 memops.push(use); | |
1230 } | |
1231 | |
1056 MemNode* lower_insert_pt = last; | 1232 MemNode* lower_insert_pt = last; |
1057 Node* upper_insert_pt = first->in(MemNode::Memory); | |
1058 previous = last; //previous store in pk | 1233 previous = last; //previous store in pk |
1059 current = last->in(MemNode::Memory)->as_Mem(); | 1234 current = last->in(MemNode::Memory)->as_Mem(); |
1060 | 1235 |
1061 //start scheduling from "last" to "first" | 1236 // start scheduling from "last" to "first" |
1062 while (true) { | 1237 while (true) { |
1063 assert(in_bb(current), "stay in block"); | 1238 assert(in_bb(current), "stay in block"); |
1064 assert(in_pack(previous, pk), "previous stays in pack"); | 1239 assert(in_pack(previous, pk), "previous stays in pack"); |
1065 Node* my_mem = current->in(MemNode::Memory); | 1240 Node* my_mem = current->in(MemNode::Memory); |
1066 | 1241 |
1067 if (in_pack(current, pk)) { | 1242 if (in_pack(current, pk)) { |
1068 // Forward users of my memory state (except "previous) to my input memory state | 1243 // Forward users of my memory state (except "previous) to my input memory state |
1069 _igvn.hash_delete(current); | |
1070 for (DUIterator i = current->outs(); current->has_out(i); i++) { | 1244 for (DUIterator i = current->outs(); current->has_out(i); i++) { |
1071 Node* use = current->out(i); | 1245 Node* use = current->out(i); |
1072 if (use->is_Mem() && use != previous) { | 1246 if (use->is_Mem() && use != previous) { |
1073 assert(use->in(MemNode::Memory) == current, "must be"); | 1247 assert(use->in(MemNode::Memory) == current, "must be"); |
1074 if (schedule_before_pack.member(use)) { | 1248 if (schedule_before_pack.member(use)) { |
1075 _igvn.hash_delete(upper_insert_pt); | |
1076 _igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt); | 1249 _igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt); |
1077 } else { | 1250 } else { |
1078 _igvn.hash_delete(lower_insert_pt); | |
1079 _igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt); | 1251 _igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt); |
1080 } | 1252 } |
1081 --i; // deleted this edge; rescan position | 1253 --i; // deleted this edge; rescan position |
1082 } | 1254 } |
1083 } | 1255 } |
1087 } | 1259 } |
1088 | 1260 |
1089 if (current == first) break; | 1261 if (current == first) break; |
1090 current = my_mem->as_Mem(); | 1262 current = my_mem->as_Mem(); |
1091 } // end while | 1263 } // end while |
1264 | |
1265 // Reconnect loads back to upper_insert_pt. | |
1266 for (uint i = 0; i < memops.size(); i++) { | |
1267 Node *ld = memops.at(i); | |
1268 if (ld->in(MemNode::Memory) != upper_insert_pt) { | |
1269 _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt); | |
1270 } | |
1271 } | |
1092 } else if (pk->at(0)->is_Load()) { //load | 1272 } else if (pk->at(0)->is_Load()) { //load |
1093 // all loads in the pack should have the same memory state. By default, | 1273 // all loads in the pack should have the same memory state. By default, |
1094 // we use the memory state of the last load. However, if any load could | 1274 // we use the memory state of the last load. However, if any load could |
1095 // not be moved down due to the dependence constraint, we use the memory | 1275 // not be moved down due to the dependence constraint, we use the memory |
1096 // state of the first load. | 1276 // state of the first load. |
1147 if (p && n == executed_last(p)) { | 1327 if (p && n == executed_last(p)) { |
1148 uint vlen = p->size(); | 1328 uint vlen = p->size(); |
1149 Node* vn = NULL; | 1329 Node* vn = NULL; |
1150 Node* low_adr = p->at(0); | 1330 Node* low_adr = p->at(0); |
1151 Node* first = executed_first(p); | 1331 Node* first = executed_first(p); |
1332 int opc = n->Opcode(); | |
1152 if (n->is_Load()) { | 1333 if (n->is_Load()) { |
1153 int opc = n->Opcode(); | |
1154 Node* ctl = n->in(MemNode::Control); | 1334 Node* ctl = n->in(MemNode::Control); |
1155 Node* mem = first->in(MemNode::Memory); | 1335 Node* mem = first->in(MemNode::Memory); |
1156 Node* adr = low_adr->in(MemNode::Address); | 1336 Node* adr = low_adr->in(MemNode::Address); |
1157 const TypePtr* atyp = n->adr_type(); | 1337 const TypePtr* atyp = n->adr_type(); |
1158 vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen); | 1338 vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n)); |
1159 | |
1160 } else if (n->is_Store()) { | 1339 } else if (n->is_Store()) { |
1161 // Promote value to be stored to vector | 1340 // Promote value to be stored to vector |
1162 Node* val = vector_opd(p, MemNode::ValueIn); | 1341 Node* val = vector_opd(p, MemNode::ValueIn); |
1163 | |
1164 int opc = n->Opcode(); | |
1165 Node* ctl = n->in(MemNode::Control); | 1342 Node* ctl = n->in(MemNode::Control); |
1166 Node* mem = first->in(MemNode::Memory); | 1343 Node* mem = first->in(MemNode::Memory); |
1167 Node* adr = low_adr->in(MemNode::Address); | 1344 Node* adr = low_adr->in(MemNode::Address); |
1168 const TypePtr* atyp = n->adr_type(); | 1345 const TypePtr* atyp = n->adr_type(); |
1169 vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen); | 1346 vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen); |
1170 | |
1171 } else if (n->req() == 3) { | 1347 } else if (n->req() == 3) { |
1172 // Promote operands to vector | 1348 // Promote operands to vector |
1173 Node* in1 = vector_opd(p, 1); | 1349 Node* in1 = vector_opd(p, 1); |
1174 Node* in2 = vector_opd(p, 2); | 1350 Node* in2 = vector_opd(p, 2); |
1175 vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n)); | 1351 vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n)); |
1176 | |
1177 } else { | 1352 } else { |
1178 ShouldNotReachHere(); | 1353 ShouldNotReachHere(); |
1179 } | 1354 } |
1180 | 1355 assert(vn != NULL, "sanity"); |
1181 _phase->_igvn.register_new_node_with_optimizer(vn); | 1356 _phase->_igvn.register_new_node_with_optimizer(vn); |
1182 _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); | 1357 _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); |
1183 for (uint j = 0; j < p->size(); j++) { | 1358 for (uint j = 0; j < p->size(); j++) { |
1184 Node* pm = p->at(j); | 1359 Node* pm = p->at(j); |
1185 _igvn.replace_node(pm, vn); | 1360 _igvn.replace_node(pm, vn); |
1186 } | 1361 } |
1187 _igvn._worklist.push(vn); | 1362 _igvn._worklist.push(vn); |
1363 #ifdef ASSERT | |
1364 if (TraceSuperWord) { | |
1365 tty->print("new Vector node: "); | |
1366 vn->dump(); | |
1367 } | |
1368 #endif | |
1188 } | 1369 } |
1189 } | 1370 } |
1190 } | 1371 } |
1191 | 1372 |
1192 //------------------------------vector_opd--------------------------- | 1373 //------------------------------vector_opd--------------------------- |
1205 break; | 1386 break; |
1206 } | 1387 } |
1207 } | 1388 } |
1208 | 1389 |
1209 if (same_opd) { | 1390 if (same_opd) { |
1210 if (opd->is_Vector() || opd->is_VectorLoad()) { | 1391 if (opd->is_Vector() || opd->is_LoadVector()) { |
1211 return opd; // input is matching vector | 1392 return opd; // input is matching vector |
1212 } | 1393 } |
1213 assert(!opd->is_VectorStore(), "such vector is not expected here"); | 1394 assert(!opd->is_StoreVector(), "such vector is not expected here"); |
1214 // Convert scalar input to vector with the same number of elements as | 1395 // Convert scalar input to vector with the same number of elements as |
1215 // p0's vector. Use p0's type because size of operand's container in | 1396 // p0's vector. Use p0's type because size of operand's container in |
1216 // vector should match p0's size regardless operand's size. | 1397 // vector should match p0's size regardless operand's size. |
1217 const Type* p0_t = velt_type(p0); | 1398 const Type* p0_t = velt_type(p0); |
1218 VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t); | 1399 VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t); |
1219 | 1400 |
1220 _phase->_igvn.register_new_node_with_optimizer(vn); | 1401 _phase->_igvn.register_new_node_with_optimizer(vn); |
1221 _phase->set_ctrl(vn, _phase->get_ctrl(opd)); | 1402 _phase->set_ctrl(vn, _phase->get_ctrl(opd)); |
1403 #ifdef ASSERT | |
1404 if (TraceSuperWord) { | |
1405 tty->print("new Vector node: "); | |
1406 vn->dump(); | |
1407 } | |
1408 #endif | |
1222 return vn; | 1409 return vn; |
1223 } | 1410 } |
1224 | 1411 |
1225 // Insert pack operation | 1412 // Insert pack operation |
1226 const Type* p0_t = velt_type(p0); | 1413 BasicType bt = velt_basic_type(p0); |
1227 PackNode* pk = PackNode::make(_phase->C, opd, p0_t); | 1414 PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt); |
1228 DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); ) | 1415 DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); ) |
1229 | 1416 |
1230 for (uint i = 1; i < vlen; i++) { | 1417 for (uint i = 1; i < vlen; i++) { |
1231 Node* pi = p->at(i); | 1418 Node* pi = p->at(i); |
1232 Node* in = pi->in(opd_idx); | 1419 Node* in = pi->in(opd_idx); |
1233 assert(my_pack(in) == NULL, "Should already have been unpacked"); | 1420 assert(my_pack(in) == NULL, "Should already have been unpacked"); |
1234 assert(opd_bt == in->bottom_type()->basic_type(), "all same type"); | 1421 assert(opd_bt == in->bottom_type()->basic_type(), "all same type"); |
1235 pk->add_opd(in); | 1422 pk->add_opd(i, in); |
1236 } | 1423 } |
1237 _phase->_igvn.register_new_node_with_optimizer(pk); | 1424 _phase->_igvn.register_new_node_with_optimizer(pk); |
1238 _phase->set_ctrl(pk, _phase->get_ctrl(opd)); | 1425 _phase->set_ctrl(pk, _phase->get_ctrl(opd)); |
1426 #ifdef ASSERT | |
1427 if (TraceSuperWord) { | |
1428 tty->print("new Pack node: "); | |
1429 pk->dump(); | |
1430 } | |
1431 #endif | |
1239 return pk; | 1432 return pk; |
1240 } | 1433 } |
1241 | 1434 |
1242 //------------------------------insert_extracts--------------------------- | 1435 //------------------------------insert_extracts--------------------------- |
1243 // If a use of pack p is not a vector use, then replace the | 1436 // If a use of pack p is not a vector use, then replace the |
1271 Node* def = use->in(idx); | 1464 Node* def = use->in(idx); |
1272 | 1465 |
1273 // Insert extract operation | 1466 // Insert extract operation |
1274 _igvn.hash_delete(def); | 1467 _igvn.hash_delete(def); |
1275 int def_pos = alignment(def) / data_size(def); | 1468 int def_pos = alignment(def) / data_size(def); |
1276 const Type* def_t = velt_type(def); | 1469 |
1277 | 1470 Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def)); |
1278 Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t); | |
1279 _phase->_igvn.register_new_node_with_optimizer(ex); | 1471 _phase->_igvn.register_new_node_with_optimizer(ex); |
1280 _phase->set_ctrl(ex, _phase->get_ctrl(def)); | 1472 _phase->set_ctrl(ex, _phase->get_ctrl(def)); |
1281 _igvn.replace_input_of(use, idx, ex); | 1473 _igvn.replace_input_of(use, idx, ex); |
1282 _igvn._worklist.push(def); | 1474 _igvn._worklist.push(def); |
1283 | 1475 |
1284 bb_insert_after(ex, bb_idx(def)); | 1476 bb_insert_after(ex, bb_idx(def)); |
1285 set_velt_type(ex, def_t); | 1477 set_velt_type(ex, velt_type(def)); |
1286 } | 1478 } |
1287 } | 1479 } |
1288 | 1480 |
1289 //------------------------------is_vector_use--------------------------- | 1481 //------------------------------is_vector_use--------------------------- |
1290 // Is use->in(u_idx) a vector use? | 1482 // Is use->in(u_idx) a vector use? |
1507 #endif | 1699 #endif |
1508 | 1700 |
1509 // Initial type | 1701 // Initial type |
1510 for (int i = 0; i < _block.length(); i++) { | 1702 for (int i = 0; i < _block.length(); i++) { |
1511 Node* n = _block.at(i); | 1703 Node* n = _block.at(i); |
1512 const Type* t = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type()) | 1704 set_velt_type(n, container_type(n)); |
1513 : _igvn.type(n); | |
1514 const Type* vt = container_type(t); | |
1515 set_velt_type(n, vt); | |
1516 } | 1705 } |
1517 | 1706 |
1518 // Propagate narrowed type backwards through operations | 1707 // Propagate narrowed type backwards through operations |
1519 // that don't depend on higher order bits | 1708 // that don't depend on higher order bits |
1520 for (int i = _block.length() - 1; i >= 0; i--) { | 1709 for (int i = _block.length() - 1; i >= 0; i--) { |
1541 case Op_CMoveI: case Op_CMoveL: | 1730 case Op_CMoveI: case Op_CMoveL: |
1542 if (in_bb(in)) { | 1731 if (in_bb(in)) { |
1543 bool same_type = true; | 1732 bool same_type = true; |
1544 for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { | 1733 for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { |
1545 Node *use = in->fast_out(k); | 1734 Node *use = in->fast_out(k); |
1546 if (!in_bb(use) || velt_type(use) != vt) { | 1735 if (!in_bb(use) || !same_velt_type(use, n)) { |
1547 same_type = false; | 1736 same_type = false; |
1548 break; | 1737 break; |
1549 } | 1738 } |
1550 } | 1739 } |
1551 if (same_type) { | 1740 if (same_type) { |
1573 int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) { | 1762 int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) { |
1574 SWPointer p(s, this); | 1763 SWPointer p(s, this); |
1575 if (!p.valid()) { | 1764 if (!p.valid()) { |
1576 return bottom_align; | 1765 return bottom_align; |
1577 } | 1766 } |
1767 int vw = vector_width_in_bytes(velt_basic_type(s)); | |
1768 if (vw < 2) { | |
1769 return bottom_align; // No vectors for this type | |
1770 } | |
1578 int offset = p.offset_in_bytes(); | 1771 int offset = p.offset_in_bytes(); |
1579 offset += iv_adjust_in_bytes; | 1772 offset += iv_adjust_in_bytes; |
1580 int off_rem = offset % vector_width_in_bytes(); | 1773 int off_rem = offset % vw; |
1581 int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes(); | 1774 int off_mod = off_rem >= 0 ? off_rem : off_rem + vw; |
1582 return off_mod; | 1775 return off_mod; |
1583 } | 1776 } |
1584 | 1777 |
1585 //---------------------------container_type--------------------------- | 1778 //---------------------------container_type--------------------------- |
1586 // Smallest type containing range of values | 1779 // Smallest type containing range of values |
1587 const Type* SuperWord::container_type(const Type* t) { | 1780 const Type* SuperWord::container_type(Node* n) { |
1588 const Type* tp = t->make_ptr(); | 1781 if (n->is_Mem()) { |
1589 if (tp && tp->isa_aryptr()) { | 1782 return Type::get_const_basic_type(n->as_Mem()->memory_type()); |
1590 t = tp->is_aryptr()->elem(); | 1783 } |
1591 } | 1784 const Type* t = _igvn.type(n); |
1592 if (t->basic_type() == T_INT) { | 1785 if (t->basic_type() == T_INT) { |
1593 if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL; | 1786 if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL; |
1594 if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE; | 1787 if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE; |
1595 if (t->higher_equal(TypeInt::CHAR)) return TypeInt::CHAR; | 1788 if (t->higher_equal(TypeInt::CHAR)) return TypeInt::CHAR; |
1596 if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT; | 1789 if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT; |
1597 return TypeInt::INT; | 1790 return TypeInt::INT; |
1598 } | 1791 } |
1599 return t; | 1792 return t; |
1600 } | 1793 } |
1601 | 1794 |
1795 bool SuperWord::same_velt_type(Node* n1, Node* n2) { | |
1796 const Type* vt1 = velt_type(n1); | |
1797 const Type* vt2 = velt_type(n1); | |
1798 if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) { | |
1799 // Compare vectors element sizes for integer types. | |
1800 return data_size(n1) == data_size(n2); | |
1801 } | |
1802 return vt1 == vt2; | |
1803 } | |
1804 | |
1602 //-------------------------vector_opd_range----------------------- | 1805 //-------------------------vector_opd_range----------------------- |
1603 // (Start, end] half-open range defining which operands are vector | 1806 // (Start, end] half-open range defining which operands are vector |
1604 void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) { | 1807 void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) { |
1605 switch (n->Opcode()) { | 1808 switch (n->Opcode()) { |
1606 case Op_LoadB: case Op_LoadUS: | 1809 case Op_LoadB: case Op_LoadUB: |
1810 case Op_LoadS: case Op_LoadUS: | |
1607 case Op_LoadI: case Op_LoadL: | 1811 case Op_LoadI: case Op_LoadL: |
1608 case Op_LoadF: case Op_LoadD: | 1812 case Op_LoadF: case Op_LoadD: |
1609 case Op_LoadP: | 1813 case Op_LoadP: |
1610 *start = 0; | 1814 *start = 0; |
1611 *end = 0; | 1815 *end = 0; |
1719 // pre-loop Opaque1 node. | 1923 // pre-loop Opaque1 node. |
1720 Node *orig_limit = pre_opaq->original_loop_limit(); | 1924 Node *orig_limit = pre_opaq->original_loop_limit(); |
1721 assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, ""); | 1925 assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, ""); |
1722 | 1926 |
1723 SWPointer align_to_ref_p(align_to_ref, this); | 1927 SWPointer align_to_ref_p(align_to_ref, this); |
1928 assert(align_to_ref_p.valid(), "sanity"); | |
1724 | 1929 |
1725 // Given: | 1930 // Given: |
1726 // lim0 == original pre loop limit | 1931 // lim0 == original pre loop limit |
1727 // V == v_align (power of 2) | 1932 // V == v_align (power of 2) |
1728 // invar == extra invariant piece of the address expression | 1933 // invar == extra invariant piece of the address expression |
1771 // Solving for lim: | 1976 // Solving for lim: |
1772 // (e - lim0 + N) % V == 0 | 1977 // (e - lim0 + N) % V == 0 |
1773 // N = (V - (e - lim0)) % V | 1978 // N = (V - (e - lim0)) % V |
1774 // lim = lim0 - (V - (e - lim0)) % V | 1979 // lim = lim0 - (V - (e - lim0)) % V |
1775 | 1980 |
1981 int vw = vector_width_in_bytes(velt_basic_type(align_to_ref)); | |
1982 assert(vw > 1, "sanity"); | |
1776 int stride = iv_stride(); | 1983 int stride = iv_stride(); |
1777 int scale = align_to_ref_p.scale_in_bytes(); | 1984 int scale = align_to_ref_p.scale_in_bytes(); |
1778 int elt_size = align_to_ref_p.memory_size(); | 1985 int elt_size = align_to_ref_p.memory_size(); |
1779 int v_align = vector_width_in_bytes() / elt_size; | 1986 int v_align = vw / elt_size; |
1780 int k = align_to_ref_p.offset_in_bytes() / elt_size; | 1987 int k = align_to_ref_p.offset_in_bytes() / elt_size; |
1781 | 1988 |
1782 Node *kn = _igvn.intcon(k); | 1989 Node *kn = _igvn.intcon(k); |
1783 | 1990 |
1784 Node *e = kn; | 1991 Node *e = kn; |
1791 if (align_to_ref_p.negate_invar()) { | 1998 if (align_to_ref_p.negate_invar()) { |
1792 e = new (_phase->C, 3) SubINode(e, aref); | 1999 e = new (_phase->C, 3) SubINode(e, aref); |
1793 } else { | 2000 } else { |
1794 e = new (_phase->C, 3) AddINode(e, aref); | 2001 e = new (_phase->C, 3) AddINode(e, aref); |
1795 } | 2002 } |
2003 _phase->_igvn.register_new_node_with_optimizer(e); | |
2004 _phase->set_ctrl(e, pre_ctrl); | |
2005 } | |
2006 if (vw > ObjectAlignmentInBytes) { | |
2007 // incorporate base e +/- base && Mask >>> log2(elt) | |
2008 Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw))); | |
2009 Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base()); | |
2010 _phase->_igvn.register_new_node_with_optimizer(xbase); | |
2011 Node* masked_xbase = new (_phase->C, 3) AndXNode(xbase, mask); | |
2012 _phase->_igvn.register_new_node_with_optimizer(masked_xbase); | |
2013 #ifdef _LP64 | |
2014 masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase); | |
2015 _phase->_igvn.register_new_node_with_optimizer(masked_xbase); | |
2016 #endif | |
2017 Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); | |
2018 Node* bref = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt); | |
2019 _phase->_igvn.register_new_node_with_optimizer(bref); | |
2020 _phase->set_ctrl(bref, pre_ctrl); | |
2021 e = new (_phase->C, 3) AddINode(e, bref); | |
1796 _phase->_igvn.register_new_node_with_optimizer(e); | 2022 _phase->_igvn.register_new_node_with_optimizer(e); |
1797 _phase->set_ctrl(e, pre_ctrl); | 2023 _phase->set_ctrl(e, pre_ctrl); |
1798 } | 2024 } |
1799 | 2025 |
1800 // compute e +/- lim0 | 2026 // compute e +/- lim0 |