Mercurial > hg > graal-jvmci-8
comparison src/share/vm/opto/regmask.cpp @ 6179:8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256-bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
author | kvn |
---|---|
date | Fri, 15 Jun 2012 01:25:19 -0700 |
parents | 1d1603768966 |
children | a7114d3d712e |
comparison
equal
deleted
inserted
replaced
6146:eba1d5bce9e8 | 6179:8c92982cbbc4 |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. | 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | 4 * |
5 * This code is free software; you can redistribute it and/or modify it | 5 * This code is free software; you can redistribute it and/or modify it |
6 * under the terms of the GNU General Public License version 2 only, as | 6 * under the terms of the GNU General Public License version 2 only, as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
127 FORALL_BODY | 127 FORALL_BODY |
128 # undef BODY | 128 # undef BODY |
129 0 | 129 0 |
130 ); | 130 ); |
131 | 131 |
132 //============================================================================= | |
133 bool RegMask::is_vector(uint ireg) { | |
134 return (ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY); | |
135 } | |
136 | |
137 int RegMask::num_registers(uint ireg) { | |
138 switch(ireg) { | |
139 case Op_VecY: | |
140 return 8; | |
141 case Op_VecX: | |
142 return 4; | |
143 case Op_VecD: | |
144 case Op_RegD: | |
145 case Op_RegL: | |
146 #ifdef _LP64 | |
147 case Op_RegP: | |
148 #endif | |
149 return 2; | |
150 } | |
151 // Op_VecS and the rest of the ideal registers. | |
152 return 1; | |
153 } | |
154 | |
132 //------------------------------find_first_pair-------------------------------- | 155 //------------------------------find_first_pair-------------------------------- |
133 // Find the lowest-numbered register pair in the mask. Return the | 156 // Find the lowest-numbered register pair in the mask. Return the |
134 // HIGHEST register number in the pair, or BAD if no pairs. | 157 // HIGHEST register number in the pair, or BAD if no pairs. |
135 OptoReg::Name RegMask::find_first_pair() const { | 158 OptoReg::Name RegMask::find_first_pair() const { |
136 VerifyPairs(); | 159 verify_pairs(); |
137 for( int i = 0; i < RM_SIZE; i++ ) { | 160 for( int i = 0; i < RM_SIZE; i++ ) { |
138 if( _A[i] ) { // Found some bits | 161 if( _A[i] ) { // Found some bits |
139 int bit = _A[i] & -_A[i]; // Extract low bit | 162 int bit = _A[i] & -_A[i]; // Extract low bit |
140 // Convert to bit number, return hi bit in pair | 163 // Convert to bit number, return hi bit in pair |
141 return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+1); | 164 return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+1); |
144 return OptoReg::Bad; | 167 return OptoReg::Bad; |
145 } | 168 } |
146 | 169 |
147 //------------------------------ClearToPairs----------------------------------- | 170 //------------------------------ClearToPairs----------------------------------- |
148 // Clear out partial bits; leave only bit pairs | 171 // Clear out partial bits; leave only bit pairs |
149 void RegMask::ClearToPairs() { | 172 void RegMask::clear_to_pairs() { |
150 for( int i = 0; i < RM_SIZE; i++ ) { | 173 for( int i = 0; i < RM_SIZE; i++ ) { |
151 int bits = _A[i]; | 174 int bits = _A[i]; |
152 bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair | 175 bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair |
153 bits |= (bits>>1); // Smear 1 hi-bit into a pair | 176 bits |= (bits>>1); // Smear 1 hi-bit into a pair |
154 _A[i] = bits; | 177 _A[i] = bits; |
155 } | 178 } |
156 VerifyPairs(); | 179 verify_pairs(); |
157 } | 180 } |
158 | 181 |
159 //------------------------------SmearToPairs----------------------------------- | 182 //------------------------------SmearToPairs----------------------------------- |
160 // Smear out partial bits; leave only bit pairs | 183 // Smear out partial bits; leave only bit pairs |
161 void RegMask::SmearToPairs() { | 184 void RegMask::smear_to_pairs() { |
162 for( int i = 0; i < RM_SIZE; i++ ) { | 185 for( int i = 0; i < RM_SIZE; i++ ) { |
163 int bits = _A[i]; | 186 int bits = _A[i]; |
164 bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair | 187 bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair |
165 bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair | 188 bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair |
166 _A[i] = bits; | 189 _A[i] = bits; |
167 } | 190 } |
168 VerifyPairs(); | 191 verify_pairs(); |
169 } | 192 } |
170 | 193 |
171 //------------------------------is_aligned_pairs------------------------------- | 194 //------------------------------is_aligned_pairs------------------------------- |
172 bool RegMask::is_aligned_Pairs() const { | 195 bool RegMask::is_aligned_pairs() const { |
173 // Assert that the register mask contains only bit pairs. | 196 // Assert that the register mask contains only bit pairs. |
174 for( int i = 0; i < RM_SIZE; i++ ) { | 197 for( int i = 0; i < RM_SIZE; i++ ) { |
175 int bits = _A[i]; | 198 int bits = _A[i]; |
176 while( bits ) { // Check bits for pairing | 199 while( bits ) { // Check bits for pairing |
177 int bit = bits & -bits; // Extract low bit | 200 int bit = bits & -bits; // Extract low bit |
202 return true; | 225 return true; |
203 } | 226 } |
204 | 227 |
205 //------------------------------is_bound2-------------------------------------- | 228 //------------------------------is_bound2-------------------------------------- |
206 // Return TRUE if the mask contains an adjacent pair of bits and no other bits. | 229 // Return TRUE if the mask contains an adjacent pair of bits and no other bits. |
207 int RegMask::is_bound2() const { | 230 int RegMask::is_bound_pair() const { |
208 if( is_AllStack() ) return false; | 231 if( is_AllStack() ) return false; |
209 | 232 |
210 int bit = -1; // Set to hold the one bit allowed | 233 int bit = -1; // Set to hold the one bit allowed |
211 for( int i = 0; i < RM_SIZE; i++ ) { | 234 for( int i = 0; i < RM_SIZE; i++ ) { |
212 if( _A[i] ) { // Found some bits | 235 if( _A[i] ) { // Found some bits |
221 if( _A[i] != 1 ) return false; // Require 1 lo bit in next word | 244 if( _A[i] != 1 ) return false; // Require 1 lo bit in next word |
222 } | 245 } |
223 } | 246 } |
224 } | 247 } |
225 // True for both the empty mask and for a bit pair | 248 // True for both the empty mask and for a bit pair |
249 return true; | |
250 } | |
251 | |
252 static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 }; | |
253 //------------------------------find_first_set--------------------------------- | |
254 // Find the lowest-numbered register set in the mask. Return the | |
255 // HIGHEST register number in the set, or BAD if no sets. | |
256 // Works also for size 1. | |
257 OptoReg::Name RegMask::find_first_set(int size) const { | |
258 verify_sets(size); | |
259 for (int i = 0; i < RM_SIZE; i++) { | |
260 if (_A[i]) { // Found some bits | |
261 int bit = _A[i] & -_A[i]; // Extract low bit | |
262 // Convert to bit number, return hi bit in set | |
263 return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1)); | |
264 } | |
265 } | |
266 return OptoReg::Bad; | |
267 } | |
268 | |
269 //------------------------------clear_to_sets---------------------------------- | |
270 // Clear out partial bits; leave only aligned adjacent bit sets | |
271 void RegMask::clear_to_sets(int size) { | |
272 if (size == 1) return; | |
273 assert(2 <= size && size <= 8, "update low bits table"); | |
274 assert(is_power_of_2(size), "sanity"); | |
275 int low_bits_mask = low_bits[size>>2]; | |
276 for (int i = 0; i < RM_SIZE; i++) { | |
277 int bits = _A[i]; | |
278 int sets = (bits & low_bits_mask); | |
279 for (int j = 1; j < size; j++) { | |
280 sets = (bits & (sets<<1)); // filter bits which produce whole sets | |
281 } | |
282 sets |= (sets>>1); // Smear 1 hi-bit into a set | |
283 if (size > 2) { | |
284 sets |= (sets>>2); // Smear 2 hi-bits into a set | |
285 if (size > 4) { | |
286 sets |= (sets>>4); // Smear 4 hi-bits into a set | |
287 } | |
288 } | |
289 _A[i] = sets; | |
290 } | |
291 verify_sets(size); | |
292 } | |
293 | |
294 //------------------------------smear_to_sets---------------------------------- | |
295 // Smear out partial bits to aligned adjacent bit sets | |
296 void RegMask::smear_to_sets(int size) { | |
297 if (size == 1) return; | |
298 assert(2 <= size && size <= 8, "update low bits table"); | |
299 assert(is_power_of_2(size), "sanity"); | |
300 int low_bits_mask = low_bits[size>>2]; | |
301 for (int i = 0; i < RM_SIZE; i++) { | |
302 int bits = _A[i]; | |
303 int sets = 0; | |
304 for (int j = 0; j < size; j++) { | |
305 sets |= (bits & low_bits_mask); // collect partial bits | |
306 bits = bits>>1; | |
307 } | |
308 sets |= (sets<<1); // Smear 1 lo-bit into a set | |
309 if (size > 2) { | |
310 sets |= (sets<<2); // Smear 2 lo-bits into a set | |
311 if (size > 4) { | |
312 sets |= (sets<<4); // Smear 4 lo-bits into a set | |
313 } | |
314 } | |
315 _A[i] = sets; | |
316 } | |
317 verify_sets(size); | |
318 } | |
319 | |
320 //------------------------------is_aligned_sets-------------------------------- | |
321 bool RegMask::is_aligned_sets(int size) const { | |
322 if (size == 1) return true; | |
323 assert(2 <= size && size <= 8, "update low bits table"); | |
324 assert(is_power_of_2(size), "sanity"); | |
325 int low_bits_mask = low_bits[size>>2]; | |
326 // Assert that the register mask contains only bit sets. | |
327 for (int i = 0; i < RM_SIZE; i++) { | |
328 int bits = _A[i]; | |
329 while (bits) { // Check bits for aligned sets | |
330 int bit = bits & -bits; // Extract low bit | |
331 // If the low bit is not at a set-aligned position, the set is mis-aligned. | |
332 if ((bit & low_bits_mask) == 0) return false; | |
333 // Do extra work since (bit << size) may overflow. | |
334 int hi_bit = bit << (size-1); // high bit | |
335 int set = hi_bit + ((hi_bit-1) & ~(bit-1)); | |
336 // Check for aligned adjacent bits in this set | |
337 if ((bits & set) != set) return false; | |
338 bits -= set; // Remove this set | |
339 } | |
340 } | |
341 return true; | |
342 } | |
343 | |
344 //------------------------------is_bound_set----------------------------------- | |
345 // Return TRUE if the mask contains one adjacent set of bits and no other bits. | |
346 // Works also for size 1. | |
347 int RegMask::is_bound_set(int size) const { | |
348 if( is_AllStack() ) return false; | |
349 assert(1 <= size && size <= 8, "update low bits table"); | |
350 int bit = -1; // Set to hold the one bit allowed | |
351 for (int i = 0; i < RM_SIZE; i++) { | |
352 if (_A[i] ) { // Found some bits | |
353 if (bit != -1) | |
354 return false; // Already had bits, so fail | |
355 bit = _A[i] & -_A[i]; // Extract 1 bit from mask | |
356 int hi_bit = bit << (size-1); // high bit | |
357 if (hi_bit != 0) { // Bit set stays in same word? | |
358 int set = hi_bit + ((hi_bit-1) & ~(bit-1)); | |
359 if (set != _A[i]) | |
360 return false; // Require adjacent bit set and no more bits | |
361 } else { // Else it's a split-set case | |
362 if (((-1) & ~(bit-1)) != _A[i]) | |
363 return false; // Found many bits, so fail | |
364 i++; // Skip iteration forward and check high part | |
365 assert(size <= 8, "update next code"); | |
366 // The lower 24 bits should be 0 since it is split case and size <= 8. | |
367 int set = bit>>24; | |
368 set = set & -set; // Remove sign extension. | |
369 set = (((set << size) - 1) >> 8); | |
370 if (_A[i] != set) return false; // Require the remaining low bits of the set in next word | |
371 } | |
372 } | |
373 } | |
374 // True for both the empty mask and for a bit set | |
226 return true; | 375 return true; |
227 } | 376 } |
228 | 377 |
229 //------------------------------is_UP------------------------------------------ | 378 //------------------------------is_UP------------------------------------------ |
230 // UP means register only, Register plus stack, or stack only is DOWN | 379 // UP means register only, Register plus stack, or stack only is DOWN |