| OLD | NEW | 
|    1 ; This is a smoke test of nop insertion. |    1 ; This is a smoke test of nop insertion. | 
|    2  |    2  | 
|    3 ; REQUIRES: allow_dump |    3 ; REQUIRES: allow_dump | 
|    4  |    4  | 
|    5 ; Don't use filetype=iasm because this currently depends on the # variant |    5 ; Don't use filetype=iasm because this currently depends on the # variant | 
|    6 ; assembler comment. |    6 ; assembler comment. | 
|    7 ; RUN: %p2i -i %s -a -sz-seed=1 -nop-insertion -nop-insertion-percentage=50 \ |    7 ; RUN: %p2i -i %s -a -sz-seed=1 -nop-insertion -nop-insertion-percentage=50 \ | 
|    8 ; RUN:    -max-nops-per-instruction=1 -filetype=asm \ |    8 ; RUN:    -max-nops-per-instruction=1 -filetype=asm \ | 
|    9 ; RUN:    | FileCheck %s --check-prefix=PROB50 |    9 ; RUN:    | FileCheck %s --check-prefix=PROB50 | 
|   10 ; RUN: %p2i -i %s -a -sz-seed=1 -nop-insertion -nop-insertion-percentage=90 \ |   10 ; RUN: %p2i -i %s -a -sz-seed=1 -nop-insertion -nop-insertion-percentage=90 \ | 
|   11 ; RUN:    -max-nops-per-instruction=1 -filetype=asm \ |   11 ; RUN:    -max-nops-per-instruction=1 -filetype=asm \ | 
|   12 ; RUN:    | FileCheck %s --check-prefix=PROB90 |   12 ; RUN:    | FileCheck %s --check-prefix=PROB90 | 
|   13 ; RUN: %p2i -i %s -a -sz-seed=1 -nop-insertion -nop-insertion-percentage=50 \ |   13 ; RUN: %p2i -i %s -a -sz-seed=1 -nop-insertion -nop-insertion-percentage=50 \ | 
|   14 ; RUN:    -max-nops-per-instruction=2 -filetype=asm \ |   14 ; RUN:    -max-nops-per-instruction=2 -filetype=asm \ | 
|   15 ; RUN:    | FileCheck %s --check-prefix=MAXNOPS2 |   15 ; RUN:    | FileCheck %s --check-prefix=MAXNOPS2 | 
|   16  |   16  | 
|   17 define <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) { |   17 define <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) { | 
|   18 entry: |   18 entry: | 
|   19   %res = mul <4 x i32> %a, %b |   19   %res = mul <4 x i32> %a, %b | 
|   20   ret <4 x i32> %res |   20   ret <4 x i32> %res | 
|   21 ; PROB50-LABEL: mul_v4i32: |   21 ; PROB50-LABEL: mul_v4i32 | 
|   22 ; PROB50: nop # variant = 3 |   22 ; PROB50: nop # variant = 3 | 
|   23 ; PROB50: subl $60, %esp |   23 ; PROB50: subl $60, %esp | 
|   24 ; PROB50: nop # variant = 4 |   24 ; PROB50: nop # variant = 4 | 
|   25 ; PROB50: movups %xmm0, 32(%esp) |   25 ; PROB50: movups %xmm0, 32(%esp) | 
|   26 ; PROB50: movups %xmm1, 16(%esp) |   26 ; PROB50: movups %xmm1, 16(%esp) | 
|   27 ; PROB50: nop # variant = 0 |   27 ; PROB50: nop # variant = 0 | 
|   28 ; PROB50: movups 32(%esp), %xmm0 |   28 ; PROB50: movups 32(%esp), %xmm0 | 
|   29 ; PROB50: nop # variant = 4 |   29 ; PROB50: nop # variant = 4 | 
|   30 ; PROB50: pshufd $49, 32(%esp), %xmm1 |   30 ; PROB50: pshufd $49, 32(%esp), %xmm1 | 
|   31 ; PROB50: pshufd $49, 16(%esp), %xmm2 |   31 ; PROB50: pshufd $49, 16(%esp), %xmm2 | 
|   32 ; PROB50: pmuludq 16(%esp), %xmm0 |   32 ; PROB50: pmuludq 16(%esp), %xmm0 | 
|   33 ; PROB50: pmuludq %xmm2, %xmm1 |   33 ; PROB50: pmuludq %xmm2, %xmm1 | 
|   34 ; PROB50: nop # variant = 0 |   34 ; PROB50: nop # variant = 0 | 
|   35 ; PROB50: shufps $136, %xmm1, %xmm0 |   35 ; PROB50: shufps $136, %xmm1, %xmm0 | 
|   36 ; PROB50: pshufd $216, %xmm0, %xmm0 |   36 ; PROB50: pshufd $216, %xmm0, %xmm0 | 
|   37 ; PROB50: nop # variant = 2 |   37 ; PROB50: nop # variant = 2 | 
|   38 ; PROB50: movups %xmm0, (%esp) |   38 ; PROB50: movups %xmm0, (%esp) | 
|   39 ; PROB50: movups (%esp), %xmm0 |   39 ; PROB50: movups (%esp), %xmm0 | 
|   40 ; PROB50: addl $60, %esp |   40 ; PROB50: addl $60, %esp | 
|   41 ; PROB50: nop # variant = 0 |   41 ; PROB50: nop # variant = 0 | 
|   42 ; PROB50: ret |   42 ; PROB50: ret | 
|   43  |   43  | 
|   44 ; PROB90-LABEL: mul_v4i32: |   44 ; PROB90-LABEL: mul_v4i32 | 
|   45 ; PROB90: nop # variant = 3 |   45 ; PROB90: nop # variant = 3 | 
|   46 ; PROB90: subl $60, %esp |   46 ; PROB90: subl $60, %esp | 
|   47 ; PROB90: nop # variant = 4 |   47 ; PROB90: nop # variant = 4 | 
|   48 ; PROB90: movups %xmm0, 32(%esp) |   48 ; PROB90: movups %xmm0, 32(%esp) | 
|   49 ; PROB90: nop # variant = 3 |   49 ; PROB90: nop # variant = 3 | 
|   50 ; PROB90: movups %xmm1, 16(%esp) |   50 ; PROB90: movups %xmm1, 16(%esp) | 
|   51 ; PROB90: nop # variant = 2 |   51 ; PROB90: nop # variant = 2 | 
|   52 ; PROB90: movups 32(%esp), %xmm0 |   52 ; PROB90: movups 32(%esp), %xmm0 | 
|   53 ; PROB90: nop # variant = 3 |   53 ; PROB90: nop # variant = 3 | 
|   54 ; PROB90: pshufd $49, 32(%esp), %xmm1 |   54 ; PROB90: pshufd $49, 32(%esp), %xmm1 | 
|   55 ; PROB90: nop # variant = 4 |   55 ; PROB90: nop # variant = 4 | 
|   56 ; PROB90: pshufd $49, 16(%esp), %xmm2 |   56 ; PROB90: pshufd $49, 16(%esp), %xmm2 | 
|   57 ; PROB90: nop # variant = 0 |   57 ; PROB90: nop # variant = 0 | 
|   58 ; PROB90: pmuludq 16(%esp), %xmm0 |   58 ; PROB90: pmuludq 16(%esp), %xmm0 | 
|   59 ; PROB90: nop # variant = 2 |   59 ; PROB90: nop # variant = 2 | 
|   60 ; PROB90: pmuludq %xmm2, %xmm1 |   60 ; PROB90: pmuludq %xmm2, %xmm1 | 
|   61 ; PROB90: nop # variant = 3 |   61 ; PROB90: nop # variant = 3 | 
|   62 ; PROB90: shufps $136, %xmm1, %xmm0 |   62 ; PROB90: shufps $136, %xmm1, %xmm0 | 
|   63 ; PROB90: nop # variant = 4 |   63 ; PROB90: nop # variant = 4 | 
|   64 ; PROB90: pshufd $216, %xmm0, %xmm0 |   64 ; PROB90: pshufd $216, %xmm0, %xmm0 | 
|   65 ; PROB90: nop # variant = 2 |   65 ; PROB90: nop # variant = 2 | 
|   66 ; PROB90: movups %xmm0, (%esp) |   66 ; PROB90: movups %xmm0, (%esp) | 
|   67 ; PROB90: nop # variant = 4 |   67 ; PROB90: nop # variant = 4 | 
|   68 ; PROB90: movups (%esp), %xmm0 |   68 ; PROB90: movups (%esp), %xmm0 | 
|   69 ; PROB90: nop # variant = 2 |   69 ; PROB90: nop # variant = 2 | 
|   70 ; PROB90: addl $60, %esp |   70 ; PROB90: addl $60, %esp | 
|   71 ; PROB90: nop # variant = 3 |   71 ; PROB90: nop # variant = 3 | 
|   72 ; PROB90: ret |   72 ; PROB90: ret | 
|   73  |   73  | 
|   74 ; MAXNOPS2-LABEL: mul_v4i32: |   74 ; MAXNOPS2-LABEL: mul_v4i32 | 
|   75 ; MAXNOPS2: subl $60, %esp |   75 ; MAXNOPS2: subl $60, %esp | 
|   76 ; MAXNOPS2: nop # variant = 4 |   76 ; MAXNOPS2: nop # variant = 4 | 
|   77 ; MAXNOPS2: movups %xmm0, 32(%esp) |   77 ; MAXNOPS2: movups %xmm0, 32(%esp) | 
|   78 ; MAXNOPS2: nop # variant = 0 |   78 ; MAXNOPS2: nop # variant = 0 | 
|   79 ; MAXNOPS2: nop # variant = 4 |   79 ; MAXNOPS2: nop # variant = 4 | 
|   80 ; MAXNOPS2: movups %xmm1, 16(%esp) |   80 ; MAXNOPS2: movups %xmm1, 16(%esp) | 
|   81 ; MAXNOPS2: movups 32(%esp), %xmm0 |   81 ; MAXNOPS2: movups 32(%esp), %xmm0 | 
|   82 ; MAXNOPS2: nop # variant = 0 |   82 ; MAXNOPS2: nop # variant = 0 | 
|   83 ; MAXNOPS2: pshufd $49, 32(%esp), %xmm1 |   83 ; MAXNOPS2: pshufd $49, 32(%esp), %xmm1 | 
|   84 ; MAXNOPS2: nop # variant = 2 |   84 ; MAXNOPS2: nop # variant = 2 | 
|   85 ; MAXNOPS2: pshufd $49, 16(%esp), %xmm2 |   85 ; MAXNOPS2: pshufd $49, 16(%esp), %xmm2 | 
|   86 ; MAXNOPS2: pmuludq 16(%esp), %xmm0 |   86 ; MAXNOPS2: pmuludq 16(%esp), %xmm0 | 
|   87 ; MAXNOPS2: nop # variant = 0 |   87 ; MAXNOPS2: nop # variant = 0 | 
|   88 ; MAXNOPS2: nop # variant = 3 |   88 ; MAXNOPS2: nop # variant = 3 | 
|   89 ; MAXNOPS2: pmuludq %xmm2, %xmm1 |   89 ; MAXNOPS2: pmuludq %xmm2, %xmm1 | 
|   90 ; MAXNOPS2: shufps $136, %xmm1, %xmm0 |   90 ; MAXNOPS2: shufps $136, %xmm1, %xmm0 | 
|   91 ; MAXNOPS2: pshufd $216, %xmm0, %xmm0 |   91 ; MAXNOPS2: pshufd $216, %xmm0, %xmm0 | 
|   92 ; MAXNOPS2: nop # variant = 3 |   92 ; MAXNOPS2: nop # variant = 3 | 
|   93 ; MAXNOPS2: movups %xmm0, (%esp) |   93 ; MAXNOPS2: movups %xmm0, (%esp) | 
|   94 ; MAXNOPS2: nop # variant = 0 |   94 ; MAXNOPS2: nop # variant = 0 | 
|   95 ; MAXNOPS2: movups (%esp), %xmm0 |   95 ; MAXNOPS2: movups (%esp), %xmm0 | 
|   96 ; MAXNOPS2: nop # variant = 2 |   96 ; MAXNOPS2: nop # variant = 2 | 
|   97 ; MAXNOPS2: addl $60, %esp |   97 ; MAXNOPS2: addl $60, %esp | 
|   98 ; MAXNOPS2: nop # variant = 4 |   98 ; MAXNOPS2: nop # variant = 4 | 
|   99 ; MAXNOPS2: ret |   99 ; MAXNOPS2: ret | 
|  100 } |  100 } | 
| OLD | NEW |