Skip to content

Instantly share code, notes, and snippets.

@MihuBot
Created May 27, 2024 23:51
Show Gist options
  • Save MihuBot/f60c7254060f436ae9c12ded701c85f8 to your computer and use it in GitHub Desktop.
Save MihuBot/f60c7254060f436ae9c12ded701c85f8 to your computer and use it in GitHub Desktop.

Top method improvements

-54 (-6.09 % of base) - System.Text.Ascii:NarrowUtf16ToAscii(ulong,ulong,ulong):ulong
 ; Assembly listing for method System.Text.Ascii:NarrowUtf16ToAscii(ulong,ulong,ulong):ulong (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 13 single block inlinees; 17 inlinees without PGO data
+; 0 inlinees with PGO data; 21 single block inlinees; 25 inlinees without PGO data
 ; Final local variable assignments
 ;
 ;  V00 arg0         [V00,T06] (  9,  9   )    long  ->  rdi         single-def
 ;  V01 arg1         [V01,T04] ( 15, 12   )    long  ->  rsi         single-def
 ;  V02 arg2         [V02,T11] (  9,  6   )    long  ->  rdx         single-def
 ;  V03 loc0         [V03,T00] ( 23, 30   )    long  ->  rax        
 ;  V04 loc1         [V04,T12] ( 13,  6.50)     int  ->  rcx        
 ;* V05 loc2         [V05    ] (  0,  0   )     int  ->  zero-ref   
 ;  V06 loc3         [V06,T05] (  7, 14   )    long  ->  registers  
 ;  V07 loc4         [V07,T22] (  5,  2.50)    long  ->  rdx        
 ;  V08 loc5         [V08,T16] (  2,  4.50)    long  ->  rcx        
 ;# V09 OutArgs      [V09    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
 ;  V10 tmp1         [V10,T23] (  3,  1.50)    long  ->  rax         "Inline return value spill temp"
 ;  V11 tmp2         [V11,T07] (  5,  9.50)   byref  ->  rax         single-def "Inline stloc first use temp"
 ;  V12 tmp3         [V12,T30] ( 14, 17.50)  simd64  ->  mm0         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ushort]>
 ;  V13 tmp4         [V13,T13] (  5,  6   )   byref  ->  rcx         single-def "Inline stloc first use temp"
-;  V14 tmp5         [V14,T01] ( 12, 27   )    long  ->   r8         "Inline stloc first use temp"
-;  V15 tmp6         [V15,T17] (  2,  4.50)    long  ->   r9         "Inline stloc first use temp"
-;  V16 tmp7         [V16,T34] (  3, 12   )  simd64  ->  mm2         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ushort]>
-;* V17 tmp8         [V17    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
-;  V18 tmp9         [V18,T24] (  3,  1.50)    long  ->  rax         "Inline return value spill temp"
-;  V19 tmp10        [V19,T08] (  5,  9.50)   byref  ->  rax         single-def "Inline stloc first use temp"
-;  V20 tmp11        [V20,T31] ( 14, 17.50)  simd32  ->  mm0         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;  V21 tmp12        [V21,T14] (  5,  6   )   byref  ->  rcx         single-def "Inline stloc first use temp"
-;  V22 tmp13        [V22,T02] ( 12, 27   )    long  ->   r8         "Inline stloc first use temp"
-;  V23 tmp14        [V23,T18] (  2,  4.50)    long  ->   r9         "Inline stloc first use temp"
-;  V24 tmp15        [V24,T35] (  3, 12   )  simd32  ->  mm2         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V25 tmp16        [V25    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V26 tmp17        [V26    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V27 tmp18        [V27    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V28 tmp19        [V28    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V29 tmp20        [V29    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V30 tmp21        [V30    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V31 tmp22        [V31    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V32 tmp23        [V32    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V33 tmp24        [V33    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;  V34 tmp25        [V34,T25] (  3,  1.50)    long  ->  rax         "Inline return value spill temp"
-;* V35 tmp26        [V35,T27] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;* V36 tmp27        [V36    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;  V37 tmp28        [V37,T09] (  5,  9.50)   byref  ->  rax         single-def "Inline stloc first use temp"
-;  V38 tmp29        [V38,T32] ( 14, 17.50)  simd16  ->  mm0         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;  V39 tmp30        [V39,T15] (  5,  6   )   byref  ->  rcx         single-def "Inline stloc first use temp"
-;* V40 tmp31        [V40    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V41 tmp32        [V41,T03] ( 11, 26.50)    long  ->   r8         "Inline stloc first use temp"
-;  V42 tmp33        [V42,T19] (  2,  4.50)    long  ->   r9         "Inline stloc first use temp"
-;  V43 tmp34        [V43,T36] (  3, 12   )  simd16  ->  mm2         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V44 tmp35        [V44    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;  V45 tmp36        [V45,T33] (  2, 16   )  simd16  ->  mm0         "Spilling op1 side effects for HWIntrinsic"
-;* V46 tmp37        [V46    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
-;* V47 tmp38        [V47    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V48 tmp39        [V48    ] (  0,  0   )  simd16  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V49 tmp40        [V49    ] (  0,  0   )  simd16  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V50 tmp41        [V50    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V51 tmp42        [V51    ] (  0,  0   )  simd16  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V52 tmp43        [V52    ] (  0,  0   )  simd16  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V53 tmp44        [V53    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V54 tmp45        [V54    ] (  0,  0   )  simd16  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V55 tmp46        [V55    ] (  0,  0   )  simd16  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V56 tmp47        [V56    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V57 tmp48        [V57    ] (  0,  0   )  simd16  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V58 tmp49        [V58    ] (  0,  0   )  simd16  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V59 tmp50        [V59    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
-;  V60 tmp51        [V60,T28] (  3, 24   )  simd16  ->  mm0         "dup spill"
-;* V61 tmp52        [V61    ] (  0,  0   )  simd16  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
-;* V62 tmp53        [V62    ] (  0,  0   )   byref  ->  zero-ref    "Inlining Arg"
-;  V63 tmp54        [V63,T20] (  3,  3   )   byref  ->   r8         single-def "Inlining Arg"
-;  V64 tmp55        [V64,T21] (  3,  3   )   byref  ->  rdx         "Inlining Arg"
-;* V65 tmp56        [V65,T26] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;  V66 cse0         [V66,T10] (  3,  8.50)    long  ->  r10         "CSE #12: conservative"
-;  V67 cse1         [V67,T37] (  5,  6   )  simd64  ->  mm1         "CSE #02: conservative"
-;  V68 cse2         [V68,T38] (  5,  6   )  simd32  ->  mm1         "CSE #07: conservative"
-;  V69 cse3         [V69,T39] (  5,  6   )  simd16  ->  mm1         "CSE #11: conservative"
-;  V70 rat0         [V70,T29] (  3, 24   )  simd64  ->  mm3         "ReplaceWithLclVar is creating a new local variable"
+;* V14 tmp5         [V14    ] (  0,  0   )  simd64  ->  zero-ref    "spilled call-like call argument"
+;  V15 tmp6         [V15,T01] ( 12, 27   )    long  ->   r8         "Inline stloc first use temp"
+;  V16 tmp7         [V16,T17] (  2,  4.50)    long  ->   r9         "Inline stloc first use temp"
+;  V17 tmp8         [V17,T36] (  3, 12   )  simd64  ->  mm3         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V18 tmp9         [V18    ] (  0,  0   )  simd64  ->  zero-ref    "spilled call-like call argument"
+;  V19 tmp10        [V19,T33] (  2, 16   )  simd64  ->  mm0         "Spilling op1 side effects for HWIntrinsic"
+;* V20 tmp11        [V20    ] (  0,  0   )  simd64  ->  zero-ref    "spilled call-like call argument"
+;* V21 tmp12        [V21    ] (  0,  0   )  simd64  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V22 tmp13        [V22    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V23 tmp14        [V23    ] (  0,  0   )  simd64  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V24 tmp15        [V24    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V25 tmp16        [V25    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V26 tmp17        [V26    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V27 tmp18        [V27    ] (  0,  0   )  simd64  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V28 tmp19        [V28    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;  V29 tmp20        [V29,T24] (  3,  1.50)    long  ->  rax         "Inline return value spill temp"
+;  V30 tmp21        [V30,T08] (  5,  9.50)   byref  ->  rax         single-def "Inline stloc first use temp"
+;  V31 tmp22        [V31,T31] ( 14, 17.50)  simd32  ->  mm0         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;  V32 tmp23        [V32,T14] (  5,  6   )   byref  ->  rcx         single-def "Inline stloc first use temp"
+;* V33 tmp24        [V33    ] (  0,  0   )  simd32  ->  zero-ref    "spilled call-like call argument"
+;  V34 tmp25        [V34,T02] ( 12, 27   )    long  ->   r8         "Inline stloc first use temp"
+;  V35 tmp26        [V35,T18] (  2,  4.50)    long  ->   r9         "Inline stloc first use temp"
+;  V36 tmp27        [V36,T37] (  3, 12   )  simd32  ->  mm2         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V37 tmp28        [V37    ] (  0,  0   )  simd32  ->  zero-ref    "spilled call-like call argument"
+;  V38 tmp29        [V38,T34] (  2, 16   )  simd32  ->  mm0         "Spilling op1 side effects for HWIntrinsic"
+;* V39 tmp30        [V39    ] (  0,  0   )  simd32  ->  zero-ref    "spilled call-like call argument"
+;* V40 tmp31        [V40    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V41 tmp32        [V41    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V42 tmp33        [V42    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V43 tmp34        [V43    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V44 tmp35        [V44    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V45 tmp36        [V45    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V46 tmp37        [V46    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V47 tmp38        [V47    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V48 tmp39        [V48    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V49 tmp40        [V49    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V50 tmp41        [V50    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V51 tmp42        [V51    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V52 tmp43        [V52    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V53 tmp44        [V53    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V54 tmp45        [V54    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V55 tmp46        [V55    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V56 tmp47        [V56,T25] (  3,  1.50)    long  ->  rax         "Inline return value spill temp"
+;* V57 tmp48        [V57,T27] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
+;* V58 tmp49        [V58    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V59 tmp50        [V59,T09] (  5,  9.50)   byref  ->  rax         single-def "Inline stloc first use temp"
+;  V60 tmp51        [V60,T32] ( 14, 17.50)  simd16  ->  mm0         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;  V61 tmp52        [V61,T15] (  5,  6   )   byref  ->  rcx         single-def "Inline stloc first use temp"
+;* V62 tmp53        [V62    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
+;  V63 tmp54        [V63,T03] ( 11, 26.50)    long  ->   r8         "Inline stloc first use temp"
+;  V64 tmp55        [V64,T19] (  2,  4.50)    long  ->   r9         "Inline stloc first use temp"
+;  V65 tmp56        [V65,T38] (  3, 12   )  simd16  ->  mm2         "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V66 tmp57        [V66    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
+;  V67 tmp58        [V67,T35] (  2, 16   )  simd16  ->  mm0         "Spilling op1 side effects for HWIntrinsic"
+;* V68 tmp59        [V68    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
+;* V69 tmp60        [V69    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V70 tmp61        [V70    ] (  0,  0   )  simd16  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V71 tmp62        [V71    ] (  0,  0   )  simd16  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V72 tmp63        [V72    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V73 tmp64        [V73    ] (  0,  0   )  simd16  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V74 tmp65        [V74    ] (  0,  0   )  simd16  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V75 tmp66        [V75    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V76 tmp67        [V76    ] (  0,  0   )  simd16  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V77 tmp68        [V77    ] (  0,  0   )  simd16  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V78 tmp69        [V78    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V79 tmp70        [V79    ] (  0,  0   )  simd16  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V80 tmp71        [V80    ] (  0,  0   )  simd16  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V81 tmp72        [V81    ] (  0,  0   )    long  ->  zero-ref    "Inlining Arg"
+;  V82 tmp73        [V82,T28] (  3, 24   )  simd16  ->  mm0         "dup spill"
+;* V83 tmp74        [V83    ] (  0,  0   )  simd16  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
+;* V84 tmp75        [V84    ] (  0,  0   )   byref  ->  zero-ref    "Inlining Arg"
+;  V85 tmp76        [V85,T20] (  3,  3   )   byref  ->   r8         single-def "Inlining Arg"
+;  V86 tmp77        [V86,T21] (  3,  3   )   byref  ->  rdx         "Inlining Arg"
+;* V87 tmp78        [V87,T26] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;  V88 cse0         [V88,T10] (  3,  8.50)    long  ->  r10         "CSE #05: conservative"
+;  V89 cse1         [V89,T39] (  5,  6   )  simd64  ->  mm1         "CSE #01: conservative"
+;  V90 cse2         [V90,T40] (  5,  6   )  simd32  ->  mm1         "CSE #03: conservative"
+;  V91 cse3         [V91,T41] (  5,  6   )  simd16  ->  mm1         "CSE #04: conservative"
+;  V92 cse4         [V92,T42] (  5,  6   )  simd64  ->  mm2         "CSE #02: conservative"
+;  V93 rat0         [V93,T29] (  3, 24   )  simd64  ->  mm4         "ReplaceWithLclVar is creating a new local variable"
 ;
 ; Lcl frame size = 0
 
 G_M6063_IG01:
        push     rbp
        mov      rbp, rsp
 						;; size=4 bbWeight=1 PerfScore 1.25
 G_M6063_IG02:
        xor      eax, eax
        cmp      rdx, 32
        jb       G_M6063_IG26
 						;; size=12 bbWeight=1 PerfScore 1.50
 G_M6063_IG03:
        mov      rcx, qword ptr [rdi]
        mov      r8, 0xD1FFAB1E
        test     rcx, r8
        mov      r8, rcx
        jne      G_M6063_IG28
        cmp      rdx, 128
        jae      G_M6063_IG19
        cmp      rdx, 64
        jae      G_M6063_IG11
        mov      rax, rdi
        vmovups  xmm0, xmmword ptr [rax]
        vmovups  xmm1, xmmword ptr [reloc @RWD00]
        vptest   xmm0, xmm1
        jne      G_M6063_IG09
        mov      rcx, rsi
        vpackuswb xmm0, xmm0, xmm0
        vmovsd   qword ptr [rcx], xmm0
        mov      r8d, 8
        test     sil, 8
        jne      SHORT G_M6063_IG04
        vmovups  xmm0, xmmword ptr [rax+0x10]
        vptest   xmm0, xmm1
        jne      SHORT G_M6063_IG08
        vpackuswb xmm0, xmm0, xmm0
        vmovsd   qword ptr [rcx+0x08], xmm0
 						;; size=118 bbWeight=0.50 PerfScore 16.62
 G_M6063_IG04:
        mov      r8, rsi
        and      r8, 15
        neg      r8
        add      r8, 16
        lea      r9, [rdx-0x10]
        align    [0 bytes for IG05]
 						;; size=18 bbWeight=0.50 PerfScore 0.75
 G_M6063_IG05:
        vmovups  xmm0, xmmword ptr [rax+2*r8]
        lea      r10, [r8+0x08]
        vmovups  xmm2, xmmword ptr [rax+2*r10]
        vpor     xmm3, xmm0, xmm2
        vptest   xmm3, xmm1
        je       SHORT G_M6063_IG07
 						;; size=27 bbWeight=4 PerfScore 51.33
 G_M6063_IG06:
        vptest   xmm0, xmm1
        jne      SHORT G_M6063_IG08
        vpackuswb xmm0, xmm0, xmm0
        vmovsd   qword ptr [rcx+r8], xmm0
        mov      r8, r10
        jmp      SHORT G_M6063_IG08
        align    [0 bytes for IG13]
 						;; size=22 bbWeight=0.50 PerfScore 4.62
 G_M6063_IG07:
        vpackuswb xmm0, xmm0, xmm2
        vmovups  xmmword ptr [rcx+r8], xmm0
        add      r8, 16
        cmp      r8, r9
        jbe      SHORT G_M6063_IG05
 						;; size=19 bbWeight=4 PerfScore 18.00
 G_M6063_IG08:
        mov      rax, r8
        jmp      SHORT G_M6063_IG10
 						;; size=5 bbWeight=0.50 PerfScore 1.12
 G_M6063_IG09:
        xor      eax, eax
 						;; size=2 bbWeight=0.50 PerfScore 0.12
 G_M6063_IG10:
        jmp      G_M6063_IG26
 						;; size=5 bbWeight=0.50 PerfScore 1.00
 G_M6063_IG11:
        mov      rax, rdi
        vmovups  ymm0, ymmword ptr [rax]
        vmovups  ymm1, ymmword ptr [reloc @RWD32]
        vptest   ymm0, ymm1
        jne      G_M6063_IG17
        mov      rcx, rsi
-       vpmovwb  ymm0, ymm2
-       vpmovwb  ymm0, ymm0
-       vinserti128 ymm0, ymm2, xmm0, 1
+       vpackuswb ymm0, ymm0, ymm0
+       vpermq   ymm0, ymm0, -40
        vmovups  xmmword ptr [rcx], xmm0
        mov      r8d, 16
        test     sil, 16
        jne      SHORT G_M6063_IG12
        vmovups  ymm0, ymmword ptr [rax+0x20]
        vptest   ymm0, ymm1
-       jne      G_M6063_IG16
-       vpmovwb  ymm0, ymm2
-       vpmovwb  ymm0, ymm0
-       vinserti128 ymm0, ymm2, xmm0, 1
+       jne      SHORT G_M6063_IG16
+       vpackuswb ymm0, ymm0, ymm0
+       vpermq   ymm0, ymm0, -40
        vmovups  xmmword ptr [rcx+0x10], xmm0
-						;; size=102 bbWeight=0.50 PerfScore 24.00
+						;; size=82 bbWeight=0.50 PerfScore 19.00
 G_M6063_IG12:
        mov      r8, rsi
        and      r8, 31
        neg      r8
        add      r8, 32
        lea      r9, [rdx-0x20]
 						;; size=18 bbWeight=0.50 PerfScore 0.75
 G_M6063_IG13:
        vmovups  ymm0, ymmword ptr [rax+2*r8]
        vmovups  ymm2, ymmword ptr [rax+2*r8+0x20]
        vpor     ymm3, ymm0, ymm2
        vptest   ymm3, ymm1
        je       SHORT G_M6063_IG15
 						;; size=24 bbWeight=4 PerfScore 65.33
 G_M6063_IG14:
        vptest   ymm0, ymm1
        jne      SHORT G_M6063_IG16
-       vpmovwb  ymm0, ymm2
-       vpmovwb  ymm0, ymm0
-       vinserti128 ymm1, ymm2, xmm0, 1
-       vmovups  xmmword ptr [rcx+r8], xmm1
+       vpackuswb ymm0, ymm0, ymm0
+       vpermq   ymm2, ymm0, -40
+       vmovups  xmmword ptr [rcx+r8], xmm2
        add      r8, 16
        jmp      SHORT G_M6063_IG16
        align    [0 bytes for IG21]
-						;; size=37 bbWeight=0.50 PerfScore 9.12
+						;; size=29 bbWeight=0.50 PerfScore 6.62
 G_M6063_IG15:
-       vpmovwb  ymm0, ymm0
-       vpmovwb  ymm2, ymm2
-       vinserti128 ymm0, ymm0, xmm2, 1
+       vpackuswb ymm0, ymm0, ymm2
+       vpermq   ymm0, ymm0, -40
        vmovups  ymmword ptr [rcx+r8], ymm0
        add      r8, 32
        cmp      r8, r9
        jbe      SHORT G_M6063_IG13
-						;; size=33 bbWeight=4 PerfScore 46.00
+						;; size=25 bbWeight=4 PerfScore 26.00
 G_M6063_IG16:
        mov      rax, r8
        jmp      SHORT G_M6063_IG18
 						;; size=5 bbWeight=0.50 PerfScore 1.12
 G_M6063_IG17:
        xor      eax, eax
 						;; size=2 bbWeight=0.50 PerfScore 0.12
 G_M6063_IG18:
        jmp      G_M6063_IG26
 						;; size=5 bbWeight=0.50 PerfScore 1.00
 G_M6063_IG19:
        mov      rax, rdi
        vmovups  zmm0, zmmword ptr [rax]
        vmovups  zmm1, zmmword ptr [reloc @RWD64]
        vptestmw k1, zmm1, zmm0
        kortestd k1, k1
        jne      G_M6063_IG25
        mov      rcx, rsi
-       vpmovwb  zmm0, zmm2
-       vpmovwb  zmm0, zmm0
-       vinserti64x4 zmm0, zmm2, ymm0, 1
+       vpackuswb zmm0, zmm0, zmm0
+       vmovups  zmm2, zmmword ptr [reloc @RWD128]
+       vpermq   zmm0, zmm2, zmm0
        vmovups  ymmword ptr [rcx], ymm0
        mov      r8d, 32
        test     sil, 32
        jne      SHORT G_M6063_IG20
        vmovups  zmm0, zmmword ptr [rax+0x40]
        vptestmw k1, zmm1, zmm0
        kortestd k1, k1
        jne      G_M6063_IG24
-       vpmovwb  zmm0, zmm2
-       vpmovwb  zmm0, zmm0
-       vinserti64x4 zmm0, zmm2, ymm0, 1
+       vpackuswb zmm0, zmm0, zmm0
+       vpermq   zmm0, zmm2, zmm0
        vmovups  ymmword ptr [rcx+0x20], ymm0
-						;; size=122 bbWeight=0.50 PerfScore 21.50
+						;; size=118 bbWeight=0.50 PerfScore 18.00
 G_M6063_IG20:
        mov      r8, rsi
        and      r8, 63
        neg      r8
        add      r8, 64
        lea      r9, [rdx-0x40]
 						;; size=18 bbWeight=0.50 PerfScore 0.75
 G_M6063_IG21:
        vmovups  zmm0, zmmword ptr [rax+2*r8]
-       vmovups  zmm2, zmmword ptr [rax+2*r8+0x40]
-       vmovaps  zmm3, zmm0
-       vpternlogd zmm3, zmm2, zmm1, -88
-       vptestmw k1, zmm3, zmm3
+       vmovups  zmm3, zmmword ptr [rax+2*r8+0x40]
+       vmovaps  zmm4, zmm0
+       vpternlogd zmm4, zmm3, zmm1, -88
+       vptestmw k1, zmm4, zmm4
        kortestd k1, k1
        je       SHORT G_M6063_IG23
 						;; size=41 bbWeight=4 PerfScore 55.00
 G_M6063_IG22:
        vptestmw k1, zmm1, zmm0
        kortestd k1, k1
        jne      SHORT G_M6063_IG24
-       vpmovwb  zmm0, zmm2
-       vpmovwb  zmm0, zmm0
-       vinserti64x4 zmm1, zmm2, ymm0, 1
-       vmovups  ymmword ptr [rcx+r8], ymm1
+       vpackuswb zmm0, zmm0, zmm0
+       vpermq   zmm3, zmm2, zmm0
+       vmovups  ymmword ptr [rcx+r8], ymm3
        add      r8, 32
        jmp      SHORT G_M6063_IG24
        align    [0 bytes for IG27]
-						;; size=44 bbWeight=0.50 PerfScore 8.62
+						;; size=37 bbWeight=0.50 PerfScore 6.12
 G_M6063_IG23:
-       vpmovwb  zmm0, zmm0
-       vpmovwb  zmm2, zmm2
-       vinserti64x4 zmm0, zmm0, ymm2, 1
+       vpackuswb zmm0, zmm0, zmm3
+       vpermq   zmm0, zmm2, zmm0
        vmovups  zmmword ptr [rcx+r8], zmm0
        add      r8, 64
        cmp      r8, r9
        jbe      SHORT G_M6063_IG21
-						;; size=35 bbWeight=4 PerfScore 46.00
+						;; size=28 bbWeight=4 PerfScore 26.00
 G_M6063_IG24:
        mov      rax, r8
        jmp      SHORT G_M6063_IG26
 						;; size=5 bbWeight=0.50 PerfScore 1.12
 G_M6063_IG25:
        xor      eax, eax
 						;; size=2 bbWeight=0.50 PerfScore 0.12
 G_M6063_IG26:
        sub      rdx, rax
        cmp      rdx, 4
        jb       SHORT G_M6063_IG30
        lea      rcx, [rax+rdx-0x04]
 						;; size=14 bbWeight=0.50 PerfScore 1.25
 G_M6063_IG27:
        mov      r8, qword ptr [rdi+2*rax]
        mov      r9, 0xD1FFAB1E
        test     r8, r9
        je       SHORT G_M6063_IG29
 						;; size=19 bbWeight=4 PerfScore 14.00
 G_M6063_IG28:
        mov      ecx, r8d
        test     ecx, 0xD1FFAB1E
        jne      SHORT G_M6063_IG31
        lea      rdx, [rsi+rax]
        mov      byte  ptr [rdx], cl
        shr      ecx, 16
        mov      byte  ptr [rdx+0x01], cl
        shr      r8, 32
        mov      ecx, r8d
        add      rax, 2
        jmp      SHORT G_M6063_IG31
 						;; size=36 bbWeight=0.50 PerfScore 3.75
 G_M6063_IG29:
        vmovd    xmm0, r8
        vpackuswb xmm0, xmm0, xmm0
        vmovd    dword ptr [rsi+rax], xmm0
        add      rax, 4
        cmp      rax, rcx
        jbe      SHORT G_M6063_IG27
 						;; size=23 bbWeight=4 PerfScore 26.00
 G_M6063_IG30:
        test     dl, 2
        je       SHORT G_M6063_IG33
        mov      ecx, dword ptr [rdi+2*rax]
        test     ecx, 0xD1FFAB1E
        je       SHORT G_M6063_IG32
 						;; size=16 bbWeight=0.50 PerfScore 2.25
 G_M6063_IG31:
        test     ecx, 0xFF80
        je       SHORT G_M6063_IG34
        jmp      SHORT G_M6063_IG35
 						;; size=10 bbWeight=0.50 PerfScore 1.62
 G_M6063_IG32:
        lea      r8, [rsi+rax]
        mov      byte  ptr [r8], cl
        shr      ecx, 16
        mov      byte  ptr [r8+0x01], cl
        add      rax, 2
 						;; size=18 bbWeight=0.50 PerfScore 1.62
 G_M6063_IG33:
        test     dl, 1
        je       SHORT G_M6063_IG35
        movzx    rcx, word  ptr [rdi+2*rax]
        cmp      ecx, 127
        ja       SHORT G_M6063_IG35
 						;; size=14 bbWeight=0.50 PerfScore 2.25
 G_M6063_IG34:
        mov      byte  ptr [rsi+rax], cl
        inc      rax
 						;; size=6 bbWeight=0.50 PerfScore 0.62
 G_M6063_IG35:
        vzeroupper 
        pop      rbp
        ret      
 						;; size=5 bbWeight=1 PerfScore 2.50
 RWD00  	dq	FF80FF80FF80FF80h, FF80FF80FF80FF80h
 RWD16  	dd	00000000h, 00000000h, 00000000h, 00000000h
 RWD32  	dq	FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
 RWD64  	dq	FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
+RWD128 	dq	0000000000000000h, 0000000000000002h, 0000000000000004h, 0000000000000006h, 0000000000000001h, 0000000000000003h, 0000000000000005h, 0000000000000007h
 
 
-; Total bytes of code 886, prolog size 4, PerfScore 432.79, instruction count 210, allocated bytes for code 906 (MethodHash=53fae850) for method System.Text.Ascii:NarrowUtf16ToAscii(ulong,ulong,ulong):ulong (FullOpts)
+; Total bytes of code 832, prolog size 4, PerfScore 379.29, instruction count 203, allocated bytes for code 852 (MethodHash=53fae850) for method System.Text.Ascii:NarrowUtf16ToAscii(ulong,ulong,ulong):ulong (FullOpts)
-39 (-4.58 % of base) - System.Runtime.Caching.MemoryCacheStatistics:InitDisposableMembers():this
 ; Assembly listing for method System.Runtime.Caching.MemoryCacheStatistics:InitDisposableMembers():this (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 13 single block inlinees; 11 inlinees without PGO data
+; 0 inlinees with PGO data; 14 single block inlinees; 10 inlinees without PGO data
 ; Final local variable assignments
 ;
 ;  V00 this         [V00,T01] (  9,  8   )     ref  ->  [rbp-0x38]  this class-hnd EH-live single-def <System.Runtime.Caching.MemoryCacheStatistics>
-;  V01 loc0         [V01,T19] (  3,  2   )   ubyte  ->  [rbp-0x2C]  do-not-enreg[M] EH-live
+;  V01 loc0         [V01,T15] (  3,  2   )   ubyte  ->  [rbp-0x2C]  do-not-enreg[M] EH-live
 ;* V02 loc1         [V02    ] (  0,  0   )     ref  ->  zero-ref    class-hnd exact single-def <System.Threading.Timer>
-;  V03 loc2         [V03,T18] (  4,  2.50)   ubyte  ->  [rbp-0x30]  do-not-enreg[M] EH-live
+;  V03 loc2         [V03,T14] (  4,  2.50)   ubyte  ->  [rbp-0x30]  do-not-enreg[M] EH-live
 ;# V04 OutArgs      [V04    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V05 tmp1         [V05,T06] (  3,  6   )     ref  ->  rbx         class-hnd exact single-def "NewObj constructor temp" <System.Runtime.Caching.CacheMemoryMonitor>
+;  V05 tmp1         [V05,T05] (  3,  6   )     ref  ->  rbx         class-hnd exact single-def "NewObj constructor temp" <System.Runtime.Caching.CacheMemoryMonitor>
 ;  V06 tmp2         [V06,T03] (  4,  8   )     ref  ->  rbx         class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerCallback>
-;  V07 tmp3         [V07,T04] (  4,  7   )     ref  ->  r15         class-hnd exact single-def "NewObj constructor temp" <System.Threading.Timer>
-;  V08 tmp4         [V08,T07] (  3,  6   )     ref  ->  r14         class-hnd exact single-def "NewObj constructor temp" <System.Runtime.Caching.GCHandleRef`1[System.Threading.Timer]>
+;  V07 tmp3         [V07,T04] (  4,  7   )     ref  ->  [rbp-0x40]  class-hnd exact spill-single-def "NewObj constructor temp" <System.Threading.Timer>
+;  V08 tmp4         [V08,T06] (  3,  6   )     ref  ->  r12         class-hnd exact single-def "NewObj constructor temp" <System.Runtime.Caching.GCHandleRef`1[System.Threading.Timer]>
 ;* V09 tmp5         [V09    ] (  0,  0   )  struct ( 8) zero-ref    "location for address-of(RValue)" <System.Threading.AsyncFlowControl>
 ;  V10 tmp6         [V10,T02] (  5,  7   )     int  ->  r13         "Inlining Arg"
-;  V11 tmp7         [V11,T13] (  2,  3   )     int  ->  r14         "Inlining Arg"
+;  V11 tmp7         [V11,T10] (  2,  3   )     int  ->  r14         "Inlining Arg"
 ;* V12 tmp8         [V12    ] (  0,  0   )     int  ->  zero-ref    ld-addr-op "Inlining Arg"
-;* V13 tmp9         [V13,T22] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
+;* V13 tmp9         [V13,T17] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
 ;* V14 tmp10        [V14    ] (  0,  0   )     int  ->  zero-ref    ld-addr-op "Inlining Arg"
-;* V15 tmp11        [V15,T23] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;  V16 tmp12        [V16,T00] (  8, 15   )     ref  ->  r12         class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerQueueTimer>
-;  V17 tmp13        [V17,T08] (  3,  6   )     ref  ->  rbx         class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerHolder>
-;  V18 tmp14        [V18,T09] (  3,  6   )     ref  ->  rbx         class-hnd exact single-def "impAppendStmt" <System.Threading.TimerQueue[]>
-;* V19 tmp15        [V19    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;* V15 tmp11        [V15,T18] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
+;  V16 tmp12        [V16,T00] (  8, 15   )     ref  ->  [rbp-0x48]  class-hnd exact spill-single-def "NewObj constructor temp" <System.Threading.TimerQueueTimer>
+;  V17 tmp13        [V17,T07] (  3,  6   )     ref  ->  r14         class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerHolder>
+;  V18 tmp14        [V18,T08] (  3,  6   )     ref  ->  [rbp-0x50]  class-hnd exact spill-single-def "impAppendStmt" <System.Threading.TimerQueue[]>
+;  V19 tmp15        [V19,T11] (  2,  4   )    long  ->  rax         "impAppendStmt"
 ;* V20 tmp16        [V20    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;  V21 tmp17        [V21,T20] (  3,  2   )     int  ->  rcx         "Inline return value spill temp"
-;  V22 tmp18        [V22,T05] (  4,  7   )     int  ->  rcx         "dup spill"
-;* V23 tmp19        [V23    ] (  0,  0   )     int  ->  zero-ref    single-def "Inline stloc first use temp"
-;* V24 tmp20        [V24    ] (  0,  0   )  struct ( 8) zero-ref    ld-addr-op "NewObj constructor temp" <System.Runtime.InteropServices.GCHandle>
-;* V25 tmp21        [V25    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
-;  V26 tmp22        [V26,T15] (  4,  3.50)    long  ->  rax         "Inline stloc first use temp"
-;* V27 tmp23        [V27    ] (  0,  0   )     ref  ->  zero-ref    "field V09._thread (fldOffset=0x0)" P-INDEP
-;  V28 tmp24        [V28,T21] (  2,  2   )    long  ->  rax         single-def "field V24._handle (fldOffset=0x0)" P-INDEP
-;  V29 tmp25        [V29,T26] (  2,  0   )     ref  ->  rdx         single-def "argument with side effect"
-;* V30 tmp26        [V30    ] (  0,  0   )     ref  ->  zero-ref    single-def "argument with side effect"
-;  V31 tmp27        [V31,T10] (  3,  6   )    long  ->  rdx         "index expr"
-;  V32 PSPSym       [V32,T24] (  1,  1   )    long  ->  [rbp-0x40]  do-not-enreg[V] "PSPSym"
-;* V33 cse0         [V33,T25] (  0,  0   )    long  ->  zero-ref    "CSE #03: moderate"
-;  V34 cse1         [V34,T16] (  3,  3   )    long  ->  rax         "CSE #02: moderate"
-;  V35 cse2         [V35,T17] (  3,  3   )     int  ->  r14         "CSE #01: moderate"
-;  V36 rat0         [V36,T14] (  3,  4   )    long  ->  rax         "TLS field access"
-;  V37 rat1         [V37,T11] (  3,  6   )    long  ->  rax         "TLS access"
-;  V38 rat2         [V38,T12] (  3,  6   )    long  ->  rax         "ThreadStaticBlockBase access"
+;* V21 tmp17        [V21    ] (  0,  0   )  struct ( 8) zero-ref    ld-addr-op "NewObj constructor temp" <System.Runtime.InteropServices.GCHandle>
+;* V22 tmp18        [V22    ] (  0,  0   )    long  ->  zero-ref    "Inline stloc first use temp"
+;  V23 tmp19        [V23,T12] (  4,  3.50)    long  ->  rax         "Inline stloc first use temp"
+;* V24 tmp20        [V24    ] (  0,  0   )     ref  ->  zero-ref    "field V09._thread (fldOffset=0x0)" P-INDEP
+;  V25 tmp21        [V25,T16] (  2,  2   )    long  ->  rax         single-def "field V21._handle (fldOffset=0x0)" P-INDEP
+;  V26 tmp22        [V26,T21] (  2,  0   )     ref  ->  rdx         single-def "argument with side effect"
+;* V27 tmp23        [V27    ] (  0,  0   )     ref  ->  zero-ref    single-def "argument with side effect"
+;  V28 tmp24        [V28,T09] (  3,  6   )    long  ->  rdx         "index expr"
+;  V29 PSPSym       [V29,T19] (  1,  1   )    long  ->  [rbp-0x60]  do-not-enreg[V] "PSPSym"
+;* V30 cse0         [V30,T20] (  0,  0   )    long  ->  zero-ref    "CSE #02: moderate"
+;  V31 cse1         [V31,T13] (  3,  3   )     int  ->  r14         "CSE #01: moderate"
 ;
-; Lcl frame size = 24
+; Lcl frame size = 56
 
 G_M44586_IG01:
        push     rbp
        push     r15
        push     r14
        push     r13
        push     r12
        push     rbx
-       sub      rsp, 24
-       lea      rbp, [rsp+0x40]
-       mov      qword ptr [rbp-0x40], rsp
+       sub      rsp, 56
+       vzeroupper 
+       lea      rbp, [rsp+0x60]
+       mov      qword ptr [rbp-0x60], rsp
        mov      gword ptr [rbp-0x38], rdi
-						;; size=27 bbWeight=1 PerfScore 8.75
+						;; size=30 bbWeight=1 PerfScore 9.75
 G_M44586_IG02:
        mov      dword ptr [rbp-0x2C], 1
 						;; size=7 bbWeight=1 PerfScore 1.00
 G_M44586_IG03:
        mov      rdi, 0xD1FFAB1E      ; System.Runtime.Caching.CacheMemoryMonitor
        call     CORINFO_HELP_NEWSFAST
        mov      rbx, rax
        mov      rdi, gword ptr [rbp-0x38]
        mov      edx, dword ptr [rdi+0x48]
        mov      rsi, gword ptr [rdi+0x20]
        mov      rdi, rbx
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Caching.CacheMemoryMonitor:.ctor(System.Runtime.Caching.MemoryCache,int):this
        call     [rax]System.Runtime.Caching.CacheMemoryMonitor:.ctor(System.Runtime.Caching.MemoryCache,int):this
        mov      rdi, gword ptr [rbp-0x38]
        lea      rdi, bword ptr [rdi+0x18]
        mov      rsi, rbx
        call     CORINFO_HELP_ASSIGN_REF
        xor      eax, eax
        mov      dword ptr [rbp-0x30], eax
 						;; size=65 bbWeight=1 PerfScore 14.00
 G_M44586_IG04:
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.ExecutionContext:IsFlowSuppressed():ubyte
        call     [rax]System.Threading.ExecutionContext:IsFlowSuppressed():ubyte
        test     eax, eax
        jne      SHORT G_M44586_IG06
 						;; size=16 bbWeight=1 PerfScore 4.50
 G_M44586_IG05:
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.ExecutionContext:SuppressFlow():System.Threading.AsyncFlowControl
        call     [rax]System.Threading.ExecutionContext:SuppressFlow():System.Threading.AsyncFlowControl
        mov      dword ptr [rbp-0x30], 1
 						;; size=19 bbWeight=0.50 PerfScore 2.12
 G_M44586_IG06:
        mov      rdi, 0xD1FFAB1E      ; System.Threading.TimerCallback
        call     CORINFO_HELP_NEWSFAST
        mov      rbx, rax
        lea      rdi, bword ptr [rbx+0x08]
        mov      rsi, gword ptr [rbp-0x38]
        call     CORINFO_HELP_ASSIGN_REF
        mov      rdi, 0xD1FFAB1E      ; code for System.Runtime.Caching.MemoryCacheStatistics:CacheManagerTimerCallback(System.Object):this
        mov      qword ptr [rbx+0x18], rdi
        mov      rdi, 0xD1FFAB1E      ; System.Threading.Timer
        call     CORINFO_HELP_NEWSFAST
-       mov      r15, rax
+       mov      gword ptr [rbp-0x40], rax
        mov      rsi, gword ptr [rbp-0x38]
        mov      r14d, dword ptr [rsi+0x50]
        mov      r13d, r14d
        cmp      r13d, -1
-       jl       G_M44586_IG16
+       jl       G_M44586_IG14
        mov      rdi, 0xD1FFAB1E      ; System.Threading.TimerQueueTimer
        call     CORINFO_HELP_NEWSFAST
        mov      r12, rax
+       mov      gword ptr [rbp-0x48], r12
        lea      rdi, bword ptr [r12+0x20]
        mov      rsi, rbx
        call     CORINFO_HELP_ASSIGN_REF
        xor      rax, rax
        mov      gword ptr [r12+0x28], rax
        mov      qword ptr [r12+0x48], -1
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.ExecutionContext:Capture():System.Threading.ExecutionContext
        call     [rax]System.Threading.ExecutionContext:Capture():System.Threading.ExecutionContext
        lea      rdi, bword ptr [r12+0x30]
        mov      rsi, rax
        call     CORINFO_HELP_ASSIGN_REF
-       mov      rdi, 0xD1FFAB1E      ; global ptr
-       test     byte  ptr [rdi], 1
-       je       G_M44586_IG14
-						;; size=175 bbWeight=1 PerfScore 26.00
+       mov      rax, 0xD1FFAB1E      ; global ptr
+       test     byte  ptr [rax], 1
+       je       G_M44586_IG12
+						;; size=180 bbWeight=1 PerfScore 27.75
 G_M44586_IG07:
-       mov      rdi, 0xD1FFAB1E      ; data for System.Threading.TimerQueue:<Instances>k__BackingField
-       mov      rbx, gword ptr [rdi]
-       mov      rdi, 0xD1FFAB1E
+       mov      rax, 0xD1FFAB1E      ; data for System.Threading.TimerQueue:<Instances>k__BackingField
+       mov      rbx, gword ptr [rax]
+       mov      gword ptr [rbp-0x50], rbx
        mov      rax, 0xD1FFAB1E      ; function address
-       call     rax
-       cmp      dword ptr [rax], 2
-       jl       G_M44586_IG15
-       mov      rax, qword ptr [rax+0x08]
-       mov      rax, qword ptr [rax+0x10]
-       test     rax, rax
-       je       G_M44586_IG15
-						;; size=61 bbWeight=1 PerfScore 15.00
+						;; size=27 bbWeight=1 PerfScore 3.50
 G_M44586_IG08:
-       mov      ecx, dword ptr [rax+0x0A38]
-       lea      edx, [rcx-0x01]
-       mov      dword ptr [rax+0x0A38], edx
-       movzx    rax, cx
-       test     eax, eax
-       je       SHORT G_M44586_IG10
-						;; size=22 bbWeight=1 PerfScore 5.00
-G_M44586_IG09:
-       sar      ecx, 16
-       jmp      SHORT G_M44586_IG11
-						;; size=5 bbWeight=0.50 PerfScore 1.25
-G_M44586_IG10:
-       mov      rax, 0xD1FFAB1E      ; code for System.Threading.ProcessorIdCache:RefreshCurrentProcessorId():int
-       call     [rax]System.Threading.ProcessorIdCache:RefreshCurrentProcessorId():int
-       mov      ecx, eax
-						;; size=14 bbWeight=0.50 PerfScore 1.75
-G_M44586_IG11:
-       mov      eax, ecx
+       call     rax ; Interop+Sys:SchedGetCpu():int
+       mov      eax, eax
        mov      rdx, 0xD1FFAB1E      ; data for System.Threading.TimerQueue:<Instances>k__BackingField
        mov      rdx, gword ptr [rdx]
        mov      edi, dword ptr [rdx+0x08]
-       xor      edx, edx
-       div      rdx:rax, rdi
+       cqo      
+       idiv     rdx:rax, rdi
+       mov      rbx, gword ptr [rbp-0x50]
        mov      edi, dword ptr [rbx+0x08]
        cmp      rdx, rdi
-       jae      G_M44586_IG17
+       jae      G_M44586_IG15
        mov      rsi, gword ptr [rbx+8*rdx+0x10]
+       mov      r12, gword ptr [rbp-0x48]
        lea      rdi, bword ptr [r12+0x08]
        call     CORINFO_HELP_ASSIGN_REF
+       mov      rdi, 0xD1FFAB1E      ; global ptr
+       cmp      dword ptr [rdi], 0
+       jne      SHORT G_M44586_IG13
+						;; size=75 bbWeight=1 PerfScore 89.00
+G_M44586_IG09:
        cmp      r13d, -1
-       je       SHORT G_M44586_IG13
-						;; size=56 bbWeight=1 PerfScore 73.75
-G_M44586_IG12:
+       je       SHORT G_M44586_IG11
+						;; size=6 bbWeight=1 PerfScore 1.25
+G_M44586_IG10:
        mov      rdi, r12
        mov      esi, r13d
        mov      edx, r14d
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.TimerQueueTimer:Change(uint,uint):ubyte:this
        call     [rax]System.Threading.TimerQueueTimer:Change(uint,uint):ubyte:this
 						;; size=21 bbWeight=0.50 PerfScore 2.00
-G_M44586_IG13:
+G_M44586_IG11:
        mov      rdi, 0xD1FFAB1E      ; System.Threading.TimerHolder
        call     CORINFO_HELP_NEWFAST
-       mov      rbx, rax
-       lea      rdi, bword ptr [rbx+0x08]
+       mov      r14, rax
+       lea      rdi, bword ptr [r14+0x08]
        mov      rsi, r12
        call     CORINFO_HELP_ASSIGN_REF
+       mov      r15, gword ptr [rbp-0x40]
        lea      rdi, bword ptr [r15+0x08]
-       mov      rsi, rbx
+       mov      rsi, r14
        call     CORINFO_HELP_ASSIGN_REF
-       jmp      SHORT G_M44586_IG18
-						;; size=44 bbWeight=1 PerfScore 7.00
-G_M44586_IG14:
+       jmp      SHORT G_M44586_IG16
+						;; size=48 bbWeight=1 PerfScore 8.00
+G_M44586_IG12:
        mov      rdi, 0xD1FFAB1E
        mov      esi, 946
        call     CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
        jmp      G_M44586_IG07
 						;; size=25 bbWeight=0 PerfScore 0.00
-G_M44586_IG15:
-       mov      edi, 2
-       call     CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED
-       jmp      G_M44586_IG08
-						;; size=15 bbWeight=0 PerfScore 0.00
-G_M44586_IG16:
+G_M44586_IG13:
+       call     CORINFO_HELP_POLL_GC
+       jmp      SHORT G_M44586_IG09
+						;; size=7 bbWeight=0 PerfScore 0.00
+G_M44586_IG14:
        mov      edi, 0x1710B
        mov      rsi, 0xD1FFAB1E
        call     CORINFO_HELP_STRCNS
        mov      rdx, rax
        mov      edi, r13d
        mov      esi, -1
        mov      rax, 0xD1FFAB1E      ; code for System.ArgumentOutOfRangeException:ThrowLess[int](int,int,System.String)
        call     [rax]System.ArgumentOutOfRangeException:ThrowLess[int](int,int,System.String)
        int3     
 						;; size=44 bbWeight=0 PerfScore 0.00
-G_M44586_IG17:
+G_M44586_IG15:
        call     CORINFO_HELP_RNGCHKFAIL
        int3     
 						;; size=6 bbWeight=0 PerfScore 0.00
-G_M44586_IG18:
+G_M44586_IG16:
        cmp      dword ptr [rbp-0x30], 0
-       je       SHORT G_M44586_IG20
+       je       SHORT G_M44586_IG18
 						;; size=6 bbWeight=1 PerfScore 3.00
-G_M44586_IG19:
+G_M44586_IG17:
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.ExecutionContext:RestoreFlow()
        call     [rax]System.Threading.ExecutionContext:RestoreFlow()
 						;; size=12 bbWeight=0.50 PerfScore 1.62
-G_M44586_IG20:
+G_M44586_IG18:
        mov      rdi, 0xD1FFAB1E      ; System.Runtime.Caching.GCHandleRef`1[System.Threading.Timer]
        call     CORINFO_HELP_NEWSFAST
-       mov      r14, rax
+       mov      r12, rax
        mov      rdi, r15
        mov      esi, 2
        call     System.Runtime.InteropServices.GCHandle:_InternalAlloc(System.Object,int):long
        test     rax, rax
-       jne      SHORT G_M44586_IG22
+       jne      SHORT G_M44586_IG20
 						;; size=36 bbWeight=1 PerfScore 4.25
-G_M44586_IG21:
+G_M44586_IG19:
        mov      rdi, r15
        mov      esi, 2
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.InteropServices.GCHandle:InternalAllocWithGCTransition(System.Object,int):long
        call     [rax]System.Runtime.InteropServices.GCHandle:InternalAllocWithGCTransition(System.Object,int):long
 						;; size=20 bbWeight=0.50 PerfScore 1.88
-G_M44586_IG22:
-       mov      qword ptr [r14+0x10], rax
+G_M44586_IG20:
+       mov      qword ptr [r12+0x10], rax
        mov      rsi, gword ptr [rbp-0x38]
        lea      rdi, bword ptr [rsi+0x08]
-       mov      rsi, r14
+       mov      rsi, r12
        call     CORINFO_HELP_ASSIGN_REF
        xor      edi, edi
        mov      dword ptr [rbp-0x2C], edi
-						;; size=25 bbWeight=1 PerfScore 5.00
-G_M44586_IG23:
-       add      rsp, 24
+						;; size=26 bbWeight=1 PerfScore 5.00
+G_M44586_IG21:
+       add      rsp, 56
        pop      rbx
        pop      r12
        pop      r13
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=15 bbWeight=1 PerfScore 4.25
-G_M44586_IG24:
+G_M44586_IG22:
        push     rbp
        push     r15
        push     r14
        push     r13
        push     r12
        push     rbx
        push     rax
+       vzeroupper 
        mov      rbp, qword ptr [rdi]
        mov      qword ptr [rsp], rbp
-       lea      rbp, [rbp+0x40]
-						;; size=22 bbWeight=0 PerfScore 0.00
-G_M44586_IG25:
+       lea      rbp, [rbp+0x60]
+						;; size=25 bbWeight=0 PerfScore 0.00
+G_M44586_IG23:
        cmp      dword ptr [rbp-0x30], 0
-       je       SHORT G_M44586_IG26
+       je       SHORT G_M44586_IG24
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.ExecutionContext:RestoreFlow()
        call     [rax]System.Threading.ExecutionContext:RestoreFlow()
 						;; size=18 bbWeight=0 PerfScore 0.00
-G_M44586_IG26:
+G_M44586_IG24:
        nop      
 						;; size=1 bbWeight=0 PerfScore 0.00
-G_M44586_IG27:
+G_M44586_IG25:
        add      rsp, 8
        pop      rbx
        pop      r12
        pop      r13
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=15 bbWeight=0 PerfScore 0.00
-G_M44586_IG28:
+G_M44586_IG26:
        push     rbp
        push     r15
        push     r14
        push     r13
        push     r12
        push     rbx
        push     rax
+       vzeroupper 
        mov      rbp, qword ptr [rdi]
        mov      qword ptr [rsp], rbp
-       lea      rbp, [rbp+0x40]
-						;; size=22 bbWeight=0 PerfScore 0.00
-G_M44586_IG29:
+       lea      rbp, [rbp+0x60]
+						;; size=25 bbWeight=0 PerfScore 0.00
+G_M44586_IG27:
        cmp      dword ptr [rbp-0x2C], 0
-       je       SHORT G_M44586_IG30
+       je       SHORT G_M44586_IG28
        mov      rdi, gword ptr [rbp-0x38]
        mov      rax, 0xD1FFAB1E      ; code for System.Runtime.Caching.MemoryCacheStatistics:Dispose():this
        call     [rax]System.Runtime.Caching.MemoryCacheStatistics:Dispose():this
 						;; size=22 bbWeight=0 PerfScore 0.00
-G_M44586_IG30:
+G_M44586_IG28:
        nop      
 						;; size=1 bbWeight=0 PerfScore 0.00
-G_M44586_IG31:
+G_M44586_IG29:
        add      rsp, 8
        pop      rbx
        pop      r12
        pop      r13
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=15 bbWeight=0 PerfScore 0.00
 
-; Total bytes of code 852, prolog size 27, PerfScore 182.12, instruction count 213, allocated bytes for code 852 (MethodHash=c52151d5) for method System.Runtime.Caching.MemoryCacheStatistics:InitDisposableMembers():this (FullOpts)
+; Total bytes of code 813, prolog size 30, PerfScore 182.88, instruction count 205, allocated bytes for code 813 (MethodHash=c52151d5) for method System.Runtime.Caching.MemoryCacheStatistics:InitDisposableMembers():this (FullOpts)
-36 (-16.00 % of base) - System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_256(ulong,ulong,ulong):ulong
 ; Assembly listing for method System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_256(ulong,ulong,ulong):ulong (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 4 single block inlinees; 8 inlinees without PGO data
 ; Final local variable assignments
 ;
 ;  V00 arg0         [V00,T04] (  3,  3   )    long  ->  rdi         single-def
 ;  V01 arg1         [V01,T03] (  5,  3.50)    long  ->  rsi         single-def
 ;  V02 arg2         [V02,T05] (  3,  2.50)    long  ->  rdx         single-def
 ;  V03 loc0         [V03,T01] (  5, 10.50)   byref  ->  rdi         single-def
 ;  V04 loc1         [V04,T07] ( 14, 18.50)  simd32  ->  mm0         <System.Runtime.Intrinsics.Vector256`1[ushort]>
 ;  V05 loc2         [V05,T02] (  5,  6   )   byref  ->  rax         single-def
 ;  V06 loc3         [V06,T00] ( 12, 27   )    long  ->  rcx        
 ;  V07 loc4         [V07,T06] (  2,  4.50)    long  ->  rdx        
-;  V08 loc5         [V08,T08] (  3, 12   )  simd32  ->  mm2         <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;  V08 loc5         [V08,T09] (  3, 12   )  simd32  ->  mm2         <System.Runtime.Intrinsics.Vector256`1[ushort]>
 ;# V09 OutArgs      [V09    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V10 tmp1         [V10    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V11 tmp2         [V11    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V12 tmp3         [V12    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V13 tmp4         [V13    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V10 tmp1         [V10    ] (  0,  0   )  simd32  ->  zero-ref    "spilled call-like call argument"
+;* V11 tmp2         [V11    ] (  0,  0   )  simd32  ->  zero-ref    "spilled call-like call argument"
+;  V12 tmp3         [V12,T08] (  2, 16   )  simd32  ->  mm0         "Spilling op1 side effects for HWIntrinsic"
+;* V13 tmp4         [V13    ] (  0,  0   )  simd32  ->  zero-ref    "spilled call-like call argument"
 ;* V14 tmp5         [V14    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;* V15 tmp6         [V15    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V16 tmp7         [V16    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V17 tmp8         [V17    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
-;* V18 tmp9         [V18    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;  V19 cse0         [V19,T09] (  5,  7   )  simd32  ->  mm1         "CSE #02: moderate"
+;* V16 tmp7         [V16    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V17 tmp8         [V17    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V18 tmp9         [V18    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V19 tmp10        [V19    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V20 tmp11        [V20    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V21 tmp12        [V21    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V22 tmp13        [V22    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V23 tmp14        [V23    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V24 tmp15        [V24    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V25 tmp16        [V25    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V26 tmp17        [V26    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
+;* V27 tmp18        [V27    ] (  0,  0   )  simd32  ->  zero-ref    "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V28 tmp19        [V28    ] (  0,  0   )  simd32  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V29 tmp20        [V29    ] (  0,  0   )  simd32  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;  V30 cse0         [V30,T10] (  5,  7   )  simd32  ->  mm1         "CSE #01: moderate"
 ;
 ; Lcl frame size = 0
 
 G_M60588_IG01:
        push     rbp
        mov      rbp, rsp
 						;; size=4 bbWeight=1 PerfScore 1.25
 G_M60588_IG02:
        vmovups  ymm0, ymmword ptr [rdi]
        vmovups  ymm1, ymmword ptr [reloc @RWD00]
        vptest   ymm0, ymm1
        jne      G_M60588_IG10
 						;; size=23 bbWeight=1 PerfScore 15.00
 G_M60588_IG03:
        mov      rax, rsi
-       vpmovwb  ymm0, ymm2
-       vpmovwb  ymm0, ymm0
-       vinserti128 ymm0, ymm2, xmm0, 1
+       vpackuswb ymm0, ymm0, ymm0
+       vpermq   ymm0, ymm0, -40
        vmovups  xmmword ptr [rax], xmm0
        mov      ecx, 16
        test     sil, 16
        jne      SHORT G_M60588_IG04
        vmovups  ymm0, ymmword ptr [rdi+0x20]
        vptest   ymm0, ymm1
-       jne      G_M60588_IG08
-       vpmovwb  ymm0, ymm2
-       vpmovwb  ymm0, ymm0
-       vinserti128 ymm0, ymm2, xmm0, 1
+       jne      SHORT G_M60588_IG08
+       vpackuswb ymm0, ymm0, ymm0
+       vpermq   ymm0, ymm0, -40
        vmovups  xmmword ptr [rax+0x10], xmm0
-						;; size=75 bbWeight=0.50 PerfScore 16.38
+						;; size=55 bbWeight=0.50 PerfScore 11.38
 G_M60588_IG04:
        and      rsi, 31
        mov      rcx, rsi
        neg      rcx
        add      rcx, 32
        add      rdx, -32
        align    [0 bytes for IG05]
 						;; size=18 bbWeight=0.50 PerfScore 0.62
 G_M60588_IG05:
        vmovups  ymm0, ymmword ptr [rdi+2*rcx]
        vmovups  ymm2, ymmword ptr [rdi+2*rcx+0x20]
        vpor     ymm3, ymm0, ymm2
        vptest   ymm3, ymm1
        je       SHORT G_M60588_IG07
 						;; size=22 bbWeight=4 PerfScore 65.33
 G_M60588_IG06:
        vptest   ymm0, ymm1
        jne      SHORT G_M60588_IG08
-       vpmovwb  ymm0, ymm2
-       vpmovwb  ymm0, ymm0
-       vinserti128 ymm1, ymm2, xmm0, 1
-       vmovups  xmmword ptr [rax+rcx], xmm1
+       vpackuswb ymm0, ymm0, ymm0
+       vpermq   ymm2, ymm0, -40
+       vmovups  xmmword ptr [rax+rcx], xmm2
        add      rcx, 16
        jmp      SHORT G_M60588_IG08
-						;; size=36 bbWeight=0.50 PerfScore 9.12
+						;; size=28 bbWeight=0.50 PerfScore 6.62
 G_M60588_IG07:
-       vpmovwb  ymm0, ymm0
-       vpmovwb  ymm2, ymm2
-       vinserti128 ymm0, ymm0, xmm2, 1
+       vpackuswb ymm0, ymm0, ymm2
+       vpermq   ymm0, ymm0, -40
        vmovups  ymmword ptr [rax+rcx], ymm0
        add      rcx, 32
        cmp      rcx, rdx
        jbe      SHORT G_M60588_IG05
-						;; size=32 bbWeight=4 PerfScore 46.00
+						;; size=24 bbWeight=4 PerfScore 26.00
 G_M60588_IG08:
        mov      rax, rcx
 						;; size=3 bbWeight=0.50 PerfScore 0.12
 G_M60588_IG09:
        vzeroupper 
        pop      rbp
        ret      
 						;; size=5 bbWeight=0.50 PerfScore 1.25
 G_M60588_IG10:
        xor      eax, eax
 						;; size=2 bbWeight=0.50 PerfScore 0.12
 G_M60588_IG11:
        vzeroupper 
        pop      rbp
        ret      
 						;; size=5 bbWeight=0.50 PerfScore 1.25
 RWD00  	dq	FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
 
 
-; Total bytes of code 225, prolog size 4, PerfScore 156.46, instruction count 55, allocated bytes for code 225 (MethodHash=910c1353) for method System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_256(ulong,ulong,ulong):ulong (FullOpts)
+; Total bytes of code 189, prolog size 4, PerfScore 128.96, instruction count 51, allocated bytes for code 189 (MethodHash=910c1353) for method System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_256(ulong,ulong,ulong):ulong (FullOpts)
-18 (-6.57 % of base) - System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_512(ulong,ulong,ulong):ulong
 ; Assembly listing for method System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_512(ulong,ulong,ulong):ulong (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 4 single block inlinees; 0 inlinees without PGO data
+; 0 inlinees with PGO data; 8 single block inlinees; 4 inlinees without PGO data
 ; Final local variable assignments
 ;
 ;  V00 arg0         [V00,T04] (  3,  3   )    long  ->  rdi         single-def
 ;  V01 arg1         [V01,T03] (  5,  3.50)    long  ->  rsi         single-def
 ;  V02 arg2         [V02,T05] (  3,  2.50)    long  ->  rdx         single-def
 ;  V03 loc0         [V03,T01] (  5, 10.50)   byref  ->  rdi         single-def
 ;  V04 loc1         [V04,T08] ( 14, 18.50)  simd64  ->  mm0         <System.Runtime.Intrinsics.Vector512`1[ushort]>
 ;  V05 loc2         [V05,T02] (  5,  6   )   byref  ->  rax         single-def
 ;  V06 loc3         [V06,T00] ( 12, 27   )    long  ->  rcx        
 ;  V07 loc4         [V07,T06] (  2,  4.50)    long  ->  rdx        
-;  V08 loc5         [V08,T09] (  3, 12   )  simd64  ->  mm2         <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;  V08 loc5         [V08,T10] (  3, 12   )  simd64  ->  mm3         <System.Runtime.Intrinsics.Vector512`1[ushort]>
 ;# V09 OutArgs      [V09    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V10 tmp1         [V10    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
-;  V11 cse0         [V11,T10] (  5,  7   )  simd64  ->  mm1         "CSE #02: moderate"
-;  V12 rat0         [V12,T07] (  3, 24   )  simd64  ->  mm3         "ReplaceWithLclVar is creating a new local variable"
+;* V10 tmp1         [V10    ] (  0,  0   )  simd64  ->  zero-ref    "spilled call-like call argument"
+;* V11 tmp2         [V11    ] (  0,  0   )  simd64  ->  zero-ref    "spilled call-like call argument"
+;  V12 tmp3         [V12,T09] (  2, 16   )  simd64  ->  mm0         "Spilling op1 side effects for HWIntrinsic"
+;* V13 tmp4         [V13    ] (  0,  0   )  simd64  ->  zero-ref    "spilled call-like call argument"
+;* V14 tmp5         [V14    ] (  0,  0   )  simd64  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V15 tmp6         [V15    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V16 tmp7         [V16    ] (  0,  0   )  simd64  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V17 tmp8         [V17    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V18 tmp9         [V18    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V19 tmp10        [V19    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V20 tmp11        [V20    ] (  0,  0   )  simd64  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V21 tmp12        [V21    ] (  0,  0   )  simd64  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;  V22 cse0         [V22,T11] (  5,  7   )  simd64  ->  mm1         "CSE #01: moderate"
+;  V23 cse1         [V23,T12] (  5,  6   )  simd64  ->  mm2         "CSE #02: moderate"
+;  V24 rat0         [V24,T07] (  3, 24   )  simd64  ->  mm4         "ReplaceWithLclVar is creating a new local variable"
 ;
 ; Lcl frame size = 0
 
 G_M60939_IG01:
        push     rbp
        mov      rbp, rsp
 						;; size=4 bbWeight=1 PerfScore 1.25
 G_M60939_IG02:
        vmovups  zmm0, zmmword ptr [rdi]
        vmovups  zmm1, zmmword ptr [reloc @RWD00]
        vptestmw k1, zmm1, zmm0
        kortestd k1, k1
        jne      G_M60939_IG10
 						;; size=33 bbWeight=1 PerfScore 12.00
 G_M60939_IG03:
        mov      rax, rsi
-       vpmovwb  zmm0, zmm2
-       vpmovwb  zmm0, zmm0
-       vinserti64x4 zmm0, zmm2, ymm0, 1
+       vpackuswb zmm0, zmm0, zmm0
+       vmovups  zmm2, zmmword ptr [reloc @RWD64]
+       vpermq   zmm0, zmm2, zmm0
        vmovups  ymmword ptr [rax], ymm0
        mov      ecx, 32
        test     sil, 32
        jne      SHORT G_M60939_IG04
        vmovups  zmm0, zmmword ptr [rdi+0x40]
        vptestmw k1, zmm1, zmm0
        kortestd k1, k1
        jne      G_M60939_IG08
-       vpmovwb  zmm0, zmm2
-       vpmovwb  zmm0, zmm0
-       vinserti64x4 zmm0, zmm2, ymm0, 1
+       vpackuswb zmm0, zmm0, zmm0
+       vpermq   zmm0, zmm2, zmm0
        vmovups  ymmword ptr [rax+0x20], ymm0
-						;; size=85 bbWeight=0.50 PerfScore 15.38
+						;; size=81 bbWeight=0.50 PerfScore 11.88
 G_M60939_IG04:
 ; rcx = 64 - (rsi & 63): the byte offset from the original rsi to the next
 ; 64-byte boundary (64 when rsi was already on one). rax still holds the
 ; original rsi. rdx is lowered by 64 and is the loop bound compared in IG07.
        and      rsi, 63
        mov      rcx, rsi
        neg      rcx
        add      rcx, 64
        add      rdx, -64
        align    [0 bytes for IG05]
 						;; size=18 bbWeight=0.50 PerfScore 0.62
 G_M60939_IG05:
 ; Loop head: load two consecutive 64-byte vectors starting at rdi + 2*rcx.
 ; vpternlogd with imm8 -88 (0xA8) computes (dest | src1) & src2, here
 ; (first vector | second vector) & zmm1, where zmm1 is the 0xFF80 mask loaded
 ; in IG02. One vptestmw therefore covers both vectors; k1 == 0 goes to IG07.
 ; Diff: the second vector and the scratch copy move from zmm2/zmm3 to
 ; zmm3/zmm4; in the new code zmm2 is occupied by the RWD64 permutation
 ; indices.
        vmovups  zmm0, zmmword ptr [rdi+2*rcx]
-       vmovups  zmm2, zmmword ptr [rdi+2*rcx+0x40]
-       vmovaps  zmm3, zmm0
-       vpternlogd zmm3, zmm2, zmm1, -88
-       vptestmw k1, zmm3, zmm3
+       vmovups  zmm3, zmmword ptr [rdi+2*rcx+0x40]
+       vmovaps  zmm4, zmm0
+       vpternlogd zmm4, zmm3, zmm1, -88
+       vptestmw k1, zmm4, zmm4
        kortestd k1, k1
        je       SHORT G_M60939_IG07
 						;; size=41 bbWeight=4 PerfScore 55.00
 G_M60939_IG06:
 ; Reached when the combined test in IG05 flagged at least one word. The
 ; first vector is re-tested alone: if it is flagged, exit through IG08 with
 ; rcx unchanged. Otherwise narrow only that vector, store 32 bytes at
 ; rax + rcx, add 32 to rcx and exit through IG08.
        vptestmw k1, zmm1, zmm0
        kortestd k1, k1
        jne      SHORT G_M60939_IG08
-       vpmovwb  zmm0, zmm2
-       vpmovwb  zmm0, zmm0
-       vinserti64x4 zmm1, zmm2, ymm0, 1
-       vmovups  ymmword ptr [rax+rcx], ymm1
+       vpackuswb zmm0, zmm0, zmm0
+       vpermq   zmm3, zmm2, zmm0
+       vmovups  ymmword ptr [rax+rcx], ymm3
        add      rcx, 32
        jmp      SHORT G_M60939_IG08
-						;; size=43 bbWeight=0.50 PerfScore 8.62
+						;; size=36 bbWeight=0.50 PerfScore 6.12
 G_M60939_IG07:
 ; Both vectors passed the test: narrow them into 64 bytes, store at
 ; rax + rcx and advance rcx by 64. In the new code vpackuswb packs per
 ; 128-bit lane and the zmm2 qword permutation restores linear byte order.
 ; The loop repeats while rcx <= rdx (unsigned compare).
-       vpmovwb  zmm0, zmm0
-       vpmovwb  zmm2, zmm2
-       vinserti64x4 zmm0, zmm0, ymm2, 1
+       vpackuswb zmm0, zmm0, zmm3
+       vpermq   zmm0, zmm2, zmm0
        vmovups  zmmword ptr [rax+rcx], zmm0
        add      rcx, 64
        cmp      rcx, rdx
        jbe      SHORT G_M60939_IG05
-						;; size=35 bbWeight=4 PerfScore 46.00
+						;; size=28 bbWeight=4 PerfScore 26.00
 G_M60939_IG08:
 ; Common exit: rcx (the offset used for the stores) becomes the return value.
        mov      rax, rcx
 						;; size=3 bbWeight=0.50 PerfScore 0.12
 G_M60939_IG09:
 ; Epilog: vzeroupper, restore rbp and return.
        vzeroupper 
        pop      rbp
        ret      
 						;; size=5 bbWeight=0.50 PerfScore 1.25
 G_M60939_IG10:
 ; Exit taken from IG02 when the very first vector is flagged: return 0.
        xor      eax, eax
 						;; size=2 bbWeight=0.50 PerfScore 0.12
 G_M60939_IG11:
 ; Epilog for the return-0 path (same sequence as IG09).
        vzeroupper 
        pop      rbp
        ret      
 						;; size=5 bbWeight=0.50 PerfScore 1.25
 ; RWD00: 0xFF80 in every 16-bit lane; used with vptestmw to flag words that
 ; have any bit above bit 6 set.
 ; RWD64 (new): qword indices 0,2,4,6,1,3,5,7 used by vpermq after vpackuswb.
 RWD00  	dq	FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
+RWD64  	dq	0000000000000000h, 0000000000000002h, 0000000000000004h, 0000000000000006h, 0000000000000001h, 0000000000000003h, 0000000000000005h, 0000000000000007h
 
 
-; Total bytes of code 274, prolog size 4, PerfScore 141.62, instruction count 60, allocated bytes for code 286 (MethodHash=483911f4) for method System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_512(ulong,ulong,ulong):ulong (FullOpts)
+; Total bytes of code 256, prolog size 4, PerfScore 115.62, instruction count 57, allocated bytes for code 268 (MethodHash=483911f4) for method System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_512(ulong,ulong,ulong):ulong (FullOpts)
-16 (-2.19 % of base) - System.Net.NetworkInformation.NetworkChange:add_NetworkAvailabilityChanged(System.Net.NetworkInformation.NetworkAvailabilityChangedEventHandler)
 ; Assembly listing for method System.Net.NetworkInformation.NetworkChange:add_NetworkAvailabilityChanged(System.Net.NetworkInformation.NetworkAvailabilityChangedEventHandler) (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
-; 0 inlinees with PGO data; 13 single block inlinees; 9 inlinees without PGO data
+; 0 inlinees with PGO data; 14 single block inlinees; 8 inlinees without PGO data
 ; Final local variable assignments
 ;
-;  V00 arg0         [V00,T01] (  4,  3.50)     ref  ->  rbx         class-hnd single-def <System.Net.NetworkInformation.NetworkAvailabilityChangedEventHandler>
-;  V01 loc0         [V01,T15] (  4,  1   )     ref  ->  [rbp-0x38]  class-hnd exact EH-live spill-single-def <System.Object>
+;  V00 arg0         [V00,T01] (  4,  3.50)     ref  ->  [rbp-0x38]  class-hnd single-def <System.Net.NetworkInformation.NetworkAvailabilityChangedEventHandler>
+;  V01 loc0         [V01,T10] (  4,  1   )     ref  ->  [rbp-0x40]  class-hnd exact EH-live spill-single-def <System.Object>
 ;  V02 loc1         [V02    ] (  5,  2   )   ubyte  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed ld-addr-op
 ;  V03 loc2         [V03    ] (  3,  1   )  struct ( 8) [rbp-0x30]  do-not-enreg[XS] must-init addr-exposed ld-addr-op <System.Threading.AsyncFlowControl>
 ;# V04 OutArgs      [V04    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V05 tmp1         [V05,T10] (  2,  2   )     ref  ->  r15         class-hnd exact single-def "impAppendStmt" <System.Collections.Generic.Dictionary`2[System.Net.NetworkInformation.NetworkAvailabilityChangedEventHandler,System.Threading.ExecutionContext]>
-;  V06 tmp2         [V06,T03] (  3,  3   )     ref  ->  r15         class-hnd exact single-def "NewObj constructor temp" <System.Threading.Timer>
-;  V07 tmp3         [V07,T11] (  2,  2   )     ref  ->  r14         class-hnd exact single-def "Inlining Arg" <System.Threading.TimerCallback>
-;* V08 tmp4         [V08,T16] (  0,  0   )     int  ->  zero-ref    ld-addr-op "Inlining Arg"
-;* V09 tmp5         [V09,T19] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;* V10 tmp6         [V10,T17] (  0,  0   )     int  ->  zero-ref    ld-addr-op "Inlining Arg"
-;* V11 tmp7         [V11,T20] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;  V12 tmp8         [V12,T00] (  7,  7   )     ref  ->  r13         class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerQueueTimer>
-;  V13 tmp9         [V13,T04] (  3,  3   )     ref  ->  r14         class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerHolder>
-;  V14 tmp10        [V14,T05] (  3,  3   )     ref  ->  r14         class-hnd exact single-def "impAppendStmt" <System.Threading.TimerQueue[]>
-;* V15 tmp11        [V15    ] (  0,  0   )    long  ->  zero-ref    "impAppendStmt"
+;  V05 tmp1         [V05,T06] (  2,  2   )     ref  ->  r15         class-hnd exact single-def "impAppendStmt" <System.Collections.Generic.Dictionary`2[System.Net.NetworkInformation.NetworkAvailabilityChangedEventHandler,System.Threading.ExecutionContext]>
+;  V06 tmp2         [V06,T02] (  3,  3   )     ref  ->  [rbp-0x48]  class-hnd exact spill-single-def "NewObj constructor temp" <System.Threading.Timer>
+;  V07 tmp3         [V07,T07] (  2,  2   )     ref  ->  r14         class-hnd exact single-def "Inlining Arg" <System.Threading.TimerCallback>
+;* V08 tmp4         [V08,T11] (  0,  0   )     int  ->  zero-ref    ld-addr-op "Inlining Arg"
+;* V09 tmp5         [V09,T14] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
+;* V10 tmp6         [V10,T12] (  0,  0   )     int  ->  zero-ref    ld-addr-op "Inlining Arg"
+;* V11 tmp7         [V11,T15] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
+;  V12 tmp8         [V12,T00] (  7,  7   )     ref  ->  [rbp-0x50]  class-hnd exact spill-single-def "NewObj constructor temp" <System.Threading.TimerQueueTimer>
+;  V13 tmp9         [V13,T03] (  3,  3   )     ref  ->  rbx         class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerHolder>
+;  V14 tmp10        [V14,T04] (  3,  3   )     ref  ->  [rbp-0x58]  class-hnd exact spill-single-def "impAppendStmt" <System.Threading.TimerQueue[]>
+;  V15 tmp11        [V15,T09] (  2,  2   )    long  ->  rax         "impAppendStmt"
 ;* V16 tmp12        [V16    ] (  0,  0   )     int  ->  zero-ref    "Inline return value spill temp"
-;  V17 tmp13        [V17,T13] (  3,  1.50)     int  ->  rcx         "Inline return value spill temp"
-;  V18 tmp14        [V18,T02] (  4,  4   )     int  ->  rcx         "dup spill"
-;* V19 tmp15        [V19    ] (  0,  0   )     int  ->  zero-ref    "Inline stloc first use temp"
-;  V20 tmp16        [V20,T12] (  2,  2   )     ref  ->  rdx         class-hnd exact single-def "Inlining Arg" <System.Threading.ExecutionContext>
-;  V21 tmp17        [V21    ] (  3,  1   )     ref  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V03._thread (fldOffset=0x0)" P-DEP
-;* V22 tmp18        [V22    ] (  0,  0   )     ref  ->  zero-ref    single-def "argument with side effect"
-;* V23 tmp19        [V23    ] (  0,  0   )     ref  ->  zero-ref    single-def "argument with side effect"
-;  V24 tmp20        [V24,T06] (  3,  3   )    long  ->  rdx         "index expr"
-;  V25 PSPSym       [V25,T18] (  1,  1   )    long  ->  [rbp-0x40]  do-not-enreg[V] "PSPSym"
-;* V26 cse0         [V26,T21] (  0,  0   )    long  ->  zero-ref    "CSE #02: moderate"
-;  V27 cse1         [V27,T14] (  3,  1.50)    long  ->  rax         "CSE #01: moderate"
-;  V28 rat0         [V28,T09] (  3,  2   )    long  ->  rax         "TLS field access"
-;  V29 rat1         [V29,T07] (  3,  3   )    long  ->  rax         "TLS access"
-;  V30 rat2         [V30,T08] (  3,  3   )    long  ->  rax         "ThreadStaticBlockBase access"
+;  V17 tmp13        [V17,T08] (  2,  2   )     ref  ->  rdx         class-hnd exact single-def "Inlining Arg" <System.Threading.ExecutionContext>
+;  V18 tmp14        [V18    ] (  3,  1   )     ref  ->  [rbp-0x30]  do-not-enreg[X] addr-exposed "field V03._thread (fldOffset=0x0)" P-DEP
+;* V19 tmp15        [V19    ] (  0,  0   )     ref  ->  zero-ref    single-def "argument with side effect"
+;* V20 tmp16        [V20    ] (  0,  0   )     ref  ->  zero-ref    single-def "argument with side effect"
+;  V21 tmp17        [V21,T05] (  3,  3   )    long  ->  rdx         "index expr"
+;  V22 PSPSym       [V22,T13] (  1,  1   )    long  ->  [rbp-0x60]  do-not-enreg[V] "PSPSym"
+;* V23 cse0         [V23,T16] (  0,  0   )    long  ->  zero-ref    "CSE #01: moderate"
 ;
-; Lcl frame size = 32
+; Lcl frame size = 64
 
 G_M59891_IG01:
 ; Prolog: save rbp, r15, r14, r13 and rbx, reserve the local frame and point
 ; rbp at the top of it. [rbp-0x30] (V03, must-init) is zeroed, rsp is saved
 ; in the PSPSym slot and the incoming argument in rdi is copied to rbx.
 ; Diff: the frame grows from 32 to 64 bytes (the new variable table gives
 ; V00, V06, V12 and V14 rbp-relative stack slots), rbp becomes rsp+0x60
 ; instead of rsp+0x40, and a vzeroupper is added.
        push     rbp
        push     r15
        push     r14
        push     r13
        push     rbx
-       sub      rsp, 32
-       lea      rbp, [rsp+0x40]
+       sub      rsp, 64
+       vzeroupper 
+       lea      rbp, [rsp+0x60]
        xor      eax, eax
        mov      qword ptr [rbp-0x30], rax
-       mov      qword ptr [rbp-0x40], rsp
+       mov      qword ptr [rbp-0x60], rsp
        mov      rbx, rdi
-						;; size=30 bbWeight=1 PerfScore 8.25
+						;; size=33 bbWeight=1 PerfScore 9.25
 G_M59891_IG02:
 ; Null check on the argument: when rbx is null, skip to the exit path
 ; (IG17 in the new numbering, IG18 in the old).
 ; Diff: the new code first stores rbx to its stack slot at [rbp-0x38].
+       mov      gword ptr [rbp-0x38], rbx
        test     rbx, rbx
-       je       G_M59891_IG18
-						;; size=9 bbWeight=1 PerfScore 1.25
+       je       G_M59891_IG17
+						;; size=13 bbWeight=1 PerfScore 2.25
 G_M59891_IG03:
 ; Load the object reference stored at the constant address into rdi and keep
 ; a copy in the V01 slot ([rbp-0x40] new, [rbp-0x38] old); the Monitor:Exit
 ; paths read it back later. Clear the flag at [rbp-0x28], whose address is
 ; passed to Monitor:ReliableEnter in IG04.
 ; Diff: rbx is reloaded from [rbp-0x38] at the top of the group.
+       mov      rbx, gword ptr [rbp-0x38]
        mov      rsi, 0xD1FFAB1E      ; const ptr
        mov      rdi, gword ptr [rsi]
-       mov      gword ptr [rbp-0x38], rdi
+       mov      gword ptr [rbp-0x40], rdi
        xor      esi, esi
        mov      dword ptr [rbp-0x28], esi
-						;; size=22 bbWeight=0.50 PerfScore 2.25
+						;; size=26 bbWeight=0.50 PerfScore 2.75
 G_M59891_IG04:
 ; If the flag at [rbp-0x28] is already non-zero, go to the block that calls
 ; Monitor:ThrowLockTakenException. Otherwise call Monitor:ReliableEnter with
 ; rdi (the object loaded in IG03) and the address of the flag, then call
 ; NetworkChange:CreateSocket() when the s_socket static is null.
        cmp      byte  ptr [rbp-0x28], 0
-       jne      G_M59891_IG16
+       jne      G_M59891_IG15
        lea      rsi, [rbp-0x28]
        call     System.Threading.Monitor:ReliableEnter(System.Object,byref)
        mov      rax, 0xD1FFAB1E      ; data for System.Net.NetworkInformation.NetworkChange:s_socket
        cmp      gword ptr [rax], 0
        jne      SHORT G_M59891_IG05
        mov      rax, 0xD1FFAB1E      ; code for System.Net.NetworkInformation.NetworkChange:CreateSocket()
        call     [rax]System.Net.NetworkInformation.NetworkChange:CreateSocket()
 						;; size=47 bbWeight=0.50 PerfScore 6.00
 G_M59891_IG05:
 ; When s_availabilityTimer is already non-null, skip timer creation and jump
 ; to the Dictionary insert block (IG14 new, IG15 old). Otherwise call
 ; ExecutionContext:SuppressFlow() and store its result in [rbp-0x30] (V03).
        mov      rax, 0xD1FFAB1E      ; data for System.Net.NetworkInformation.NetworkChange:s_availabilityTimer
        cmp      gword ptr [rax], 0
-       jne      G_M59891_IG15
+       jne      G_M59891_IG14
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.ExecutionContext:SuppressFlow():System.Threading.AsyncFlowControl
        call     [rax]System.Threading.ExecutionContext:SuppressFlow():System.Threading.AsyncFlowControl
        mov      gword ptr [rbp-0x30], rax
 						;; size=36 bbWeight=0.50 PerfScore 4.25
 G_M59891_IG06:
 ; Allocate a Timer and a TimerQueueTimer. In the TimerQueueTimer (r13):
 ; +0x20 receives the TimerCallback loaded into r14, +0x28 is set to null,
 ; +0x48 is set to -1 and +0x30 receives the result of
 ; ExecutionContext:Capture(). The final test of bit 0 at the "global ptr"
 ; selects the helper path (IG10 new, IG11 old) when the bit is clear.
 ; Diff: the Timer reference goes to the stack slot [rbp-0x48] instead of
 ; r15, the TimerQueueTimer is additionally stored to [rbp-0x50], and the
 ; global-pointer test uses rax rather than rdi.
        mov      rdi, 0xD1FFAB1E      ; System.Threading.Timer
        call     CORINFO_HELP_NEWSFAST
-       mov      r15, rax
+       mov      gword ptr [rbp-0x48], rax
        mov      rdi, 0xD1FFAB1E      ; const ptr
        mov      r14, gword ptr [rdi]
        mov      rdi, 0xD1FFAB1E      ; System.Threading.TimerQueueTimer
        call     CORINFO_HELP_NEWSFAST
        mov      r13, rax
+       mov      gword ptr [rbp-0x50], r13
        lea      rdi, bword ptr [r13+0x20]
        mov      rsi, r14
        call     CORINFO_HELP_ASSIGN_REF
        xor      rax, rax
        mov      gword ptr [r13+0x28], rax
        mov      qword ptr [r13+0x48], -1
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.ExecutionContext:Capture():System.Threading.ExecutionContext
        call     [rax]System.Threading.ExecutionContext:Capture():System.Threading.ExecutionContext
        lea      rdi, bword ptr [r13+0x30]
        mov      rsi, rax
        call     CORINFO_HELP_ASSIGN_REF
-       mov      rdi, 0xD1FFAB1E      ; global ptr
-       test     byte  ptr [rdi], 1
-       je       G_M59891_IG11
-						;; size=118 bbWeight=0.50 PerfScore 9.25
+       mov      rax, 0xD1FFAB1E      ; global ptr
+       test     byte  ptr [rax], 1
+       je       G_M59891_IG10
+						;; size=123 bbWeight=0.50 PerfScore 10.12
 G_M59891_IG07:
 ; Diff: the old group loaded the TimerQueue <Instances>k__BackingField
 ; static into r14, called a helper through a function address, and checked
 ; the returned block (count at [rax] at least 2, non-null pointer at +0x10
 ; of the table at +0x08), going to the old IG12 helper path otherwise. In
 ; the new code the group only stores rbx to its stack slot at [rbp-0x38].
-       mov      rdi, 0xD1FFAB1E      ; data for System.Threading.TimerQueue:<Instances>k__BackingField
-       mov      r14, gword ptr [rdi]
-       mov      rdi, 0xD1FFAB1E
-       mov      rax, 0xD1FFAB1E      ; function address
-       call     rax
-       cmp      dword ptr [rax], 2
-       jl       G_M59891_IG12
-       mov      rax, qword ptr [rax+0x08]
-       mov      rax, qword ptr [rax+0x10]
-       test     rax, rax
-       je       G_M59891_IG12
-						;; size=61 bbWeight=0.50 PerfScore 7.50
+       mov      gword ptr [rbp-0x38], rbx
+						;; size=4 bbWeight=0.25 PerfScore 0.25
 G_M59891_IG08:
 ; Diff: the old group read the int at +0x0A38 of the block obtained in IG07,
 ; wrote back that value minus one, and used the value's upper 16 bits
 ; (sar ecx, 16) unless its low 16 bits were zero, in which case the old IG09
 ; called ProcessorIdCache:RefreshCurrentProcessorId(). The new group loads
 ; the TimerQueue <Instances>k__BackingField static into r14, saves it in
 ; [rbp-0x58] and loads the function address that IG09 calls.
-       mov      ecx, dword ptr [rax+0x0A38]
-       lea      edx, [rcx-0x01]
-       mov      dword ptr [rax+0x0A38], edx
-       movzx    rax, cx
-       test     eax, eax
-       je       SHORT G_M59891_IG09
-       sar      ecx, 16
-       jmp      SHORT G_M59891_IG10
-						;; size=27 bbWeight=0.50 PerfScore 3.75
+       mov      rax, 0xD1FFAB1E      ; data for System.Threading.TimerQueue:<Instances>k__BackingField
+       mov      r14, gword ptr [rax]
+       mov      gword ptr [rbp-0x58], r14
+       mov      rax, 0xD1FFAB1E      ; function address
+						;; size=27 bbWeight=0.50 PerfScore 1.75
 G_M59891_IG09:
 ; Diff: the value to divide now comes from a direct call annotated
 ; Interop+Sys:SchedGetCpu() instead of the ProcessorIdCache path. It is
 ; zero-extended (mov eax, eax) and divided by the 32-bit value at +0x08 of
 ; the Instances object (NOTE(review): presumably the array length - confirm).
 ; The remainder in rdx indexes the array after a bounds check against the
 ; same field. The divide changes from xor edx,edx + div to cqo + idiv; with
 ; rax zero-extended from eax, cqo leaves rdx = 0.
 ; The selected element is stored at +0x08 of the TimerQueueTimer, a
 ; TimerHolder holding the TimerQueueTimer at +0x08 is allocated and stored
 ; at +0x08 of the Timer, and the Timer is stored to s_availabilityTimer.
 ; Diff: r14, r13 and r15 are reloaded from their stack slots, the
 ; TimerHolder lives in rbx instead of r14, and the group ends with a check
 ; of a global dword that routes through CORINFO_HELP_POLL_GC (IG11) when it
 ; is non-zero.
-       mov      rax, 0xD1FFAB1E      ; code for System.Threading.ProcessorIdCache:RefreshCurrentProcessorId():int
-       call     [rax]System.Threading.ProcessorIdCache:RefreshCurrentProcessorId():int
-       mov      ecx, eax
-						;; size=14 bbWeight=0.50 PerfScore 1.75
-G_M59891_IG10:
-       mov      eax, ecx
+       call     rax ; Interop+Sys:SchedGetCpu():int
+       mov      eax, eax
        mov      rdx, 0xD1FFAB1E      ; data for System.Threading.TimerQueue:<Instances>k__BackingField
        mov      rdx, gword ptr [rdx]
        mov      edi, dword ptr [rdx+0x08]
-       xor      edx, edx
-       div      rdx:rax, rdi
+       cqo      
+       idiv     rdx:rax, rdi
+       mov      r14, gword ptr [rbp-0x58]
        mov      edi, dword ptr [r14+0x08]
        cmp      rdx, rdi
-       jae      SHORT G_M59891_IG13
+       jae      G_M59891_IG12
        mov      rsi, gword ptr [r14+8*rdx+0x10]
+       mov      r13, gword ptr [rbp-0x50]
        lea      rdi, bword ptr [r13+0x08]
        call     CORINFO_HELP_ASSIGN_REF
        mov      rdi, 0xD1FFAB1E      ; System.Threading.TimerHolder
        call     CORINFO_HELP_NEWFAST
-       mov      r14, rax
-       lea      rdi, bword ptr [r14+0x08]
+       mov      rbx, rax
+       lea      rdi, bword ptr [rbx+0x08]
        mov      rsi, r13
        call     CORINFO_HELP_ASSIGN_REF
+       mov      r15, gword ptr [rbp-0x48]
        lea      rdi, bword ptr [r15+0x08]
-       mov      rsi, r14
+       mov      rsi, rbx
        call     CORINFO_HELP_ASSIGN_REF
        mov      rdi, 0xD1FFAB1E      ; data for System.Net.NetworkInformation.NetworkChange:s_availabilityTimer
        mov      rsi, r15
        call     CORINFO_HELP_ASSIGN_REF
-       jmp      SHORT G_M59891_IG14
-						;; size=108 bbWeight=0.50 PerfScore 40.50
-G_M59891_IG11:
+       mov      rdi, 0xD1FFAB1E      ; global ptr
+       cmp      dword ptr [rdi], 0
+       je       SHORT G_M59891_IG13
+       jmp      SHORT G_M59891_IG11
+						;; size=141 bbWeight=0.50 PerfScore 49.25
+G_M59891_IG10:
 ; Cold path (bbWeight=0) taken from IG06 when bit 0 of the tested global is
 ; clear: call CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE, then rejoin the main
 ; path.
 ; Diff: the new code stores rbx to [rbp-0x38] and rejoins at IG08 (the old
 ; code rejoined at IG07); the old IG12 block that called the thread-static
 ; base helper is removed.
        mov      rdi, 0xD1FFAB1E
        mov      esi, 946
        call     CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
-       jmp      G_M59891_IG07
-						;; size=25 bbWeight=0 PerfScore 0.00
-G_M59891_IG12:
-       mov      edi, 2
-       call     CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED
+       mov      gword ptr [rbp-0x38], rbx
        jmp      G_M59891_IG08
-						;; size=15 bbWeight=0 PerfScore 0.00
-G_M59891_IG13:
+						;; size=29 bbWeight=0 PerfScore 0.00
+G_M59891_IG11:
+; New cold block reached from the end of IG09 when the global dword is
+; non-zero: call CORINFO_HELP_POLL_GC, then continue at IG13.
+       call     CORINFO_HELP_POLL_GC
+       jmp      SHORT G_M59891_IG13
+						;; size=7 bbWeight=0 PerfScore 0.00
+G_M59891_IG12:
 ; Target of the failed bounds check in IG09: calls CORINFO_HELP_RNGCHKFAIL.
        call     CORINFO_HELP_RNGCHKFAIL
        int3     
 						;; size=6 bbWeight=0 PerfScore 0.00
-G_M59891_IG14:
+G_M59891_IG13:
 ; Call AsyncFlowControl:Undo() on the struct at [rbp-0x30], which was filled
 ; from the SuppressFlow() result in IG05.
 ; Diff: the new code reloads rbx from [rbp-0x38] afterwards; the next group
 ; passes rbx in rsi to Dictionary TryInsert.
        lea      rdi, [rbp-0x30]
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.AsyncFlowControl:Undo():this
        call     [rax]System.Threading.AsyncFlowControl:Undo():this
-						;; size=16 bbWeight=0.50 PerfScore 1.88
-G_M59891_IG15:
+       mov      rbx, gword ptr [rbp-0x38]
+						;; size=20 bbWeight=0.50 PerfScore 2.38
+G_M59891_IG14:
 ; Load the Dictionary reference from the constant address into r15, call
 ; ExecutionContext:Capture(), then call Dictionary TryInsert with
 ; this = r15, rsi = rbx (the method argument), rdx = the captured context
 ; and ecx = 0. Control then jumps to the block that releases the monitor.
        mov      rax, 0xD1FFAB1E      ; const ptr
        mov      r15, gword ptr [rax]
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.ExecutionContext:Capture():System.Threading.ExecutionContext
        call     [rax]System.Threading.ExecutionContext:Capture():System.Threading.ExecutionContext
        mov      rdx, rax
        mov      rdi, r15
        mov      rsi, rbx
        xor      ecx, ecx
        mov      rax, 0xD1FFAB1E      ; code for System.Collections.Generic.Dictionary`2[System.__Canon,System.__Canon]:TryInsert(System.__Canon,System.__Canon,ubyte):ubyte:this
        call     [rax]System.Collections.Generic.Dictionary`2[System.__Canon,System.__Canon]:TryInsert(System.__Canon,System.__Canon,ubyte):ubyte:this
-       jmp      SHORT G_M59891_IG17
+       jmp      SHORT G_M59891_IG16
 						;; size=50 bbWeight=0.50 PerfScore 5.88
-G_M59891_IG16:
+G_M59891_IG15:
 ; Cold block reached from IG04 when the flag at [rbp-0x28] was already set:
 ; calls Monitor:ThrowLockTakenException().
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.Monitor:ThrowLockTakenException()
        call     [rax]System.Threading.Monitor:ThrowLockTakenException()
        int3     
 						;; size=13 bbWeight=0 PerfScore 0.00
-G_M59891_IG17:
+G_M59891_IG16:
 ; Release the monitor on the normal path: when the flag at [rbp-0x28] is
 ; set, call Monitor:Exit on the object saved in the V01 slot
 ; ([rbp-0x40] new, [rbp-0x38] old).
        cmp      byte  ptr [rbp-0x28], 0
-       je       SHORT G_M59891_IG18
-       mov      rdi, gword ptr [rbp-0x38]
+       je       SHORT G_M59891_IG17
+       mov      rdi, gword ptr [rbp-0x40]
        call     System.Threading.Monitor:Exit(System.Object)
 						;; size=15 bbWeight=0.50 PerfScore 2.50
-G_M59891_IG18:
+G_M59891_IG17:
 ; Join point for the null-argument path (IG02) and the normal path, followed
 ; by the main epilog: release the frame (64 bytes new, 32 old) and restore
 ; rbx, r13, r14, r15 and rbp in reverse push order.
        nop      
 						;; size=1 bbWeight=1 PerfScore 0.25
-G_M59891_IG19:
-       add      rsp, 32
+G_M59891_IG18:
+       add      rsp, 64
        pop      rbx
        pop      r13
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=13 bbWeight=1 PerfScore 3.75
-G_M59891_IG20:
+G_M59891_IG19:
 ; Separate prolog/body/epilog sequence with bbWeight=0 (NOTE(review): looks
 ; like an exception-handling funclet - confirm). It saves the same
 ; callee-saved registers, loads rbp from [rdi] and adds the frame offset
 ; (+0x60 new, +0x40 old), matching the rbp set up in IG01. It then calls
 ; AsyncFlowControl:Undo() on the struct at [rbp-0x30] and returns.
 ; Diff: vzeroupper is added, the frame offset changes and labels are
 ; renumbered.
        push     rbp
        push     r15
        push     r14
        push     r13
        push     rbx
        sub      rsp, 16
+       vzeroupper 
        mov      rbp, qword ptr [rdi]
        mov      qword ptr [rsp], rbp
-       lea      rbp, [rbp+0x40]
-						;; size=23 bbWeight=0 PerfScore 0.00
-G_M59891_IG21:
+       lea      rbp, [rbp+0x60]
+						;; size=26 bbWeight=0 PerfScore 0.00
+G_M59891_IG20:
        lea      rdi, [rbp-0x30]
        mov      rax, 0xD1FFAB1E      ; code for System.Threading.AsyncFlowControl:Undo():this
        call     [rax]System.Threading.AsyncFlowControl:Undo():this
        nop      
 						;; size=17 bbWeight=0 PerfScore 0.00
-G_M59891_IG22:
+G_M59891_IG21:
        add      rsp, 16
        pop      rbx
        pop      r13
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=13 bbWeight=0 PerfScore 0.00
-G_M59891_IG23:
+G_M59891_IG22:
 ; Second separate prolog/body/epilog sequence with bbWeight=0
 ; (NOTE(review): looks like the exception-handling funclet for the lock
 ; region - confirm). After re-establishing rbp as in the sequence above, it
 ; calls Monitor:Exit on the object in the V01 slot ([rbp-0x40] new,
 ; [rbp-0x38] old) when the flag at [rbp-0x28] is set, then returns.
 ; Diff: vzeroupper is added, the frame offset changes and labels are
 ; renumbered.
        push     rbp
        push     r15
        push     r14
        push     r13
        push     rbx
        sub      rsp, 16
+       vzeroupper 
        mov      rbp, qword ptr [rdi]
        mov      qword ptr [rsp], rbp
-       lea      rbp, [rbp+0x40]
-						;; size=23 bbWeight=0 PerfScore 0.00
-G_M59891_IG24:
+       lea      rbp, [rbp+0x60]
+						;; size=26 bbWeight=0 PerfScore 0.00
+G_M59891_IG23:
        cmp      byte  ptr [rbp-0x28], 0
-       je       SHORT G_M59891_IG25
-       mov      rdi, gword ptr [rbp-0x38]
+       je       SHORT G_M59891_IG24
+       mov      rdi, gword ptr [rbp-0x40]
        call     System.Threading.Monitor:Exit(System.Object)
 						;; size=15 bbWeight=0 PerfScore 0.00
-G_M59891_IG25:
+G_M59891_IG24:
        nop      
 						;; size=1 bbWeight=0 PerfScore 0.00
-G_M59891_IG26:
+G_M59891_IG25:
        add      rsp, 16
        pop      rbx
        pop      r13
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=13 bbWeight=0 PerfScore 0.00
 
-; Total bytes of code 731, prolog size 30, PerfScore 99.00, instruction count 181, allocated bytes for code 731 (MethodHash=6153160c) for method System.Net.NetworkInformation.NetworkChange:add_NetworkAvailabilityChanged(System.Net.NetworkInformation.NetworkAvailabilityChangedEventHandler) (FullOpts)
+; Total bytes of code 715, prolog size 33, PerfScore 100.62, instruction count 178, allocated bytes for code 715 (MethodHash=6153160c) for method System.Net.NetworkInformation.NetworkChange:add_NetworkAvailabilityChanged(System.Net.NetworkInformation.NetworkAvailabilityChangedEventHandler) (FullOpts)
-12 (-3.46 % of base) - System.HexConverter:TryDecodeFromUtf16_Vector128(System.ReadOnlySpan`1[ushort],System.Span`1[ubyte],byref):ubyte
 ; Assembly listing for method System.HexConverter:TryDecodeFromUtf16_Vector128(System.ReadOnlySpan`1[ushort],System.Span`1[ubyte],byref):ubyte (FullOpts)
 ; Emitting BLENDED_CODE for X64 with AVX512 - Unix
 ; FullOpts code
 ; optimized code
 ; rbp based frame
 ; fully interruptible
 ; No PGO data
 ; 0 inlinees with PGO data; 6 single block inlinees; 6 inlinees without PGO data
 ; Final local variable assignments
 ;
 ;* V00 arg0         [V00    ] (  0,  0   )  struct (16) zero-ref    multireg-arg ld-addr-op single-def <System.ReadOnlySpan`1[ushort]>
 ;* V01 arg1         [V01    ] (  0,  0   )  struct (16) zero-ref    multireg-arg ld-addr-op single-def <System.Span`1[ubyte]>
 ;  V02 arg2         [V02,T06] (  4,  3   )   byref  ->  rbx         single-def
 ;  V03 loc0         [V03,T00] ( 12, 42.50)    long  ->  r15        
 ;  V04 loc1         [V04,T02] (  3,  9   )    long  ->  r13        
 ;* V05 loc2         [V05,T19] (  0,  0   )   byref  ->  zero-ref    single-def
 ;* V06 loc3         [V06,T20] (  0,  0   )   byref  ->  zero-ref    single-def
 ;  V07 loc4         [V07    ] (  2,  1   )     int  ->  [rbp-0x28]  do-not-enreg[X] addr-exposed ld-addr-op
-;  V08 loc5         [V08,T23] (  3, 24   )  simd16  ->  mm7         <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;  V09 loc6         [V09,T24] (  3, 24   )  simd16  ->  mm8         <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;  V08 loc5         [V08,T22] (  3, 24   )  simd16  ->  mm7         <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;  V09 loc6         [V09,T23] (  3, 24   )  simd16  ->  mm8         <System.Runtime.Intrinsics.Vector128`1[ushort]>
 ;* V10 loc7         [V10    ] (  0,  0   )  simd16  ->  zero-ref    <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V11 loc8         [V11    ] (  0,  0   )  simd16  ->  zero-ref    <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;  V12 loc9         [V12,T25] (  3, 16   )  simd16  ->  mm9         <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V13 loc10        [V13    ] (  0,  0   )  simd16  ->  zero-ref    <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V14 loc11        [V14    ] (  0,  0   )  simd16  ->  zero-ref    <System.Runtime.Intrinsics.Vector128`1[short]>
 ;* V15 loc12        [V15    ] (  0,  0   )  simd16  ->  zero-ref    <System.Runtime.Intrinsics.Vector128`1[short]>
 ;# V16 OutArgs      [V16    ] (  1,  1   )  struct ( 0) [rsp+0x00]  do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;  V17 tmp1         [V17,T21] (  3, 48   )  simd16  ->  mm9         "dup spill"
+;  V17 tmp1         [V17,T24] (  3, 24   )  simd16  ->  mm9        
 ;* V18 tmp2         [V18    ] (  0,  0   )  struct (16) zero-ref    "impAppendStmt" <System.ReadOnlySpan`1[ushort]>
 ;* V19 tmp3         [V19    ] (  0,  0   )  struct (16) zero-ref    "spilled call-like call argument" <System.Span`1[ubyte]>
 ;  V20 tmp4         [V20,T12] (  2,  2   )     int  ->  rax         "impAppendStmt"
 ;* V21 tmp5         [V21    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
 ;* V22 tmp6         [V22    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inlining Arg" <System.ReadOnlySpan`1[ushort]>
 ;* V23 tmp7         [V23    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op "Inlining Arg" <System.Span`1[ubyte]>
 ;* V24 tmp8         [V24    ] (  0,  0   )  simd16  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V25 tmp9         [V25    ] (  0,  0   )  simd16  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V26 tmp10        [V26    ] (  0,  0   )  simd16  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V27 tmp11        [V27    ] (  0,  0   )  simd16  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;* V28 tmp12        [V28    ] (  0,  0   )  simd16  ->  zero-ref    "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
 ;* V29 tmp13        [V29    ] (  0,  0   )  simd16  ->  zero-ref    "spilled call-like call argument"
 ;* V30 tmp14        [V30    ] (  0,  0   )  simd16  ->  zero-ref    ld-addr-op "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
 ;* V31 tmp15        [V31    ] (  0,  0   )   ubyte  ->  zero-ref    "Inline return value spill temp"
 ;* V32 tmp16        [V32    ] (  0,  0   )  simd16  ->  zero-ref    "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
 ;  V33 tmp17        [V33,T07] (  4,  4   )     int  ->   r8         "Inlining Arg"
 ;* V34 tmp18        [V34    ] (  0,  0   )  struct (16) zero-ref    multireg-arg ld-addr-op "NewObj constructor temp" <System.ReadOnlySpan`1[ushort]>
 ;  V35 tmp19        [V35,T10] (  2,  2   )   byref  ->  rdi         single-def "Inlining Arg"
 ;  V36 tmp20        [V36,T13] (  2,  2   )     int  ->  rsi         "Inlining Arg"
 ;  V37 tmp21        [V37,T08] (  4,  4   )     int  ->   r8         "Inlining Arg"
 ;* V38 tmp22        [V38    ] (  0,  0   )  struct (16) zero-ref    multireg-arg ld-addr-op "NewObj constructor temp" <System.Span`1[ubyte]>
 ;  V39 tmp23        [V39,T11] (  2,  2   )   byref  ->  rdx         single-def "Inlining Arg"
 ;  V40 tmp24        [V40,T14] (  2,  2   )     int  ->  rcx         "Inlining Arg"
 ;  V41 tmp25        [V41,T01] (  4, 17.50)   byref  ->  rdi         single-def "field V00._reference (fldOffset=0x0)" P-INDEP
 ;  V42 tmp26        [V42,T05] (  5,  3.50)     int  ->  rsi         single-def "field V00._length (fldOffset=0x8)" P-INDEP
 ;  V43 tmp27        [V43,T03] (  3,  5.50)   byref  ->  rdx         single-def "field V01._reference (fldOffset=0x0)" P-INDEP
 ;  V44 tmp28        [V44,T09] (  3,  2   )     int  ->  rcx         single-def "field V01._length (fldOffset=0x8)" P-INDEP
 ;* V45 tmp29        [V45    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V18._reference (fldOffset=0x0)" P-INDEP
 ;* V46 tmp30        [V46    ] (  0,  0   )     int  ->  zero-ref    "field V18._length (fldOffset=0x8)" P-INDEP
 ;* V47 tmp31        [V47    ] (  0,  0   )   byref  ->  zero-ref    "field V19._reference (fldOffset=0x0)" P-INDEP
 ;* V48 tmp32        [V48    ] (  0,  0   )     int  ->  zero-ref    "field V19._length (fldOffset=0x8)" P-INDEP
 ;* V49 tmp33        [V49    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V22._reference (fldOffset=0x0)" P-INDEP
 ;* V50 tmp34        [V50    ] (  0,  0   )     int  ->  zero-ref    "field V22._length (fldOffset=0x8)" P-INDEP
 ;* V51 tmp35        [V51    ] (  0,  0   )   byref  ->  zero-ref    single-def "field V23._reference (fldOffset=0x0)" P-INDEP
 ;* V52 tmp36        [V52    ] (  0,  0   )     int  ->  zero-ref    "field V23._length (fldOffset=0x8)" P-INDEP
 ;  V53 tmp37        [V53,T15] (  2,  1   )   byref  ->  rdi         single-def "field V34._reference (fldOffset=0x0)" P-INDEP
 ;  V54 tmp38        [V54,T17] (  2,  1   )     int  ->  rsi         "field V34._length (fldOffset=0x8)" P-INDEP
 ;  V55 tmp39        [V55,T16] (  2,  1   )   byref  ->  rdx         single-def "field V38._reference (fldOffset=0x0)" P-INDEP
 ;  V56 tmp40        [V56,T18] (  2,  1   )     int  ->  rcx         "field V38._length (fldOffset=0x8)" P-INDEP
 ;  V57 cse0         [V57,T26] (  2,  9   )  simd16  ->  mm0         hoist "CSE #01: aggressive"
 ;  V58 cse1         [V58,T27] (  2,  9   )  simd16  ->  mm1         hoist "CSE #02: aggressive"
 ;  V59 cse2         [V59,T28] (  2,  9   )  simd16  ->  mm2         hoist "CSE #03: aggressive"
 ;  V60 cse3         [V60,T29] (  2,  9   )  simd16  ->  mm3         hoist "CSE #04: aggressive"
 ;  V61 cse4         [V61,T30] (  2,  9   )  simd16  ->  mm4         hoist "CSE #05: aggressive"
 ;  V62 cse5         [V62,T31] (  2,  9   )  simd16  ->  mm5         hoist "CSE #06: aggressive"
 ;  V63 cse6         [V63,T32] (  2,  9   )  simd16  ->  mm6         hoist "CSE #07: aggressive"
 ;  V64 cse7         [V64,T04] (  3,  6   )    long  ->  r14         "CSE #08: aggressive"
-;  V65 rat0         [V65,T22] (  3, 48   )  simd16  ->  mm7         "ReplaceWithLclVar is creating a new local variable"
+;  V65 rat0         [V65,T21] (  3, 48   )  simd16  ->  mm7         "ReplaceWithLclVar is creating a new local variable"
 ;
 ; Lcl frame size = 16
 
 G_M6966_IG01:
 ; Prolog: save rbp, r15, r14, r13 and rbx, reserve 16 bytes of locals and
 ; set rbp = rsp + 0x30. The byref third argument arrives in r8 and is kept
 ; in rbx (V02).
        push     rbp
        push     r15
        push     r14
        push     r13
        push     rbx
        sub      rsp, 16
        lea      rbp, [rsp+0x30]
        mov      rbx, r8
 						;; size=20 bbWeight=1 PerfScore 6.00
 G_M6966_IG02:
 ; Loop setup. r15 = 0 is the current element offset into the input (V03).
 ; r14 is the zero-extended esi, which the variable table lists as the
 ; _length field of the first span. r13 = r14 - 16 (V04). xmm0..xmm6 are
 ; loaded once with the constants RWD00..RWD96 that the loop body uses.
 ; Control enters the loop at IG04.
        xor      r15d, r15d
        mov      r14d, esi
        lea      r13, [r14-0x10]
        vmovups  xmm0, xmmword ptr [reloc @RWD00]
        vmovups  xmm1, xmmword ptr [reloc @RWD16]
        vmovups  xmm2, xmmword ptr [reloc @RWD32]
        vmovups  xmm3, xmmword ptr [reloc @RWD48]
        vmovups  xmm4, xmmword ptr [reloc @RWD64]
        vmovups  xmm5, xmmword ptr [reloc @RWD80]
        vmovups  xmm6, xmmword ptr [reloc @RWD96]
        jmp      SHORT G_M6966_IG04
        align    [0 bytes for IG03]
 						;; size=68 bbWeight=1 PerfScore 24.00
 G_M6966_IG03:
 ; Entered from IG08 when the next offset would exceed r13: the offset is
 ; pulled back to r13 (r14 - 16) before falling through into IG04.
        mov      r15, r13
 						;; size=3 bbWeight=4 PerfScore 1.00
 G_M6966_IG04:
 ; Loop body: load sixteen 16-bit elements as two 16-byte vectors (xmm7,
 ; xmm8) from rdi + 2*r15.
 ; Diff: the two vpmovwb narrows plus vmovlhps are replaced by one vpackuswb
 ; that produces the 16 bytes in xmm9 directly.
 ; NOTE(review): vpackuswb saturates while vpmovwb truncates; they agree only
 ; for words in 0..0xFF. The (xmm7 | xmm8) & xmm6 test at the end of this
 ; group (vpternlogd imm8 -88 = 0xA8) sends flagged input to the scalar path
 ; in IG06. Presumably xmm6 masks the bits above the valid range, but RWD96
 ; is not visible here - confirm.
 ; The byte arithmetic on xmm9/xmm10 leaves its result in xmm9, which IG05
 ; and IG08 consume.
        vmovups  xmm7, xmmword ptr [rdi+2*r15]
        vmovups  xmm8, xmmword ptr [rdi+2*r15+0x10]
-       vpmovwb  xmm7, xmm9
-       vpmovwb  xmm8, xmm10
-       vmovlhps xmm9, xmm9, xmm10
+       vpackuswb xmm9, xmm7, xmm8
        vpaddb   xmm10, xmm0, xmm9
        vpsubusb xmm10, xmm10, xmm1
        vpsubb   xmm10, xmm10, xmm2
        vpand    xmm9, xmm3, xmm9
        vpsubb   xmm9, xmm9, xmm4
        vpaddusb xmm9, xmm9, xmm5
        vpminub  xmm9, xmm10, xmm9
        vpternlogd xmm7, xmm8, xmm6, -88
        vptest   xmm7, xmm7
        jne      SHORT G_M6966_IG06
-						;; size=75 bbWeight=8 PerfScore 160.00
+						;; size=63 bbWeight=8 PerfScore 128.00
 G_M6966_IG05:
        vpaddusb xmm7, xmm9, xmmword ptr [reloc @RWD112]
        vpmovmskb r8d, xmm7
        test     r8d, r8d
        je       SHORT G_M6966_IG08
 						;; size=17 bbWeight=4 PerfScore 21.00
 G_M6966_IG06:
        mov      r8d, r15d
        cmp      r8d, esi
        ja       G_M6966_IG11
        mov      eax, r8d
        lea      rdi, bword ptr [rdi+2*rax]
        sub      esi, r8d
        mov      r8, r15
        shr      r8, 1
        cmp      r8d, ecx
        ja       SHORT G_M6966_IG11
        mov      eax, r8d
        add      rdx, rax
        sub      ecx, r8d
        lea      r8, [rbp-0x28]
        mov      rax, 0xD1FFAB1E      ; code for System.HexConverter:TryDecodeFromUtf16_Scalar(System.ReadOnlySpan`1[ushort],System.Span`1[ubyte],byref):ubyte
        call     [rax]System.HexConverter:TryDecodeFromUtf16_Scalar(System.ReadOnlySpan`1[ushort],System.Span`1[ubyte],byref):ubyte
        add      r15d, dword ptr [rbp-0x28]
        mov      dword ptr [rbx], r15d
 						;; size=65 bbWeight=0.50 PerfScore 6.00
 G_M6966_IG07:
        add      rsp, 16
        pop      rbx
        pop      r13
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=13 bbWeight=0.50 PerfScore 1.88
 G_M6966_IG08:
        vpmaddubsw xmm7, xmm9, xmmword ptr [reloc @RWD128]
        vpshufb  xmm7, xmm7, xmmword ptr [reloc @RWD144]
        mov      rax, r15
        shr      rax, 1
        vmovd    qword ptr [rdx+rax], xmm7
        add      r15, 16
        cmp      r15, r14
        je       SHORT G_M6966_IG09
        cmp      r15, r13
        jbe      G_M6966_IG04
        jmp      G_M6966_IG03
 						;; size=53 bbWeight=4 PerfScore 62.00
 G_M6966_IG09:
        mov      dword ptr [rbx], esi
        mov      eax, 1
 						;; size=7 bbWeight=0.50 PerfScore 0.62
 G_M6966_IG10:
        add      rsp, 16
        pop      rbx
        pop      r13
        pop      r14
        pop      r15
        pop      rbp
        ret      
 						;; size=13 bbWeight=0.50 PerfScore 1.88
 G_M6966_IG11:
        mov      rax, 0xD1FFAB1E      ; code for System.ThrowHelper:ThrowArgumentOutOfRangeException()
        call     [rax]System.ThrowHelper:ThrowArgumentOutOfRangeException()
        int3     
 						;; size=13 bbWeight=0 PerfScore 0.00
 RWD00  	dq	C6C6C6C6C6C6C6C6h, C6C6C6C6C6C6C6C6h
 RWD16  	dq	0606060606060606h, 0606060606060606h
 RWD32  	dq	F0F0F0F0F0F0F0F0h, F0F0F0F0F0F0F0F0h
 RWD48  	dq	DFDFDFDFDFDFDFDFh, DFDFDFDFDFDFDFDFh
 RWD64  	dq	4141414141414141h, 4141414141414141h
 RWD80  	dq	0A0A0A0A0A0A0A0Ah, 0A0A0A0A0A0A0A0Ah
 RWD96  	dq	FF80FF80FF80FF80h, FF80FF80FF80FF80h
 RWD112 	dq	7070707070707070h, 7070707070707070h
 RWD128 	dq	0110011001100110h, 0110011001100110h
 RWD144 	dq	0E0C0A0806040200h, 0000000000000000h
 
 
-; Total bytes of code 347, prolog size 20, PerfScore 284.38, instruction count 88, allocated bytes for code 347 (MethodHash=bb7ae4c9) for method System.HexConverter:TryDecodeFromUtf16_Vector128(System.ReadOnlySpan`1[ushort],System.Span`1[ubyte],byref):ubyte (FullOpts)
+; Total bytes of code 335, prolog size 20, PerfScore 252.38, instruction count 86, allocated bytes for code 335 (MethodHash=bb7ae4c9) for method System.HexConverter:TryDecodeFromUtf16_Vector128(System.ReadOnlySpan`1[ushort],System.Span`1[ubyte],byref):ubyte (FullOpts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment