Skip to content

[SLPVectorizer] Integer add reduction is not vectorized #55693

@nikic

Description

@nikic
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" 
target triple = "x86_64-unknown-linux-gnu"
  
; Reproducer: loads one i64 from %x and returns, as an i16, the sum of its
; eight 8-bit fields (bits 0-7, 8-15, ..., 56-63). Each field is extracted
; with scalar shift/trunc/mask operations and accumulated through a linear
; chain of i16 adds.
;
; Each field is at most 255, so the total is at most 8 * 255 = 2040, which
; fits in i16 without wrapping; this is consistent with the nuw/nsw flags
; on every add.
define i16 @test(ptr %x) { 
start:
  ; The single 64-bit value from which all eight fields are taken.
  %_51 = load i64, ptr %x, align 8
  ; Field 0: bits 0-7 (truncate to i16, then mask the low 8 bits).
  %_4.i.i.i.sroa.4.16.extract.trunc = trunc i64 %_51 to i16
  %0 = and i16 %_4.i.i.i.sroa.4.16.extract.trunc, 255
  ; Field 1: bits 8-15. The shift is done on the truncated i16, so the
  ; result is already limited to 8 bits and no mask is applied.
  %1 = trunc i64 %_51 to i16
  %2 = lshr i16 %1, 8
  ; Running sum: field 0 + field 1.
  %_6.0.i.i.i.i.i.i.i.i.i.1 = add nuw nsw i16 %0, %2
  ; Field 2: bits 16-23 (shift the i64 right by 16, truncate, mask).
  %_4.i.i.i.sroa.4.18.extract.shift = lshr i64 %_51, 16
  %_4.i.i.i.sroa.4.18.extract.trunc = trunc i64 %_4.i.i.i.sroa.4.18.extract.shift to i16
  %3 = and i16 %_4.i.i.i.sroa.4.18.extract.trunc, 255
  %_6.0.i.i.i.i.i.i.i.i.i.2 = add nuw nsw i16 %_6.0.i.i.i.i.i.i.i.i.i.1, %3
  ; Field 3: bits 24-31.
  %_4.i.i.i.sroa.4.19.extract.shift = lshr i64 %_51, 24
  %_4.i.i.i.sroa.4.19.extract.trunc = trunc i64 %_4.i.i.i.sroa.4.19.extract.shift to i16
  %4 = and i16 %_4.i.i.i.sroa.4.19.extract.trunc, 255
  %_6.0.i.i.i.i.i.i.i.i.i.3 = add nuw nsw i16 %_6.0.i.i.i.i.i.i.i.i.i.2, %4
  ; Field 4: bits 32-39.
  %_4.i.i.i.sroa.4.20.extract.shift = lshr i64 %_51, 32
  %_4.i.i.i.sroa.4.20.extract.trunc = trunc i64 %_4.i.i.i.sroa.4.20.extract.shift to i16
  %5 = and i16 %_4.i.i.i.sroa.4.20.extract.trunc, 255
  %_6.0.i.i.i.i.i.i.i.i.i.4 = add nuw nsw i16 %_6.0.i.i.i.i.i.i.i.i.i.3, %5
  ; Field 5: bits 40-47.
  %_4.i.i.i.sroa.4.21.extract.shift = lshr i64 %_51, 40
  %_4.i.i.i.sroa.4.21.extract.trunc = trunc i64 %_4.i.i.i.sroa.4.21.extract.shift to i16
  %6 = and i16 %_4.i.i.i.sroa.4.21.extract.trunc, 255
  %_6.0.i.i.i.i.i.i.i.i.i.5 = add nuw nsw i16 %_6.0.i.i.i.i.i.i.i.i.i.4, %6
  ; Field 6: bits 48-55.
  %_4.i.i.i.sroa.4.22.extract.shift = lshr i64 %_51, 48
  %_4.i.i.i.sroa.4.22.extract.trunc = trunc i64 %_4.i.i.i.sroa.4.22.extract.shift to i16 
  %7 = and i16 %_4.i.i.i.sroa.4.22.extract.trunc, 255
  %_6.0.i.i.i.i.i.i.i.i.i.6 = add nuw nsw i16 %_6.0.i.i.i.i.i.i.i.i.i.5, %7
  ; Field 7: bits 56-63. A logical shift of the i64 by 56 leaves only the
  ; top 8 bits, so the truncated value is used directly without a mask.
  %_4.i.i.i.sroa.4.23.extract.shift = lshr i64 %_51, 56
  %_4.i.i.i.sroa.4.23.extract.trunc = trunc i64 %_4.i.i.i.sroa.4.23.extract.shift to i16
  %_6.0.i.i.i.i.i.i.i.i.i.7 = add nuw nsw i16 %_6.0.i.i.i.i.i.i.i.i.i.6, %_4.i.i.i.sroa.4.23.extract.trunc
  ; Final result: sum of all eight 8-bit fields.
  ret i16 %_6.0.i.i.i.i.i.i.i.i.i.7
} 

This could be converted into a zext <8 x i8> to <8 x i16> followed by vector.reduce.add, but currently isn't. (I expect this to be profitable based on psadbw, but I'm not particularly familiar with this.)

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions