; Untitled

; ModuleID = 'aec_vec.c'
source_filename = "aec_vec.c"
; Data layout string, field by field:
;   e              little-endian
;   m:e            ELF-style symbol name mangling
;   p270/p271      32-bit pointers in address spaces 270 and 271
;   p272           64-bit pointers in address space 272
;   i64:64         i64 is 64-bit aligned
;   f80:128        x86_fp80 is 128-bit aligned
;   n8:16:32:64    native integer widths
;   S128           natural stack alignment of 128 bits
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; void vec(i64 n, i8* out, i8* in)
;
; For every i in [0, n) stores
;     out[i] = (in[i] << 1) ^ ((in[i] >>arith 7) & -90)
; using 8-bit arithmetic. (in[i] >>arith 7) is all-ones when the top bit of
; in[i] is set and zero otherwise, so the byte -90 (0xA6) is XORed in exactly
; when the left shift drops a set bit.
; NOTE(review): this has the shape of a "multiply by x" step in GF(2^8) with
; reduction byte 0xA6 - presumably what aec_vec.c intends; confirm there.
;
; Control-flow overview:
;   entry                  n == 0            -> return
;   for.body.preheader     n < 128           -> scalar path
;   vector.memcheck        in/out overlap    -> scalar path
;   vector.ph/vector.body  128 bytes per iteration for i in [0, n & -128)
;   middle.block           leftover bytes    -> scalar path starting at n & -128
;   for.body.preheader12 / for.body.prol / for.body.prol.loopexit / for.body
;                          scalar loop unrolled by 2, with one peeled iteration
;                          when the remaining count is odd
;
; Function Attrs: nofree norecurse nounwind uwtable
define dso_local void @vec(i64 %n, i8* nocapture %out, i8* nocapture readonly %in) local_unnamed_addr #0 {
entry:
  ; Nothing to do for an empty range.
  %cmp6 = icmp eq i64 %n, 0
  br i1 %cmp6, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  ; The vector loop consumes 128 bytes per iteration; shorter inputs go
  ; straight to the scalar loop.
  %min.iters.check = icmp ult i64 %n, 128
  br i1 %min.iters.check, label %for.body.preheader12, label %vector.memcheck

for.body.preheader12:                             ; preds = %middle.block, %vector.memcheck, %for.body.preheader
  ; Scalar start index: 0 when the vector loop was skipped, %n.vec when it ran.
  %i.07.ph = phi i64 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
  ; %0 = ~start = -start - 1; compared against -n in for.body.prol.loopexit.
  %0 = xor i64 %i.07.ph, -1
  ; The start index is 0 or n & -128, hence even, so the number of remaining
  ; iterations (n - start) has the same parity as n. An odd count needs one
  ; peeled iteration before the 2x-unrolled loop.
  %xtraiter = and i64 %n, 1
  %lcmp.mod = icmp eq i64 %xtraiter, 0
  br i1 %lcmp.mod, label %for.body.prol.loopexit, label %for.body.prol

for.body.prol:                                    ; preds = %for.body.preheader12
  ; Peeled single iteration at index %i.07.ph.
  %arrayidx.prol = getelementptr inbounds i8, i8* %in, i64 %i.07.ph
  %1 = load i8, i8* %arrayidx.prol, align 1, !tbaa !2
  ; 0xFF if the top bit of the input byte is set, else 0x00.
  %.neg.i.prol = ashr i8 %1, 7
  %shl.i.prol = shl i8 %1, 1
  ; Select the constant -90 (0xA6) only when the top bit was set.
  %2 = and i8 %.neg.i.prol, -90
  %xor10.i.prol = xor i8 %2, %shl.i.prol
  %arrayidx1.prol = getelementptr inbounds i8, i8* %out, i64 %i.07.ph
  store i8 %xor10.i.prol, i8* %arrayidx1.prol, align 1, !tbaa !2
  ; %i.07.ph is even (see above), so "or 1" is the same as "+ 1".
  %inc.prol = or i64 %i.07.ph, 1
  br label %for.body.prol.loopexit

for.body.prol.loopexit:                           ; preds = %for.body.preheader12, %for.body.prol
  ; Index at which the 2x-unrolled loop would start.
  %i.07.unr = phi i64 [ %i.07.ph, %for.body.preheader12 ], [ %inc.prol, %for.body.prol ]
  ; ~start == -n  <=>  start + 1 == n, i.e. exactly one element was left and
  ; the peeled iteration above already handled it.
  %3 = sub i64 0, %n
  %4 = icmp eq i64 %0, %3
  br i1 %4, label %for.cond.cleanup, label %for.body

vector.memcheck:                                  ; preds = %for.body.preheader
  ; Runtime overlap check between [in, in + n) and [out, out + n):
  ; the ranges conflict when in + n > out and out + n > in (unsigned compares).
  %scevgep = getelementptr i8, i8* %out, i64 %n
  %scevgep8 = getelementptr i8, i8* %in, i64 %n
  %bound0 = icmp ugt i8* %scevgep8, %out
  %bound1 = icmp ugt i8* %scevgep, %in
  %found.conflict = and i1 %bound0, %bound1
  ; On overlap fall back to the scalar loop from index 0.
  br i1 %found.conflict, label %for.body.preheader12, label %vector.ph

vector.ph:                                        ; preds = %vector.memcheck
  ; n rounded down to a multiple of 128: the element count handled by the
  ; vector loop.
  %n.vec = and i64 %n, -128
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  ; One iteration handles 128 bytes as four <32 x i8> vectors located at byte
  ; offsets 0, 32, 64 and 96 from %index. All accesses are "align 1".
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %5 = getelementptr inbounds i8, i8* %in, i64 %index
  %6 = bitcast i8* %5 to <32 x i8>*
  %wide.load = load <32 x i8>, <32 x i8>* %6, align 1, !tbaa !2, !alias.scope !5
  %7 = getelementptr inbounds i8, i8* %5, i64 32
  %8 = bitcast i8* %7 to <32 x i8>*
  %wide.load9 = load <32 x i8>, <32 x i8>* %8, align 1, !tbaa !2, !alias.scope !5
  %9 = getelementptr inbounds i8, i8* %5, i64 64
  %10 = bitcast i8* %9 to <32 x i8>*
  %wide.load10 = load <32 x i8>, <32 x i8>* %10, align 1, !tbaa !2, !alias.scope !5
  %11 = getelementptr inbounds i8, i8* %5, i64 96
  %12 = bitcast i8* %11 to <32 x i8>*
  %wide.load11 = load <32 x i8>, <32 x i8>* %12, align 1, !tbaa !2, !alias.scope !5
  ; Per-lane mask: 0xFF where the top bit of the input byte is set.
  %13 = ashr <32 x i8> %wide.load, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %14 = ashr <32 x i8> %wide.load9, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %15 = ashr <32 x i8> %wide.load10, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %16 = ashr <32 x i8> %wide.load11, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ; Per-lane left shift by one.
  %17 = shl <32 x i8> %wide.load, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %18 = shl <32 x i8> %wide.load9, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %19 = shl <32 x i8> %wide.load10, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %20 = shl <32 x i8> %wide.load11, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; Keep the constant -90 (0xA6) only in lanes whose top bit was set.
  %21 = and <32 x i8> %13, <i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90>
  %22 = and <32 x i8> %14, <i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90>
  %23 = and <32 x i8> %15, <i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90>
  %24 = and <32 x i8> %16, <i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90>
  ; Combine: (x << 1) ^ (mask & -90), the same formula as the scalar loop.
  %25 = xor <32 x i8> %21, %17
  %26 = xor <32 x i8> %22, %18
  %27 = xor <32 x i8> %23, %19
  %28 = xor <32 x i8> %24, %20
  ; Stores carry alias scope !8 and are marked !noalias with respect to the
  ; load scope !5; vector.memcheck established that the ranges do not overlap.
  %29 = getelementptr inbounds i8, i8* %out, i64 %index
  %30 = bitcast i8* %29 to <32 x i8>*
  store <32 x i8> %25, <32 x i8>* %30, align 1, !tbaa !2, !alias.scope !8, !noalias !5
  %31 = getelementptr inbounds i8, i8* %29, i64 32
  %32 = bitcast i8* %31 to <32 x i8>*
  store <32 x i8> %26, <32 x i8>* %32, align 1, !tbaa !2, !alias.scope !8, !noalias !5
  %33 = getelementptr inbounds i8, i8* %29, i64 64
  %34 = bitcast i8* %33 to <32 x i8>*
  store <32 x i8> %27, <32 x i8>* %34, align 1, !tbaa !2, !alias.scope !8, !noalias !5
  %35 = getelementptr inbounds i8, i8* %29, i64 96
  %36 = bitcast i8* %35 to <32 x i8>*
  store <32 x i8> %28, <32 x i8>* %36, align 1, !tbaa !2, !alias.scope !8, !noalias !5
  ; Advance by the 128 bytes just processed; stop once %n.vec is reached.
  %index.next = add i64 %index, 128
  %37 = icmp eq i64 %index.next, %n.vec
  br i1 %37, label %middle.block, label %vector.body, !llvm.loop !10

middle.block:                                     ; preds = %vector.body
  ; If n was a multiple of 128 everything is done; otherwise the scalar loop
  ; finishes the tail starting at %n.vec.
  %cmp.n = icmp eq i64 %n.vec, %n
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader12

for.cond.cleanup:                                 ; preds = %for.body.prol.loopexit, %for.body, %middle.block, %entry
  ret void

for.body:                                         ; preds = %for.body.prol.loopexit, %for.body
  ; Scalar loop, two elements per iteration (indices %i.07 and %i.07 + 1).
  ; The peeled iteration in for.body.prol leaves an even number of elements
  ; for this loop, so the equality exit test below is reached exactly at n.
  %i.07 = phi i64 [ %inc.1, %for.body ], [ %i.07.unr, %for.body.prol.loopexit ]
  ; First element of the pair.
  %arrayidx = getelementptr inbounds i8, i8* %in, i64 %i.07
  %38 = load i8, i8* %arrayidx, align 1, !tbaa !2
  %.neg.i = ashr i8 %38, 7
  %shl.i = shl i8 %38, 1
  %39 = and i8 %.neg.i, -90
  %xor10.i = xor i8 %39, %shl.i
  %arrayidx1 = getelementptr inbounds i8, i8* %out, i64 %i.07
  store i8 %xor10.i, i8* %arrayidx1, align 1, !tbaa !2
  ; Second element of the pair.
  %inc = add nuw i64 %i.07, 1
  %arrayidx.1 = getelementptr inbounds i8, i8* %in, i64 %inc
  %40 = load i8, i8* %arrayidx.1, align 1, !tbaa !2
  %.neg.i.1 = ashr i8 %40, 7
  %shl.i.1 = shl i8 %40, 1
  %41 = and i8 %.neg.i.1, -90
  %xor10.i.1 = xor i8 %41, %shl.i.1
  %arrayidx1.1 = getelementptr inbounds i8, i8* %out, i64 %inc
  store i8 %xor10.i.1, i8* %arrayidx1.1, align 1, !tbaa !2
  %inc.1 = add nuw i64 %i.07, 2
  %exitcond.1 = icmp eq i64 %inc.1, %n
  br i1 %exitcond.1, label %for.cond.cleanup, label %for.body, !llvm.loop !12
}

; Attribute group #0, attached to @vec. "target-features" enables AVX and AVX2
; among others; @vec's vector loop operates on <32 x i8> (256-bit) values.
; GFNI is also listed, but @vec contains only generic shift/and/xor
; instructions and no GFNI-specific intrinsic call.
attributes #0 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+avx,+avx2,+cx8,+fxsr,+gfni,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="false" "use-soft-float"="false" }

; Named metadata: module flags and the producer identification string.
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

; Module flag "wchar_size" = 4, with merge behavior 1.
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project 37c4ce5877804fd8bbd99f753a0517ea01b27405)"}
; TBAA: !2 is the access tag used by every load and store in @vec; it names
; the "omnipotent char" type (!3) under the "Simple C/C++ TBAA" root (!4).
!2 = !{!3, !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
; Alias scopes in the "LVerDomain" domain (!7):
;   !5 = list holding scope !6 - !alias.scope on the vector loads and !noalias
;        on the vector stores
;   !8 = list holding scope !9 - !alias.scope on the vector stores
!5 = !{!6}
!6 = distinct !{!6, !7}
!7 = distinct !{!7, !"LVerDomain"}
!8 = !{!9}
!9 = distinct !{!9, !7}
; Loop IDs: !10 is attached to the vector.body back-edge and !12 to the
; for.body back-edge; both carry "llvm.loop.isvectorized" = 1 (!11).
!10 = distinct !{!10, !11}
!11 = !{!"llvm.loop.isvectorized", i32 1}
!12 = distinct !{!12, !11}