; ModuleID = 'aec_vec.c'
; Module header: the originating source file name, followed by the data layout
; string and target triple this IR was generated for (x86-64, Linux, GNU).
source_filename = "aec_vec.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; void @vec(i64 %n, i8* %out, i8* %in)
;
; For every i in [0, %n) computes
;     out[i] = (in[i] << 1) ^ ((in[i] >>s 7) & 0xA6)
; i.e. the byte is shifted left by one and, when its top bit was set, XORed
; with the constant -90 (0xA6 as an unsigned byte).
;
; Control-flow layout:
;   * %n == 0                     -> return immediately.
;   * %n >= 128 and the [in, in+n) / [out, out+n) ranges do not overlap
;                                 -> vector.body handles floor(n/128)*128 bytes
;                                    (4 x <32 x i8> per iteration), the scalar
;                                    path finishes the remainder.
;   * otherwise                   -> scalar path only.
;   * scalar path: one peeled iteration (for.body.prol) when the remaining
;     count is odd, then for.body processes two elements per iteration.
;
; Repairs relative to the damaged text this replaces:
;   * identifiers and metadata kinds that had been broken across lines
;     (#0, %shl.i.prol, %shl.i.1, !alias.scope, !llvm.loop) are rejoined;
;   * the missing definitions of %3, %4, %.neg.i, %shl.i and %shl.i.1 are
;     restored (each site is marked "restored" below).
;
; Function Attrs: nofree norecurse nounwind uwtable
define dso_local void @vec(i64 %n, i8* nocapture %out, i8* nocapture readonly %in) local_unnamed_addr #0 {
entry:
  %cmp6 = icmp eq i64 %n, 0
  br i1 %cmp6, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  ; Fewer than 128 bytes: not worth entering the 128-byte-per-iteration loop.
  %min.iters.check = icmp ult i64 %n, 128
  br i1 %min.iters.check, label %for.body.preheader12, label %vector.memcheck

for.body.preheader12:                             ; preds = %middle.block, %vector.memcheck, %for.body.preheader
  ; %i.07.ph = first index still to be processed by the scalar code
  ; (0, or %n.vec when the vector loop already ran).
  %i.07.ph = phi i64 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
  %0 = xor i64 %i.07.ph, -1
  ; %i.07.ph is always a multiple of 128 here, so the parity of %n is the
  ; parity of the number of remaining elements.
  %xtraiter = and i64 %n, 1
  %lcmp.mod = icmp eq i64 %xtraiter, 0
  br i1 %lcmp.mod, label %for.body.prol.loopexit, label %for.body.prol

for.body.prol:                                    ; preds = %for.body.preheader12
  ; Peeled single iteration so that for.body always sees an even count.
  %arrayidx.prol = getelementptr inbounds i8, i8* %in, i64 %i.07.ph
  %1 = load i8, i8* %arrayidx.prol, align 1, !tbaa !2
  %.neg.i.prol = ashr i8 %1, 7
  %shl.i.prol = shl i8 %1, 1
  %2 = and i8 %.neg.i.prol, -90
  %xor10.i.prol = xor i8 %2, %shl.i.prol
  %arrayidx1.prol = getelementptr inbounds i8, i8* %out, i64 %i.07.ph
  store i8 %xor10.i.prol, i8* %arrayidx1.prol, align 1, !tbaa !2
  ; %i.07.ph is even, so 'or 1' is the same as adding 1.
  %inc.prol = or i64 %i.07.ph, 1
  br label %for.body.prol.loopexit

for.body.prol.loopexit:                           ; preds = %for.body.preheader12, %for.body.prol
  %i.07.unr = phi i64 [ %i.07.ph, %for.body.preheader12 ], [ %inc.prol, %for.body.prol ]
  ; restored: %3 and %4 were missing although %4 is the branch condition.
  ; ~%i.07.ph == -%n  <=>  %n - %i.07.ph == 1, i.e. exactly one element
  ; remained and the peeled iteration above already handled it.
  %3 = sub i64 0, %n
  %4 = icmp eq i64 %0, %3
  br i1 %4, label %for.cond.cleanup, label %for.body

vector.memcheck:                                  ; preds = %for.body.preheader
  ; Runtime overlap check: the ranges conflict when in+n > out and out+n > in.
  %scevgep = getelementptr i8, i8* %out, i64 %n
  %scevgep8 = getelementptr i8, i8* %in, i64 %n
  %bound0 = icmp ugt i8* %scevgep8, %out
  %bound1 = icmp ugt i8* %scevgep, %in
  %found.conflict = and i1 %bound0, %bound1
  br i1 %found.conflict, label %for.body.preheader12, label %vector.ph

vector.ph:                                        ; preds = %vector.memcheck
  ; %n rounded down to a multiple of 128.
  %n.vec = and i64 %n, -128
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; Load 4 x 32 bytes from %in + %index.
  %5 = getelementptr inbounds i8, i8* %in, i64 %index
  %6 = bitcast i8* %5 to <32 x i8>*
  %wide.load = load <32 x i8>, <32 x i8>* %6, align 1, !tbaa !2, !alias.scope !5
  %7 = getelementptr inbounds i8, i8* %5, i64 32
  %8 = bitcast i8* %7 to <32 x i8>*
  %wide.load9 = load <32 x i8>, <32 x i8>* %8, align 1, !tbaa !2, !alias.scope !5
  %9 = getelementptr inbounds i8, i8* %5, i64 64
  %10 = bitcast i8* %9 to <32 x i8>*
  %wide.load10 = load <32 x i8>, <32 x i8>* %10, align 1, !tbaa !2, !alias.scope !5
  %11 = getelementptr inbounds i8, i8* %5, i64 96
  %12 = bitcast i8* %11 to <32 x i8>*
  %wide.load11 = load <32 x i8>, <32 x i8>* %12, align 1, !tbaa !2, !alias.scope !5
  ; Per-lane sign mask (0 or -1).
  %13 = ashr <32 x i8> %wide.load, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %14 = ashr <32 x i8> %wide.load9, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %15 = ashr <32 x i8> %wide.load10, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %16 = ashr <32 x i8> %wide.load11, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ; Per-lane left shift by one.
  %17 = shl <32 x i8> %wide.load, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %18 = shl <32 x i8> %wide.load9, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %19 = shl <32 x i8> %wide.load10, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %20 = shl <32 x i8> %wide.load11, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; Select the constant -90 (0xA6) in lanes whose top bit was set.
  %21 = and <32 x i8> %13, <i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90>
  %22 = and <32 x i8> %14, <i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90>
  %23 = and <32 x i8> %15, <i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90>
  %24 = and <32 x i8> %16, <i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90, i8 -90>
  %25 = xor <32 x i8> %21, %17
  %26 = xor <32 x i8> %22, %18
  %27 = xor <32 x i8> %23, %19
  %28 = xor <32 x i8> %24, %20
  ; Store 4 x 32 result bytes to %out + %index.
  %29 = getelementptr inbounds i8, i8* %out, i64 %index
  %30 = bitcast i8* %29 to <32 x i8>*
  store <32 x i8> %25, <32 x i8>* %30, align 1, !tbaa !2, !alias.scope !8, !noalias !5
  %31 = getelementptr inbounds i8, i8* %29, i64 32
  %32 = bitcast i8* %31 to <32 x i8>*
  store <32 x i8> %26, <32 x i8>* %32, align 1, !tbaa !2, !alias.scope !8, !noalias !5
  %33 = getelementptr inbounds i8, i8* %29, i64 64
  %34 = bitcast i8* %33 to <32 x i8>*
  store <32 x i8> %27, <32 x i8>* %34, align 1, !tbaa !2, !alias.scope !8, !noalias !5
  %35 = getelementptr inbounds i8, i8* %29, i64 96
  %36 = bitcast i8* %35 to <32 x i8>*
  store <32 x i8> %28, <32 x i8>* %36, align 1, !tbaa !2, !alias.scope !8, !noalias !5
  %index.next = add i64 %index, 128
  %37 = icmp eq i64 %index.next, %n.vec
  br i1 %37, label %middle.block, label %vector.body, !llvm.loop !10

middle.block:                                     ; preds = %vector.body
  ; Done if %n was an exact multiple of 128, otherwise finish with scalar code.
  %cmp.n = icmp eq i64 %n.vec, %n
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader12

for.cond.cleanup:                                 ; preds = %for.body.prol.loopexit, %for.body, %middle.block, %entry
  ret void

for.body:                                         ; preds = %for.body.prol.loopexit, %for.body
  ; Scalar loop, two elements per iteration.
  %i.07 = phi i64 [ %inc.1, %for.body ], [ %i.07.unr, %for.body.prol.loopexit ]
  %arrayidx = getelementptr inbounds i8, i8* %in, i64 %i.07
  %38 = load i8, i8* %arrayidx, align 1, !tbaa !2
  ; restored: %.neg.i and %shl.i were used below without a definition; they
  ; mirror %.neg.i.prol / %shl.i.prol and %.neg.i.1 in the sibling copies.
  %.neg.i = ashr i8 %38, 7
  %shl.i = shl i8 %38, 1
  %39 = and i8 %.neg.i, -90
  %xor10.i = xor i8 %39, %shl.i
  %arrayidx1 = getelementptr inbounds i8, i8* %out, i64 %i.07
  store i8 %xor10.i, i8* %arrayidx1, align 1, !tbaa !2
  %inc = add nuw i64 %i.07, 1
  %arrayidx.1 = getelementptr inbounds i8, i8* %in, i64 %inc
  %40 = load i8, i8* %arrayidx.1, align 1, !tbaa !2
  %.neg.i.1 = ashr i8 %40, 7
  ; restored: %shl.i.1 was used below without a definition.
  %shl.i.1 = shl i8 %40, 1
  %41 = and i8 %.neg.i.1, -90
  %xor10.i.1 = xor i8 %41, %shl.i.1
  %arrayidx1.1 = getelementptr inbounds i8, i8* %out, i64 %inc
  store i8 %xor10.i.1, i8* %arrayidx1.1, align 1, !tbaa !2
  %inc.1 = add nuw i64 %i.07, 2
  %exitcond.1 = icmp eq i64 %inc.1, %n
  br i1 %exitcond.1, label %for.cond.cleanup, label %for.body, !llvm.loop !12
}
; Attribute group #0: the function-level attributes and target options
; (target CPU and feature string) shared by definitions that reference #0.
attributes #0 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+avx,+avx2,+cx8,+fxsr,+gfni,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="false" "use-soft-float"="false" }
; Module flags and producer identification.
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project 37c4ce5877804fd8bbd99f753a0517ea01b27405)"}
; TBAA. !2 is the access tag named by every '!tbaa !2' attachment in @vec; its
; definition was missing (the list jumped from !1 to !3). It is restored here
; as an access of type !3 ("omnipotent char") at offset 0.
!2 = !{!3, !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
; Alias scopes in domain !7 ("LVerDomain"): scope list !5 is attached as
; !alias.scope to the vector loads and as !noalias to the vector stores;
; scope list !8 is the !alias.scope of the vector stores.
!5 = !{!6}
!6 = distinct !{!6, !7}
!7 = distinct !{!7, !"LVerDomain"}
!8 = !{!9}
!9 = distinct !{!9, !7}
; Loop IDs for vector.body (!10) and for.body (!12), both carrying
; llvm.loop.isvectorized = 1.
!10 = distinct !{!10, !11}
!11 = !{!"llvm.loop.isvectorized", i32 1}
!12 = distinct !{!12, !11}