; Untitled

; Function Attrs: nounwind
define i32 @memory_profiler_mandelbrot(float %x_min, float %x_max, float %y_min, float %y_max, float %c_real, float %c_imag, i32 %iters, i32 %w, i32 %h, %struct.halide_buffer_t* noalias %f1.buffer) local_unnamed_addr #8 {
entry:
  %__cilkrts_sf = alloca %__cilkrts_stack_frame, align 8
  call void @__cilkrts_enter_frame_1(%__cilkrts_stack_frame* %__cilkrts_sf)
  %0 = alloca i8*, align 8
  store i8* null, i8** %0, align 8
  %1 = alloca i8*, align 8
  store i8* null, i8** %1, align 8
  %2 = alloca [8 x i32], align 4
  %profiling_func_stack_peak_buf55 = alloca [4 x i64], align 32
  %profiling_func_names56 = alloca <4 x i64>, align 32
  %profiling_func_names56.sub = getelementptr inbounds <4 x i64>, <4 x i64>* %profiling_func_names56, i64 0, i64 0
  %3 = tail call %struct.halide_profiler_state* @halide_profiler_get_state() #9
  %4 = bitcast %struct.halide_profiler_state* %3 to i8*
  store <4 x i64> <i64 ptrtoint ([9 x i8]* @str to i64), i64 ptrtoint ([3 x i8]* @str.163 to i64), i64 ptrtoint ([3 x i8]* @str.164 to i64), i64 ptrtoint ([7 x i8]* @str.165 to i64)>, <4 x i64>* %profiling_func_names56, align 32, !tbaa !185
  %5 = getelementptr inbounds [4 x i64], [4 x i64]* %profiling_func_stack_peak_buf55, i64 0, i64 3
  %6 = bitcast [4 x i64]* %profiling_func_stack_peak_buf55 to i8*
  call void @llvm.memset.p0i8.i64(i8* nonnull %6, i8 0, i64 24, i32 32, i1 false)
  store i64 20, i64* %5, align 8, !tbaa !197
  %profiler_token = call i32 @halide_profiler_pipeline_start(i8* null, i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str.166, i64 0, i64 0), i32 4, i64* nonnull %profiling_func_names56.sub) #9
  %7 = icmp sgt i32 %profiler_token, -1
  br i1 %7, label %"assert succeeded", label %destructor_block, !prof !210

destructor_block:                                 ; preds = %entry, %"assert failed30", %"assert failed26", %"assert failed24", %"assert failed22", %"assert failed20", %"assert failed16", %"assert failed14", %"assert failed12", %"assert failed10", %"assert failed8", %"assert failed6", %"assert failed1"
  %8 = phi i32 [ %13, %"assert failed1" ], [ %64, %"assert failed6" ], [ %69, %"assert failed8" ], [ %71, %"assert failed10" ], [ %76, %"assert failed12" ], [ %78, %"assert failed14" ], [ %80, %"assert failed16" ], [ %88, %"assert failed20" ], [ %90, %"assert failed22" ], [ %233, %"assert failed24" ], [ %236, %"assert failed26" ], [ %239, %"assert failed30" ], [ %profiler_token, %entry ]
  %.0.127 = load i8*, i8** %0, align 8
  store i8* null, i8** %0, align 8
  %tobool.i = icmp eq i8* %.0.127, null
  %should_call.not.i = icmp eq i32 %8, 0
  %brmerge.i = or i1 %should_call.not.i, %tobool.i
  br i1 %brmerge.i, label %call_destructor.exit, label %if.then.i

if.then.i:                                        ; preds = %destructor_block
  call void @halide_free(i8* null, i8* nonnull %.0.127) #12
  br label %call_destructor.exit

call_destructor.exit:                             ; preds = %destructor_block, %if.then.i
  %.0.125 = load i8*, i8** %1, align 8
  store i8* null, i8** %1, align 8
  %tobool.i60 = icmp eq i8* %.0.125, null
  %brmerge.i62 = or i1 %should_call.not.i, %tobool.i60
  br i1 %brmerge.i62, label %call_destructor.exit65, label %if.then.i63

if.then.i63:                                      ; preds = %call_destructor.exit
  call void @halide_free(i8* null, i8* nonnull %.0.125) #12
  br label %call_destructor.exit65

call_destructor.exit65:                           ; preds = %call_destructor.exit, %call_destructor.exit.thread, %if.then.i63
  %9 = phi i32 [ %8, %if.then.i63 ], [ 0, %call_destructor.exit.thread ], [ %8, %call_destructor.exit ]
  %tobool.i66 = icmp eq %struct.halide_profiler_state* %3, null
  br i1 %tobool.i66, label %call_destructor.exit69, label %if.then.i67

if.then.i67:                                      ; preds = %call_destructor.exit65
  call void @halide_profiler_pipeline_end(i8* null, i8* nonnull %4) #12
  br label %call_destructor.exit69

call_destructor.exit69:                           ; preds = %call_destructor.exit65, %if.then.i67
  call void @__cilk_parent_epilogue(%__cilkrts_stack_frame* %__cilkrts_sf)
  ret i32 %9

"assert succeeded":                               ; preds = %entry
  %profiling_func_stack_peak_buf55.sub = getelementptr inbounds [4 x i64], [4 x i64]* %profiling_func_stack_peak_buf55, i64 0, i64 0
  %profiler_state = call %struct.halide_profiler_state* @halide_profiler_get_state() #9
  %profiler_pipeline_state = call %struct.halide_profiler_pipeline_stats* @halide_profiler_get_pipeline_state(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str.166, i64 0, i64 0)) #9
  %active_threads.i = getelementptr inbounds %struct.halide_profiler_state, %struct.halide_profiler_state* %profiler_state, i64 0, i32 4
  call void asm sideeffect "", ""() #9, !srcloc !177
  %10 = atomicrmw add i32* %active_threads.i, i32 1 seq_cst
  call void asm sideeffect "", ""() #9, !srcloc !178
  %11 = bitcast %struct.halide_profiler_pipeline_stats* %profiler_pipeline_state to i8*
  call void @halide_profiler_stack_peak_update(i8* null, i8* %11, i64* nonnull %profiling_func_stack_peak_buf55.sub) #9
  %12 = icmp eq %struct.halide_buffer_t* %f1.buffer, null
  br i1 %12, label %"assert failed1", label %"assert succeeded2", !prof !211

"assert failed1":                                 ; preds = %"assert succeeded"
  %13 = call i32 @halide_error_buffer_argument_is_null(i8* null, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @str.163, i64 0, i64 0)) #9
  br label %destructor_block

"assert succeeded2":                              ; preds = %"assert succeeded"
  %host.i = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %f1.buffer, i64 0, i32 2
  %14 = load i8*, i8** %host.i, align 8, !tbaa !87
  %code.i = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %f1.buffer, i64 0, i32 4, i32 0
  %15 = load i8, i8* %code.i, align 8, !tbaa !212
  %bits.i = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %f1.buffer, i64 0, i32 4, i32 1
  %16 = load i8, i8* %bits.i, align 1, !tbaa !213
  %lanes.i = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %f1.buffer, i64 0, i32 4, i32 2
  %17 = load i16, i16* %lanes.i, align 2, !tbaa !214
  %dim.i = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %f1.buffer, i64 0, i32 6
  %18 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %dim.i, align 8, !tbaa !67
  %min.i = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %18, i64 0, i32 0
  %19 = load i32, i32* %min.i, align 4, !tbaa !71
  %extent.i = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %18, i64 0, i32 1
  %20 = load i32, i32* %extent.i, align 4, !tbaa !69
  %stride.i = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %18, i64 0, i32 2
  %21 = load i32, i32* %stride.i, align 4, !tbaa !72
  %min.i77 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %18, i64 1, i32 0
  %22 = load i32, i32* %min.i77, align 4, !tbaa !71
  %extent.i81 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %18, i64 1, i32 1
  %23 = load i32, i32* %extent.i81, align 4, !tbaa !69
  %stride.i83 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %18, i64 1, i32 2
  %24 = load i32, i32* %stride.i83, align 4, !tbaa !72
  %25 = add nsw i32 %20, %19
  %b2 = add nsw i32 %25, -8
  %26 = icmp sgt i32 %b2, %19
  %27 = select i1 %26, i32 %19, i32 %b2
  %28 = add nsw i32 %20, -1
  %29 = and i32 %28, -8
  %30 = add i32 %19, 7
  %a1 = add i32 %30, %29
  %b3 = add nsw i32 %25, -1
  %31 = icmp sgt i32 %b3, %a1
  %32 = select i1 %31, i32 %a1, i32 %b3
  %33 = add nsw i32 %23, %22
  %b5 = add nsw i32 %33, -8
  %34 = icmp sgt i32 %b5, %22
  %35 = select i1 %34, i32 %22, i32 %b5
  %36 = add nsw i32 %23, -1
  %37 = and i32 %36, -8
  %38 = add i32 %22, 7
  %a4 = add i32 %38, %37
  %b6 = add nsw i32 %33, -1
  %39 = icmp sgt i32 %b6, %a4
  %40 = select i1 %39, i32 %a4, i32 %b6
  %cmp.i = icmp eq i8* %14, null
  br i1 %cmp.i, label %_halide_buffer_is_bounds_query.exit, label %true_bb3

_halide_buffer_is_bounds_query.exit:              ; preds = %"assert succeeded2"
  %device.i = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %f1.buffer, i64 0, i32 0
  %41 = load i64, i64* %device.i, align 8, !tbaa !117
  %cmp1.i = icmp eq i64 %41, 0
  br i1 %cmp1.i, label %after_bb, label %_halide_buffer_is_bounds_query.exit101

after_bb:                                         ; preds = %_halide_buffer_is_bounds_query.exit
  %f1.extent.0.required.s = sub nsw i32 %32, %27
  %42 = add nsw i32 %f1.extent.0.required.s, 1
  %f1.extent.1.required.s = sub i32 1, %35
  %43 = add i32 %f1.extent.1.required.s, %40
  %44 = getelementptr inbounds [8 x i32], [8 x i32]* %2, i64 0, i64 0
  store i32 %27, i32* %44, align 4
  %45 = getelementptr inbounds [8 x i32], [8 x i32]* %2, i64 0, i64 1
  store i32 %42, i32* %45, align 4
  %46 = getelementptr inbounds [8 x i32], [8 x i32]* %2, i64 0, i64 2
  store i32 1, i32* %46, align 4
  %47 = getelementptr inbounds [8 x i32], [8 x i32]* %2, i64 0, i64 3
  store i32 0, i32* %47, align 4
  %48 = getelementptr inbounds [8 x i32], [8 x i32]* %2, i64 0, i64 4
  store i32 %35, i32* %48, align 4
  %49 = getelementptr inbounds [8 x i32], [8 x i32]* %2, i64 0, i64 5
  store i32 %43, i32* %49, align 4
  %50 = getelementptr inbounds [8 x i32], [8 x i32]* %2, i64 0, i64 6
  store i32 %42, i32* %50, align 4
  %51 = getelementptr inbounds [8 x i32], [8 x i32]* %2, i64 0, i64 7
  store i32 0, i32* %51, align 4
  store i8 0, i8* %code.i, align 8, !tbaa !212
  %52 = bitcast %struct.halide_buffer_t* %f1.buffer to i8*
  call void @llvm.memset.p0i8.i64(i8* %52, i8 0, i64 24, i32 8, i1 false)
  store i8 32, i8* %bits.i, align 1, !tbaa !213
  store i16 1, i16* %lanes.i, align 2, !tbaa !214
  %dimensions7.i = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %f1.buffer, i64 0, i32 5
  store i32 2, i32* %dimensions7.i, align 4, !tbaa !65
  %53 = bitcast %struct.halide_dimension_t* %18 to i8*
  %54 = bitcast [8 x i32]* %2 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %53, i8* nonnull %54, i64 16, i32 4, i1 false) #9, !tbaa.struct !68
  %.pre.i = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %dim.i, align 8, !tbaa !67
  %arrayidx.i = getelementptr inbounds [8 x i32], [8 x i32]* %2, i64 0, i64 4
  %arrayidx12.i = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %.pre.i, i64 1
  %55 = bitcast %struct.halide_dimension_t* %arrayidx12.i to i8*
  %56 = bitcast i32* %arrayidx.i to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %55, i8* %56, i64 16, i32 4, i1 false) #9, !tbaa.struct !68
  %flags13.i = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %f1.buffer, i64 0, i32 3
  store i64 0, i64* %flags13.i, align 8, !tbaa !119
  %.pre = load i8*, i8** %host.i, align 8, !tbaa !87
  %cmp.i97 = icmp eq i8* %.pre, null
  br i1 %cmp.i97, label %_halide_buffer_is_bounds_query.exit101, label %true_bb3

_halide_buffer_is_bounds_query.exit101:           ; preds = %_halide_buffer_is_bounds_query.exit, %after_bb
  %device.i98 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %f1.buffer, i64 0, i32 0
  %57 = load i64, i64* %device.i98, align 8, !tbaa !117
  %cmp1.i99 = icmp eq i64 %57, 0
  br i1 %cmp1.i99, label %call_destructor.exit.thread, label %true_bb3

true_bb3:                                         ; preds = %"assert succeeded2", %after_bb, %_halide_buffer_is_bounds_query.exit101
  %58 = icmp eq i16 %17, 1
  %59 = icmp eq i8 %16, 32
  %60 = icmp eq i8 %15, 0
  %61 = and i1 %60, %59
  %62 = and i1 %61, %58
  br i1 %62, label %"assert succeeded7", label %"assert failed6", !prof !210

call_destructor.exit.thread:                      ; preds = %_halide_buffer_is_bounds_query.exit101, %"sync pfor f1.s0.v1.v18"
  call void asm sideeffect "", ""() #9, !srcloc !179
  %63 = atomicrmw sub i32* %active_threads.i, i32 1 seq_cst
  call void asm sideeffect "", ""() #9, !srcloc !180
  store i8* null, i8** %0, align 8
  store i8* null, i8** %1, align 8
  br label %call_destructor.exit65

"assert failed6":                                 ; preds = %true_bb3
  %64 = call i32 @halide_error_bad_type(i8* null, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @str.167, i64 0, i64 0), i8 %15, i8 0, i8 %16, i8 32, i16 %17, i16 1) #9
  br label %destructor_block

"assert succeeded7":                              ; preds = %true_bb3
  %65 = sub nsw i32 %32, %20
  %66 = icmp slt i32 %65, %19
  %67 = icmp sle i32 %19, %b2
  %68 = and i1 %67, %66
  br i1 %68, label %"assert succeeded9", label %"assert failed8", !prof !210

"assert failed8":                                 ; preds = %"assert succeeded7"
  %69 = call i32 @halide_error_access_out_of_bounds(i8* null, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @str.167, i64 0, i64 0), i32 0, i32 %27, i32 %32, i32 %19, i32 %b3) #9
  br label %destructor_block

"assert succeeded9":                              ; preds = %"assert succeeded7"
  %70 = icmp sgt i32 %20, -1
  br i1 %70, label %"assert succeeded11", label %"assert failed10", !prof !210

"assert failed10":                                ; preds = %"assert succeeded9"
  %71 = call i32 @halide_error_buffer_extents_negative(i8* null, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @str.167, i64 0, i64 0), i32 0, i32 %20) #9
  br label %destructor_block

"assert succeeded11":                             ; preds = %"assert succeeded9"
  %72 = sub nsw i32 %40, %23
  %73 = icmp slt i32 %72, %22
  %74 = icmp sle i32 %22, %b5
  %75 = and i1 %74, %73
  br i1 %75, label %"assert succeeded13", label %"assert failed12", !prof !210

"assert failed12":                                ; preds = %"assert succeeded11"
  %76 = call i32 @halide_error_access_out_of_bounds(i8* null, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @str.167, i64 0, i64 0), i32 1, i32 %35, i32 %40, i32 %22, i32 %b6) #9
  br label %destructor_block

"assert succeeded13":                             ; preds = %"assert succeeded11"
  %77 = icmp sgt i32 %23, -1
  br i1 %77, label %"assert succeeded15", label %"assert failed14", !prof !210

"assert failed14":                                ; preds = %"assert succeeded13"
  %78 = call i32 @halide_error_buffer_extents_negative(i8* null, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @str.167, i64 0, i64 0), i32 1, i32 %23) #9
  br label %destructor_block

"assert succeeded15":                             ; preds = %"assert succeeded13"
  %79 = icmp eq i32 %21, 1
  br i1 %79, label %"assert succeeded19", label %"assert failed16", !prof !210

"assert failed16":                                ; preds = %"assert succeeded15"
  %80 = call i32 @halide_error_constraint_violated(i8* null, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.168, i64 0, i64 0), i32 %21, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.169, i64 0, i64 0), i32 1) #9
  br label %destructor_block

"assert succeeded19":                             ; preds = %"assert succeeded15"
  %81 = sext i32 %20 to i64
  %82 = sext i32 %23 to i64
  %f1.total_extent.1 = mul nsw i64 %82, %81
  %83 = sext i32 %24 to i64
  %x2 = mul nsw i64 %83, %82
  %84 = sub nsw i64 0, %x2
  %85 = icmp sgt i64 %x2, -1
  %86 = select i1 %85, i64 %x2, i64 %84
  %87 = icmp ult i64 %86, 2147483648
  br i1 %87, label %"assert succeeded21", label %"assert failed20", !prof !210

"assert failed20":                                ; preds = %"assert succeeded19"
  %88 = call i32 @halide_error_buffer_allocation_too_large(i8* null, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @str.163, i64 0, i64 0), i64 %86, i64 2147483647) #9
  br label %destructor_block

"assert succeeded21":                             ; preds = %"assert succeeded19"
  %89 = icmp slt i64 %f1.total_extent.1, 2147483648
  br i1 %89, label %"produce f1", label %"assert failed22", !prof !210

"assert failed22":                                ; preds = %"assert succeeded21"
  %90 = call i32 @halide_error_buffer_extents_too_large(i8* null, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @str.163, i64 0, i64 0), i64 %f1.total_extent.1, i64 2147483647) #9
  br label %destructor_block

"produce f1":                                     ; preds = %"assert succeeded21"
  %current_func.i105 = getelementptr inbounds %struct.halide_profiler_state, %struct.halide_profiler_state* %profiler_state, i64 0, i32 3
  call void asm sideeffect "", ""() #9, !srcloc !175
  %add.i106 = add nuw nsw i32 %profiler_token, 1
  store volatile i32 %add.i106, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  call void asm sideeffect "", ""() #9, !srcloc !179
  %91 = atomicrmw sub i32* %active_threads.i, i32 1 seq_cst
  call void asm sideeffect "", ""() #9, !srcloc !180
  %92 = add nuw nsw i32 %23, 7
  %93 = ashr i32 %92, 3
  %94 = icmp sgt i32 %93, 0
  br i1 %94, label %"pfor f1.s0.v1.v18.preheader", label %"end pfor f1.s0.v1.v18", !prof !210

"pfor f1.s0.v1.v18.preheader":                    ; preds = %"produce f1"
  %95 = add nuw nsw i32 %20, 7
  %96 = ashr i32 %95, 3
  %97 = icmp sgt i32 %96, 0
  %98 = add nsw i32 %iters, 1
  %99 = sext i32 %98 to i64
  %100 = shl nsw i64 %99, 8
  %101 = zext i32 %98 to i64
  %102 = shl nuw nsw i64 %101, 8
  %103 = icmp ult i32 %98, 8388608
  %104 = or i64 %102, 4
  %add.i112 = add nuw nsw i32 %profiler_token, 2
  %105 = icmp sgt i32 %iters, -1
  %106 = sitofp i32 %w to float
  %107 = sitofp i32 %h to float
  %108 = icmp sgt i32 %iters, 0
  %add.i116 = add nuw nsw i32 %profiler_token, 3
  %109 = sext i32 %22 to i64
  %110 = mul nsw i64 %83, %109
  %111 = sext i32 %19 to i64
  %112 = add nsw i64 %110, %111
  %113 = bitcast i8* %14 to i32*
  %114 = xor i32 %19, -1
  %115 = sub i32 7, %20
  %116 = sub i32 %115, %19
  %117 = xor i32 %22, -1
  %118 = sub i32 7, %23
  %119 = sub i32 %118, %22
  %120 = add nsw i64 %101, -2
  %121 = and i32 %98, 1
  %lcmp.mod = icmp eq i32 %121, 0
  %122 = icmp eq i64 %120, 0
  %123 = and i32 %98, 1
  %lcmp.mod300 = icmp eq i32 %123, 0
  %124 = icmp eq i64 %120, 0
  %125 = and i32 %98, 1
  %lcmp.mod302 = icmp eq i32 %125, 0
  %126 = icmp eq i64 %120, 0
  %127 = and i32 %98, 1
  %lcmp.mod304 = icmp eq i32 %127, 0
  %128 = icmp eq i64 %120, 0
  %129 = and i32 %98, 1
  %lcmp.mod306 = icmp eq i32 %129, 0
  %130 = icmp eq i64 %120, 0
  %131 = and i32 %98, 1
  %lcmp.mod308 = icmp eq i32 %131, 0
  %132 = icmp eq i64 %120, 0
  %133 = and i32 %98, 1
  %lcmp.mod310 = icmp eq i32 %133, 0
  %134 = icmp eq i64 %120, 0
  %135 = and i32 %98, 1
  %lcmp.mod312 = icmp eq i32 %135, 0
  %136 = icmp eq i64 %120, 0
  %137 = and i32 %98, 1
  %lcmp.mod314 = icmp eq i32 %137, 0
  %138 = icmp eq i64 %120, 0
  %139 = and i32 %98, 1
  %lcmp.mod316 = icmp eq i32 %139, 0
  %140 = icmp eq i64 %120, 0
  %141 = and i32 %98, 1
  %lcmp.mod318 = icmp eq i32 %141, 0
  %142 = icmp eq i64 %120, 0
  %143 = and i32 %98, 1
  %lcmp.mod320 = icmp eq i32 %143, 0
  %144 = icmp eq i64 %120, 0
  br label %"pfor f1.s0.v1.v18"

"pfor f1.s0.v1.v18":                              ; preds = %"pfor f1.s0.v1.v18.preheader", %"platch f1.s0.v1.v18"
  %indvars.iv214 = phi i32 [ %117, %"pfor f1.s0.v1.v18.preheader" ], [ %indvars.iv.next215, %"platch f1.s0.v1.v18" ]
  %f1.s0.v1.v18 = phi i32 [ 0, %"pfor f1.s0.v1.v18.preheader" ], [ %222, %"platch f1.s0.v1.v18" ]
  %145 = icmp sgt i32 %indvars.iv214, %119
  %smax216 = select i1 %145, i32 %indvars.iv214, i32 %119
  %146 = xor i32 %smax216, -1
  %147 = sext i32 %146 to i64
  detach label %"pbody f1.s0.v1.v18", label %"platch f1.s0.v1.v18"

"pbody f1.s0.v1.v18":                             ; preds = %"pfor f1.s0.v1.v18"
  call void asm sideeffect "", ""() #9, !srcloc !177
  %148 = atomicrmw add i32* %active_threads.i, i32 1 seq_cst
  call void asm sideeffect "", ""() #9, !srcloc !178
  %149 = shl nsw i32 %f1.s0.v1.v18, 3
  %a6 = add nsw i32 %149, %22
  %150 = icmp sgt i32 %b5, %a6
  %f1.s0.v1.v16.base = select i1 %150, i32 %a6, i32 %b5
  br i1 %97, label %"for f1.s0.v0.v17.preheader", label %"end for f1.s0.v0.v17", !prof !210

"for f1.s0.v0.v17.preheader":                     ; preds = %"pbody f1.s0.v1.v18"
  %151 = sext i32 %f1.s0.v1.v16.base to i64
  %152 = add nsw i32 %f1.s0.v1.v16.base, 7
  %.promoted = load i8*, i8** %1, align 8
  %.promoted178 = load i8*, i8** %0, align 8
  %153 = sitofp i32 %146 to float
  %154 = fdiv float %153, %107
  %155 = fmul float %154, %y_max
  %156 = fsub float 1.000000e+00, %154
  %157 = fmul float %156, %y_min
  %158 = fadd float %155, %157
  %indvars.iv.next204 = add nsw i64 %147, 1
  %159 = sub nsw i64 %indvars.iv.next204, %151
  %160 = shl nsw i64 %159, 3
  %161 = trunc i64 %indvars.iv.next204 to i32
  %162 = sitofp i32 %161 to float
  %163 = fdiv float %162, %107
  %164 = fmul float %163, %y_max
  %165 = fsub float 1.000000e+00, %163
  %166 = fmul float %165, %y_min
  %167 = fadd float %164, %166
  %indvars.iv.next204.1 = add nsw i64 %147, 2
  %168 = sub nsw i64 %indvars.iv.next204.1, %151
  %169 = shl nsw i64 %168, 3
  %170 = trunc i64 %indvars.iv.next204.1 to i32
  %171 = sitofp i32 %170 to float
  %172 = fdiv float %171, %107
  %173 = fmul float %172, %y_max
  %174 = fsub float 1.000000e+00, %172
  %175 = fmul float %174, %y_min
  %176 = fadd float %173, %175
  %indvars.iv.next204.2 = add nsw i64 %147, 3
  %177 = sub nsw i64 %indvars.iv.next204.2, %151
  %178 = shl nsw i64 %177, 3
  %179 = trunc i64 %indvars.iv.next204.2 to i32
  %180 = sitofp i32 %179 to float
  %181 = fdiv float %180, %107
  %182 = fmul float %181, %y_max
  %183 = fsub float 1.000000e+00, %181
  %184 = fmul float %183, %y_min
  %185 = fadd float %182, %184
  %indvars.iv.next204.3 = add nsw i64 %147, 4
  %186 = sub nsw i64 %indvars.iv.next204.3, %151
  %187 = shl nsw i64 %186, 3
  %188 = trunc i64 %indvars.iv.next204.3 to i32
  %189 = sitofp i32 %188 to float
  %190 = fdiv float %189, %107
  %191 = fmul float %190, %y_max
  %192 = fsub float 1.000000e+00, %190
  %193 = fmul float %192, %y_min
  %194 = fadd float %191, %193
  %indvars.iv.next204.4 = add nsw i64 %147, 5
  %195 = sub nsw i64 %indvars.iv.next204.4, %151
  %196 = shl nsw i64 %195, 3
  %197 = trunc i64 %indvars.iv.next204.4 to i32
  %198 = sitofp i32 %197 to float
  %199 = fdiv float %198, %107
  %200 = fmul float %199, %y_max
  %201 = fsub float 1.000000e+00, %199
  %202 = fmul float %201, %y_min
  %203 = fadd float %200, %202
  %indvars.iv.next204.5 = add nsw i64 %147, 6
  %204 = sub nsw i64 %indvars.iv.next204.5, %151
  %205 = shl nsw i64 %204, 3
  %206 = trunc i64 %indvars.iv.next204.5 to i32
  %207 = sitofp i32 %206 to float
  %208 = fdiv float %207, %107
  %209 = fmul float %208, %y_max
  %210 = fsub float 1.000000e+00, %208
  %211 = fmul float %210, %y_min
  %212 = fadd float %209, %211
  %indvars.iv.next204.6 = add nsw i64 %147, 7
  %213 = sub nsw i64 %indvars.iv.next204.6, %151
  %214 = shl nsw i64 %213, 3
  %215 = trunc i64 %indvars.iv.next204.6 to i32
  %216 = sitofp i32 %215 to float
  %217 = fdiv float %216, %107
  %218 = fmul float %217, %y_max
  %219 = fsub float 1.000000e+00, %217
  %220 = fmul float %219, %y_min
  %221 = fadd float %218, %220
  br label %"for f1.s0.v0.v17"

"platch f1.s0.v1.v18":                            ; preds = %"end for f1.s0.v0.v17", %"pfor f1.s0.v1.v18"
  %222 = add nuw nsw i32 %f1.s0.v1.v18, 1
  %223 = icmp eq i32 %222, %93
  %indvars.iv.next215 = add i32 %indvars.iv214, -8
  br i1 %223, label %"end pfor f1.s0.v1.v18", label %"pfor f1.s0.v1.v18"

"end pfor f1.s0.v1.v18":                          ; preds = %"platch f1.s0.v1.v18", %"produce f1"
  sync label %"sync pfor f1.s0.v1.v18"

"sync pfor f1.s0.v1.v18":                         ; preds = %"end pfor f1.s0.v1.v18"
  call void asm sideeffect "", ""() #9, !srcloc !177
  %224 = atomicrmw add i32* %active_threads.i, i32 1 seq_cst
  call void asm sideeffect "", ""() #9, !srcloc !178
  br label %call_destructor.exit.thread

"for f1.s0.v0.v17":                               ; preds = %"for f1.s0.v0.v17.preheader", %call_destructor.exit124
  %indvars.iv209 = phi i32 [ %114, %"for f1.s0.v0.v17.preheader" ], [ %indvars.iv.next210, %call_destructor.exit124 ]
  %225 = phi i8* [ %.promoted178, %"for f1.s0.v0.v17.preheader" ], [ null, %call_destructor.exit124 ]
  %226 = phi i8* [ %.promoted, %"for f1.s0.v0.v17.preheader" ], [ null, %call_destructor.exit124 ]
  %f1.s0.v0.v17 = phi i32 [ 0, %"for f1.s0.v0.v17.preheader" ], [ %591, %call_destructor.exit124 ]
  %227 = icmp sgt i32 %indvars.iv209, %116
  %smax211 = select i1 %227, i32 %indvars.iv209, i32 %116
  %228 = xor i32 %smax211, -1
  %229 = sext i32 %228 to i64
  %230 = shl nsw i32 %f1.s0.v0.v17, 3
  %a7 = add nsw i32 %230, %19
  %231 = icmp sgt i32 %b2, %a7
  %f1.s0.v0.v15.base = select i1 %231, i32 %a7, i32 %b2
  call void @halide_profiler_memory_allocate(i8* null, i8* %11, i32 2, i64 %100) #9
  br i1 %103, label %"assert succeeded25", label %"assert failed24", !prof !210

"end for f1.s0.v0.v17.loopexit":                  ; preds = %call_destructor.exit124
  store i8* null, i8** %1, align 8
  store i8* null, i8** %0, align 8
  br label %"end for f1.s0.v0.v17"

"end for f1.s0.v0.v17":                           ; preds = %"end for f1.s0.v0.v17.loopexit", %"pbody f1.s0.v1.v18"
  call void asm sideeffect "", ""() #9, !srcloc !179
  %232 = atomicrmw sub i32* %active_threads.i, i32 1 seq_cst
  call void asm sideeffect "", ""() #9, !srcloc !180
  br label %"platch f1.s0.v1.v18"

"assert failed24":                                ; preds = %"for f1.s0.v0.v17"
  store i8* %.promoted, i8** %1, align 8
  store i8* %.promoted178, i8** %0, align 8
  %233 = call i32 @halide_error_buffer_allocation_too_large(i8* null, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @str.170, i64 0, i64 0), i64 %102, i64 2147483647) #9
  br label %destructor_block

"assert succeeded25":                             ; preds = %"for f1.s0.v0.v17"
  %234 = call i8* @halide_malloc(i8* null, i64 %104)
  %f0.0 = bitcast i8* %234 to float*
  %235 = icmp eq i8* %234, null
  br i1 %235, label %"assert failed26", label %"assert succeeded27", !prof !211

"assert failed26":                                ; preds = %"assert succeeded25"
  store i8* %226, i8** %1, align 8
  store i8* %225, i8** %0, align 8
  %236 = call i32 @halide_error_out_of_memory(i8* null) #9
  br label %destructor_block

"assert succeeded27":                             ; preds = %"assert succeeded25"
  call void @halide_profiler_memory_allocate(i8* null, i8* %11, i32 2, i64 %100) #9
  %237 = call i8* @halide_malloc(i8* null, i64 %104)
  %f0.1 = bitcast i8* %237 to float*
  %238 = icmp eq i8* %237, null
  br i1 %238, label %"assert failed30", label %"assert succeeded31", !prof !211

"assert failed30":                                ; preds = %"assert succeeded27"
  store i8* %234, i8** %1, align 8
  store i8* %225, i8** %0, align 8
  %239 = call i32 @halide_error_out_of_memory(i8* null) #9
  br label %destructor_block

"assert succeeded31":                             ; preds = %"assert succeeded27"
  call void asm sideeffect "", ""() #9, !srcloc !175
  store volatile i32 %add.i112, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  %240 = sext i32 %f1.s0.v0.v15.base to i64
  br i1 %105, label %"for f0.s0.v2.preheader", label %"for f0.s1.v1.preheader", !prof !210

"for f0.s0.v2.preheader":                         ; preds = %"assert succeeded31"
  %241 = sitofp i32 %228 to float
  %242 = fdiv float %241, %106
  %243 = fmul float %242, %x_max
  %244 = fsub float 1.000000e+00, %242
  %245 = fmul float %244, %x_min
  %246 = fadd float %243, %245
  %indvars.iv.next199 = add nsw i64 %229, 1
  %247 = trunc i64 %indvars.iv.next199 to i32
  %248 = sitofp i32 %247 to float
  %249 = fdiv float %248, %106
  %250 = fmul float %249, %x_max
  %251 = fsub float 1.000000e+00, %249
  %252 = fmul float %251, %x_min
  %253 = fadd float %250, %252
  %indvars.iv.next199.1 = add nsw i64 %229, 2
  %254 = trunc i64 %indvars.iv.next199.1 to i32
  %255 = sitofp i32 %254 to float
  %256 = fdiv float %255, %106
  %257 = fmul float %256, %x_max
  %258 = fsub float 1.000000e+00, %256
  %259 = fmul float %258, %x_min
  %260 = fadd float %257, %259
  %indvars.iv.next199.2 = add nsw i64 %229, 3
  %261 = trunc i64 %indvars.iv.next199.2 to i32
  %262 = sitofp i32 %261 to float
  %263 = fdiv float %262, %106
  %264 = fmul float %263, %x_max
  %265 = fsub float 1.000000e+00, %263
  %266 = fmul float %265, %x_min
  %267 = fadd float %264, %266
  %indvars.iv.next199.3 = add nsw i64 %229, 4
  %268 = trunc i64 %indvars.iv.next199.3 to i32
  %269 = sitofp i32 %268 to float
  %270 = fdiv float %269, %106
  %271 = fmul float %270, %x_max
  %272 = fsub float 1.000000e+00, %270
  %273 = fmul float %272, %x_min
  %274 = fadd float %271, %273
  %indvars.iv.next199.4 = add nsw i64 %229, 5
  %275 = trunc i64 %indvars.iv.next199.4 to i32
  %276 = sitofp i32 %275 to float
  %277 = fdiv float %276, %106
  %278 = fmul float %277, %x_max
  %279 = fsub float 1.000000e+00, %277
  %280 = fmul float %279, %x_min
  %281 = fadd float %278, %280
  %indvars.iv.next199.5 = add nsw i64 %229, 6
  %282 = trunc i64 %indvars.iv.next199.5 to i32
  %283 = sitofp i32 %282 to float
  %284 = fdiv float %283, %106
  %285 = fmul float %284, %x_max
  %286 = fsub float 1.000000e+00, %284
  %287 = fmul float %286, %x_min
  %288 = fadd float %285, %287
  %indvars.iv.next199.6 = add nsw i64 %229, 7
  %289 = trunc i64 %indvars.iv.next199.6 to i32
  %290 = sitofp i32 %289 to float
  %291 = fdiv float %290, %106
  %292 = fmul float %291, %x_max
  %293 = fsub float 1.000000e+00, %291
  %294 = fmul float %293, %x_min
  %295 = fadd float %292, %294
  br label %"for f0.s0.v2"

"for f0.s1.v1.preheader":                         ; preds = %"for f0.s0.v2", %"assert succeeded31"
  br i1 %108, label %"for f0.s1.v1.us.preheader", label %"consume f0", !prof !210

"for f0.s1.v1.us.preheader":                      ; preds = %"for f0.s1.v1.preheader"
  %indvars.iv.next213 = add nsw i64 %229, 1
  %indvars.iv.next213.1 = add nsw i64 %229, 2
  %indvars.iv.next213.2 = add nsw i64 %229, 3
  %indvars.iv.next213.3 = add nsw i64 %229, 4
  %indvars.iv.next213.4 = add nsw i64 %229, 5
  %indvars.iv.next213.5 = add nsw i64 %229, 6
  %indvars.iv.next213.6 = add nsw i64 %229, 7
  br label %"for f0.s1.v1.us"

"for f0.s1.v1.us":                                ; preds = %"for f0.s1.v1.us.preheader", %"end for f0.s1.r4$x.loopexit.us.us.7"
  %indvars.iv217 = phi i64 [ %147, %"for f0.s1.v1.us.preheader" ], [ %indvars.iv.next218, %"end for f0.s1.r4$x.loopexit.us.us.7" ]
  %296 = sub nsw i64 %indvars.iv217, %151
  %297 = shl i64 %296, 3
  %298 = trunc i64 %297 to i32
  %299 = sub nsw i64 %297, %240
  %300 = add nsw i64 %299, %229
  br i1 %lcmp.mod, label %"for f0.s1.r4$x.us.us.prol", label %"for f0.s1.r4$x.us.us.prol.loopexit"

"for f0.s1.r4$x.us.us.prol":                      ; preds = %"for f0.s1.v1.us"
  %t325.us.us.prol = shl i64 %296, 35
  %sext = add nuw nsw i64 %t325.us.us.prol, 274877906944
  %301 = ashr exact i64 %sext, 32
  %302 = add nsw i64 %301, -64
  %303 = getelementptr inbounds float, float* %f0.0, i64 %302
  %t326.us.us.prol = load float, float* %303, align 4, !tbaa !215
  %304 = getelementptr inbounds float, float* %f0.1, i64 %302
  %t327.us.us.prol = load float, float* %304, align 4, !tbaa !217
  %305 = fmul float %t327.us.us.prol, %t327.us.us.prol
  %306 = fmul float %t326.us.us.prol, %t326.us.us.prol
  %307 = fsub float %306, %305
  %308 = fadd float %307, %c_real
  %309 = add nuw nsw i64 %300, 64
  %310 = getelementptr inbounds float, float* %f0.0, i64 %309
  store float %308, float* %310, align 4, !tbaa !215
  %311 = load float, float* %303, align 4, !tbaa !215
  %312 = fmul float %t327.us.us.prol, %311
  %313 = fmul float %312, 2.000000e+00
  %314 = fadd float %313, %c_imag
  %315 = getelementptr inbounds float, float* %f0.1, i64 %309
  store float %314, float* %315, align 4, !tbaa !217
  br label %"for f0.s1.r4$x.us.us.prol.loopexit"

"for f0.s1.r4$x.us.us.prol.loopexit":             ; preds = %"for f0.s1.r4$x.us.us.prol", %"for f0.s1.v1.us"
  %indvars.iv207.unr.ph = phi i64 [ 2, %"for f0.s1.r4$x.us.us.prol" ], [ 1, %"for f0.s1.v1.us" ]
  br i1 %122, label %"end for f0.s1.r4$x.loopexit.us.us", label %"for f0.s1.r4$x.us.us"

"end for f0.s1.r4$x.loopexit.us.us":              ; preds = %"for f0.s1.r4$x.us.us", %"for f0.s1.r4$x.us.us.prol.loopexit"
  %316 = or i32 %298, 1
  %317 = add nsw i64 %299, %indvars.iv.next213
  br i1 %lcmp.mod300, label %"for f0.s1.r4$x.us.us.1.prol", label %"for f0.s1.r4$x.us.us.1.prol.loopexit"

"for f0.s1.r4$x.us.us.1.prol":                    ; preds = %"end for f0.s1.r4$x.loopexit.us.us"
  %t325.us.us.1.prol = add i32 %316, 64
  %318 = sext i32 %t325.us.us.1.prol to i64
  %319 = add nsw i64 %318, -64
  %320 = getelementptr inbounds float, float* %f0.0, i64 %319
  %t326.us.us.1.prol = load float, float* %320, align 4, !tbaa !215
  %321 = getelementptr inbounds float, float* %f0.1, i64 %319
  %t327.us.us.1.prol = load float, float* %321, align 4, !tbaa !217
  %322 = fmul float %t327.us.us.1.prol, %t327.us.us.1.prol
  %323 = fmul float %t326.us.us.1.prol, %t326.us.us.1.prol
  %324 = fsub float %323, %322
  %325 = fadd float %324, %c_real
  %326 = add nuw nsw i64 %317, 64
  %327 = getelementptr inbounds float, float* %f0.0, i64 %326
  store float %325, float* %327, align 4, !tbaa !215
  %328 = load float, float* %320, align 4, !tbaa !215
  %329 = fmul float %t327.us.us.1.prol, %328
  %330 = fmul float %329, 2.000000e+00
  %331 = fadd float %330, %c_imag
  %332 = getelementptr inbounds float, float* %f0.1, i64 %326
  store float %331, float* %332, align 4, !tbaa !217
  br label %"for f0.s1.r4$x.us.us.1.prol.loopexit"

"for f0.s1.r4$x.us.us.1.prol.loopexit":           ; preds = %"for f0.s1.r4$x.us.us.1.prol", %"end for f0.s1.r4$x.loopexit.us.us"
  %indvars.iv207.1.unr.ph = phi i64 [ 2, %"for f0.s1.r4$x.us.us.1.prol" ], [ 1, %"end for f0.s1.r4$x.loopexit.us.us" ]
  br i1 %124, label %"end for f0.s1.r4$x.loopexit.us.us.1", label %"for f0.s1.r4$x.us.us.1"

"for f0.s1.r4$x.us.us":                           ; preds = %"for f0.s1.r4$x.us.us.prol.loopexit", %"for f0.s1.r4$x.us.us"
  %indvars.iv207 = phi i64 [ %indvars.iv.next208.1297, %"for f0.s1.r4$x.us.us" ], [ %indvars.iv207.unr.ph, %"for f0.s1.r4$x.us.us.prol.loopexit" ]
  %333 = trunc i64 %indvars.iv207 to i32
  %334 = shl i32 %333, 6
  %t325.us.us = add i32 %334, %298
  %335 = sext i32 %t325.us.us to i64
  %336 = add nsw i64 %335, -64
  %337 = getelementptr inbounds float, float* %f0.0, i64 %336
  %t326.us.us = load float, float* %337, align 4, !tbaa !215
  %338 = getelementptr inbounds float, float* %f0.1, i64 %336
  %t327.us.us = load float, float* %338, align 4, !tbaa !217
  %339 = fmul float %t327.us.us, %t327.us.us
  %340 = fmul float %t326.us.us, %t326.us.us
  %341 = fsub float %340, %339
  %342 = fadd float %341, %c_real
  %343 = shl nsw i64 %indvars.iv207, 6
  %344 = add nuw nsw i64 %300, %343
  %345 = getelementptr inbounds float, float* %f0.0, i64 %344
  store float %342, float* %345, align 4, !tbaa !215
  %346 = load float, float* %337, align 4, !tbaa !215
  %347 = fmul float %t327.us.us, %346
  %348 = fmul float %347, 2.000000e+00
  %349 = fadd float %348, %c_imag
  %350 = getelementptr inbounds float, float* %f0.1, i64 %344
  store float %349, float* %350, align 4, !tbaa !217
  %indvars.iv.next208 = add nuw nsw i64 %indvars.iv207, 1
  %351 = trunc i64 %indvars.iv.next208 to i32
  %352 = shl i32 %351, 6
  %t325.us.us.1294 = add i32 %352, %298
  %353 = sext i32 %t325.us.us.1294 to i64
  %354 = add nsw i64 %353, -64
  %355 = getelementptr inbounds float, float* %f0.0, i64 %354
  %t326.us.us.1295 = load float, float* %355, align 4, !tbaa !215
  %356 = getelementptr inbounds float, float* %f0.1, i64 %354
  %t327.us.us.1296 = load float, float* %356, align 4, !tbaa !217
  %357 = fmul float %t327.us.us.1296, %t327.us.us.1296
  %358 = fmul float %t326.us.us.1295, %t326.us.us.1295
  %359 = fsub float %358, %357
  %360 = fadd float %359, %c_real
  %361 = shl nsw i64 %indvars.iv.next208, 6
  %362 = add nuw nsw i64 %300, %361
  %363 = getelementptr inbounds float, float* %f0.0, i64 %362
  store float %360, float* %363, align 4, !tbaa !215
  %364 = load float, float* %355, align 4, !tbaa !215
  %365 = fmul float %t327.us.us.1296, %364
  %366 = fmul float %365, 2.000000e+00
  %367 = fadd float %366, %c_imag
  %368 = getelementptr inbounds float, float* %f0.1, i64 %362
  store float %367, float* %368, align 4, !tbaa !217
  %indvars.iv.next208.1297 = add nsw i64 %indvars.iv207, 2
  %369 = icmp eq i64 %indvars.iv.next208.1297, %101
  br i1 %369, label %"end for f0.s1.r4$x.loopexit.us.us", label %"for f0.s1.r4$x.us.us"

"for f0.s0.v2":                                   ; preds = %"for f0.s0.v2.preheader", %"for f0.s0.v2"
  %indvars.iv205 = phi i64 [ 0, %"for f0.s0.v2.preheader" ], [ %indvars.iv.next206, %"for f0.s0.v2" ]
  %370 = shl nsw i64 %indvars.iv205, 6
  %371 = sub nsw i64 %370, %240
  %372 = add nsw i64 %371, %229
  %373 = getelementptr inbounds float, float* %f0.0, i64 %372
  store float %246, float* %373, align 4, !tbaa !215
  %374 = getelementptr inbounds float, float* %f0.1, i64 %372
  store float %158, float* %374, align 4, !tbaa !217
  %375 = add nsw i64 %371, %indvars.iv.next199
  %376 = getelementptr inbounds float, float* %f0.0, i64 %375
  store float %253, float* %376, align 4, !tbaa !215
  %377 = getelementptr inbounds float, float* %f0.1, i64 %375
  store float %158, float* %377, align 4, !tbaa !217
  %378 = add nsw i64 %371, %indvars.iv.next199.1
  %379 = getelementptr inbounds float, float* %f0.0, i64 %378
  store float %260, float* %379, align 4, !tbaa !215
  %380 = getelementptr inbounds float, float* %f0.1, i64 %378
  store float %158, float* %380, align 4, !tbaa !217
  %381 = add nsw i64 %371, %indvars.iv.next199.2
  %382 = getelementptr inbounds float, float* %f0.0, i64 %381
  store float %267, float* %382, align 4, !tbaa !215
  %383 = getelementptr inbounds float, float* %f0.1, i64 %381
  store float %158, float* %383, align 4, !tbaa !217
  %384 = add nsw i64 %371, %indvars.iv.next199.3
  %385 = getelementptr inbounds float, float* %f0.0, i64 %384
  store float %274, float* %385, align 4, !tbaa !215
  %386 = getelementptr inbounds float, float* %f0.1, i64 %384
  store float %158, float* %386, align 4, !tbaa !217
  %387 = add nsw i64 %371, %indvars.iv.next199.4
  %388 = getelementptr inbounds float, float* %f0.0, i64 %387
  store float %281, float* %388, align 4, !tbaa !215
  %389 = getelementptr inbounds float, float* %f0.1, i64 %387
  store float %158, float* %389, align 4, !tbaa !217
  %390 = add nsw i64 %371, %indvars.iv.next199.5
  %391 = getelementptr inbounds float, float* %f0.0, i64 %390
  store float %288, float* %391, align 4, !tbaa !215
  %392 = getelementptr inbounds float, float* %f0.1, i64 %390
  store float %158, float* %392, align 4, !tbaa !217
  %393 = add nsw i64 %371, %indvars.iv.next199.6
  %394 = getelementptr inbounds float, float* %f0.0, i64 %393
  store float %295, float* %394, align 4, !tbaa !215
  %395 = getelementptr inbounds float, float* %f0.1, i64 %393
  store float %158, float* %395, align 4, !tbaa !217
  %396 = add nsw i64 %371, %160
  %397 = add nsw i64 %396, %229
  %398 = getelementptr inbounds float, float* %f0.0, i64 %397
  store float %246, float* %398, align 4, !tbaa !215
  %399 = getelementptr inbounds float, float* %f0.1, i64 %397
  store float %167, float* %399, align 4, !tbaa !217
  %400 = add nsw i64 %396, %indvars.iv.next199
  %401 = getelementptr inbounds float, float* %f0.0, i64 %400
  store float %253, float* %401, align 4, !tbaa !215
  %402 = getelementptr inbounds float, float* %f0.1, i64 %400
  store float %167, float* %402, align 4, !tbaa !217
  %403 = add nsw i64 %396, %indvars.iv.next199.1
  %404 = getelementptr inbounds float, float* %f0.0, i64 %403
  store float %260, float* %404, align 4, !tbaa !215
  %405 = getelementptr inbounds float, float* %f0.1, i64 %403
  store float %167, float* %405, align 4, !tbaa !217
  %406 = add nsw i64 %396, %indvars.iv.next199.2
  %407 = getelementptr inbounds float, float* %f0.0, i64 %406
  store float %267, float* %407, align 4, !tbaa !215
  %408 = getelementptr inbounds float, float* %f0.1, i64 %406
  store float %167, float* %408, align 4, !tbaa !217
  %409 = add nsw i64 %396, %indvars.iv.next199.3
  %410 = getelementptr inbounds float, float* %f0.0, i64 %409
  store float %274, float* %410, align 4, !tbaa !215
  %411 = getelementptr inbounds float, float* %f0.1, i64 %409
  store float %167, float* %411, align 4, !tbaa !217
  %412 = add nsw i64 %396, %indvars.iv.next199.4
  %413 = getelementptr inbounds float, float* %f0.0, i64 %412
  store float %281, float* %413, align 4, !tbaa !215
  %414 = getelementptr inbounds float, float* %f0.1, i64 %412
  store float %167, float* %414, align 4, !tbaa !217
  %415 = add nsw i64 %396, %indvars.iv.next199.5
  %416 = getelementptr inbounds float, float* %f0.0, i64 %415
  store float %288, float* %416, align 4, !tbaa !215
  %417 = getelementptr inbounds float, float* %f0.1, i64 %415
  store float %167, float* %417, align 4, !tbaa !217
  %418 = add nsw i64 %396, %indvars.iv.next199.6
  %419 = getelementptr inbounds float, float* %f0.0, i64 %418
  store float %295, float* %419, align 4, !tbaa !215
  %420 = getelementptr inbounds float, float* %f0.1, i64 %418
  store float %167, float* %420, align 4, !tbaa !217
  %421 = add nsw i64 %371, %169
  %422 = add nsw i64 %421, %229
  %423 = getelementptr inbounds float, float* %f0.0, i64 %422
  store float %246, float* %423, align 4, !tbaa !215
  %424 = getelementptr inbounds float, float* %f0.1, i64 %422
  store float %176, float* %424, align 4, !tbaa !217
  %425 = add nsw i64 %421, %indvars.iv.next199
  %426 = getelementptr inbounds float, float* %f0.0, i64 %425
  store float %253, float* %426, align 4, !tbaa !215
  %427 = getelementptr inbounds float, float* %f0.1, i64 %425
  store float %176, float* %427, align 4, !tbaa !217
  %428 = add nsw i64 %421, %indvars.iv.next199.1
  %429 = getelementptr inbounds float, float* %f0.0, i64 %428
  store float %260, float* %429, align 4, !tbaa !215
  %430 = getelementptr inbounds float, float* %f0.1, i64 %428
  store float %176, float* %430, align 4, !tbaa !217
  %431 = add nsw i64 %421, %indvars.iv.next199.2
  %432 = getelementptr inbounds float, float* %f0.0, i64 %431
  store float %267, float* %432, align 4, !tbaa !215
  %433 = getelementptr inbounds float, float* %f0.1, i64 %431
  store float %176, float* %433, align 4, !tbaa !217
  %434 = add nsw i64 %421, %indvars.iv.next199.3
  %435 = getelementptr inbounds float, float* %f0.0, i64 %434
  store float %274, float* %435, align 4, !tbaa !215
  %436 = getelementptr inbounds float, float* %f0.1, i64 %434
  store float %176, float* %436, align 4, !tbaa !217
  %437 = add nsw i64 %421, %indvars.iv.next199.4
  %438 = getelementptr inbounds float, float* %f0.0, i64 %437
  store float %281, float* %438, align 4, !tbaa !215
  %439 = getelementptr inbounds float, float* %f0.1, i64 %437
  store float %176, float* %439, align 4, !tbaa !217
  %440 = add nsw i64 %421, %indvars.iv.next199.5
  %441 = getelementptr inbounds float, float* %f0.0, i64 %440
  store float %288, float* %441, align 4, !tbaa !215
  %442 = getelementptr inbounds float, float* %f0.1, i64 %440
  store float %176, float* %442, align 4, !tbaa !217
  %443 = add nsw i64 %421, %indvars.iv.next199.6
  %444 = getelementptr inbounds float, float* %f0.0, i64 %443
  store float %295, float* %444, align 4, !tbaa !215
  %445 = getelementptr inbounds float, float* %f0.1, i64 %443
  store float %176, float* %445, align 4, !tbaa !217
  %446 = add nsw i64 %371, %178
  %447 = add nsw i64 %446, %229
  %448 = getelementptr inbounds float, float* %f0.0, i64 %447
  store float %246, float* %448, align 4, !tbaa !215
  %449 = getelementptr inbounds float, float* %f0.1, i64 %447
  store float %185, float* %449, align 4, !tbaa !217
  %450 = add nsw i64 %446, %indvars.iv.next199
  %451 = getelementptr inbounds float, float* %f0.0, i64 %450
  store float %253, float* %451, align 4, !tbaa !215
  %452 = getelementptr inbounds float, float* %f0.1, i64 %450
  store float %185, float* %452, align 4, !tbaa !217
  %453 = add nsw i64 %446, %indvars.iv.next199.1
  %454 = getelementptr inbounds float, float* %f0.0, i64 %453
  store float %260, float* %454, align 4, !tbaa !215
  %455 = getelementptr inbounds float, float* %f0.1, i64 %453
  store float %185, float* %455, align 4, !tbaa !217
  %456 = add nsw i64 %446, %indvars.iv.next199.2
  %457 = getelementptr inbounds float, float* %f0.0, i64 %456
  store float %267, float* %457, align 4, !tbaa !215
  %458 = getelementptr inbounds float, float* %f0.1, i64 %456
  store float %185, float* %458, align 4, !tbaa !217
  %459 = add nsw i64 %446, %indvars.iv.next199.3
  %460 = getelementptr inbounds float, float* %f0.0, i64 %459
  store float %274, float* %460, align 4, !tbaa !215
  %461 = getelementptr inbounds float, float* %f0.1, i64 %459
  store float %185, float* %461, align 4, !tbaa !217
  %462 = add nsw i64 %446, %indvars.iv.next199.4
  %463 = getelementptr inbounds float, float* %f0.0, i64 %462
  store float %281, float* %463, align 4, !tbaa !215
  %464 = getelementptr inbounds float, float* %f0.1, i64 %462
  store float %185, float* %464, align 4, !tbaa !217
  %465 = add nsw i64 %446, %indvars.iv.next199.5
  %466 = getelementptr inbounds float, float* %f0.0, i64 %465
  store float %288, float* %466, align 4, !tbaa !215
  %467 = getelementptr inbounds float, float* %f0.1, i64 %465
  store float %185, float* %467, align 4, !tbaa !217
  %468 = add nsw i64 %446, %indvars.iv.next199.6
  %469 = getelementptr inbounds float, float* %f0.0, i64 %468
  store float %295, float* %469, align 4, !tbaa !215
  %470 = getelementptr inbounds float, float* %f0.1, i64 %468
  store float %185, float* %470, align 4, !tbaa !217
  %471 = add nsw i64 %371, %187
  %472 = add nsw i64 %471, %229
  %473 = getelementptr inbounds float, float* %f0.0, i64 %472
  store float %246, float* %473, align 4, !tbaa !215
  %474 = getelementptr inbounds float, float* %f0.1, i64 %472
  store float %194, float* %474, align 4, !tbaa !217
  %475 = add nsw i64 %471, %indvars.iv.next199
  %476 = getelementptr inbounds float, float* %f0.0, i64 %475
  store float %253, float* %476, align 4, !tbaa !215
  %477 = getelementptr inbounds float, float* %f0.1, i64 %475
  store float %194, float* %477, align 4, !tbaa !217
  %478 = add nsw i64 %471, %indvars.iv.next199.1
  %479 = getelementptr inbounds float, float* %f0.0, i64 %478
  store float %260, float* %479, align 4, !tbaa !215
  %480 = getelementptr inbounds float, float* %f0.1, i64 %478
  store float %194, float* %480, align 4, !tbaa !217
  %481 = add nsw i64 %471, %indvars.iv.next199.2
  %482 = getelementptr inbounds float, float* %f0.0, i64 %481
  store float %267, float* %482, align 4, !tbaa !215
  %483 = getelementptr inbounds float, float* %f0.1, i64 %481
  store float %194, float* %483, align 4, !tbaa !217
  %484 = add nsw i64 %471, %indvars.iv.next199.3
  %485 = getelementptr inbounds float, float* %f0.0, i64 %484
  store float %274, float* %485, align 4, !tbaa !215
  %486 = getelementptr inbounds float, float* %f0.1, i64 %484
  store float %194, float* %486, align 4, !tbaa !217
  %487 = add nsw i64 %471, %indvars.iv.next199.4
  %488 = getelementptr inbounds float, float* %f0.0, i64 %487
  store float %281, float* %488, align 4, !tbaa !215
  %489 = getelementptr inbounds float, float* %f0.1, i64 %487
  store float %194, float* %489, align 4, !tbaa !217
  %490 = add nsw i64 %471, %indvars.iv.next199.5
  %491 = getelementptr inbounds float, float* %f0.0, i64 %490
  store float %288, float* %491, align 4, !tbaa !215
  %492 = getelementptr inbounds float, float* %f0.1, i64 %490
  store float %194, float* %492, align 4, !tbaa !217
  %493 = add nsw i64 %471, %indvars.iv.next199.6
  %494 = getelementptr inbounds float, float* %f0.0, i64 %493
  store float %295, float* %494, align 4, !tbaa !215
  %495 = getelementptr inbounds float, float* %f0.1, i64 %493
  store float %194, float* %495, align 4, !tbaa !217
  %496 = add nsw i64 %371, %196
  %497 = add nsw i64 %496, %229
  %498 = getelementptr inbounds float, float* %f0.0, i64 %497
  store float %246, float* %498, align 4, !tbaa !215
  %499 = getelementptr inbounds float, float* %f0.1, i64 %497
  store float %203, float* %499, align 4, !tbaa !217
  %500 = add nsw i64 %496, %indvars.iv.next199
  %501 = getelementptr inbounds float, float* %f0.0, i64 %500
  store float %253, float* %501, align 4, !tbaa !215
  %502 = getelementptr inbounds float, float* %f0.1, i64 %500
  store float %203, float* %502, align 4, !tbaa !217
  %503 = add nsw i64 %496, %indvars.iv.next199.1
  %504 = getelementptr inbounds float, float* %f0.0, i64 %503
  store float %260, float* %504, align 4, !tbaa !215
  %505 = getelementptr inbounds float, float* %f0.1, i64 %503
  store float %203, float* %505, align 4, !tbaa !217
  %506 = add nsw i64 %496, %indvars.iv.next199.2
  %507 = getelementptr inbounds float, float* %f0.0, i64 %506
  store float %267, float* %507, align 4, !tbaa !215
  %508 = getelementptr inbounds float, float* %f0.1, i64 %506
  store float %203, float* %508, align 4, !tbaa !217
  %509 = add nsw i64 %496, %indvars.iv.next199.3
  %510 = getelementptr inbounds float, float* %f0.0, i64 %509
  store float %274, float* %510, align 4, !tbaa !215
  %511 = getelementptr inbounds float, float* %f0.1, i64 %509
  store float %203, float* %511, align 4, !tbaa !217
  %512 = add nsw i64 %496, %indvars.iv.next199.4
  %513 = getelementptr inbounds float, float* %f0.0, i64 %512
  store float %281, float* %513, align 4, !tbaa !215
  %514 = getelementptr inbounds float, float* %f0.1, i64 %512
  store float %203, float* %514, align 4, !tbaa !217
  %515 = add nsw i64 %496, %indvars.iv.next199.5
  %516 = getelementptr inbounds float, float* %f0.0, i64 %515
  store float %288, float* %516, align 4, !tbaa !215
  %517 = getelementptr inbounds float, float* %f0.1, i64 %515
  store float %203, float* %517, align 4, !tbaa !217
  %518 = add nsw i64 %496, %indvars.iv.next199.6
  %519 = getelementptr inbounds float, float* %f0.0, i64 %518
  store float %295, float* %519, align 4, !tbaa !215
  %520 = getelementptr inbounds float, float* %f0.1, i64 %518
  store float %203, float* %520, align 4, !tbaa !217
  %521 = add nsw i64 %371, %205
  %522 = add nsw i64 %521, %229
  %523 = getelementptr inbounds float, float* %f0.0, i64 %522
  store float %246, float* %523, align 4, !tbaa !215
  %524 = getelementptr inbounds float, float* %f0.1, i64 %522
  store float %212, float* %524, align 4, !tbaa !217
  %525 = add nsw i64 %521, %indvars.iv.next199
  %526 = getelementptr inbounds float, float* %f0.0, i64 %525
  store float %253, float* %526, align 4, !tbaa !215
  %527 = getelementptr inbounds float, float* %f0.1, i64 %525
  store float %212, float* %527, align 4, !tbaa !217
  %528 = add nsw i64 %521, %indvars.iv.next199.1
  %529 = getelementptr inbounds float, float* %f0.0, i64 %528
  store float %260, float* %529, align 4, !tbaa !215
  %530 = getelementptr inbounds float, float* %f0.1, i64 %528
  store float %212, float* %530, align 4, !tbaa !217
  %531 = add nsw i64 %521, %indvars.iv.next199.2
  %532 = getelementptr inbounds float, float* %f0.0, i64 %531
  store float %267, float* %532, align 4, !tbaa !215
  %533 = getelementptr inbounds float, float* %f0.1, i64 %531
  store float %212, float* %533, align 4, !tbaa !217
  %534 = add nsw i64 %521, %indvars.iv.next199.3
  %535 = getelementptr inbounds float, float* %f0.0, i64 %534
  store float %274, float* %535, align 4, !tbaa !215
  %536 = getelementptr inbounds float, float* %f0.1, i64 %534
  store float %212, float* %536, align 4, !tbaa !217
  %537 = add nsw i64 %521, %indvars.iv.next199.4
  %538 = getelementptr inbounds float, float* %f0.0, i64 %537
  store float %281, float* %538, align 4, !tbaa !215
  %539 = getelementptr inbounds float, float* %f0.1, i64 %537
  store float %212, float* %539, align 4, !tbaa !217
  %540 = add nsw i64 %521, %indvars.iv.next199.5
  %541 = getelementptr inbounds float, float* %f0.0, i64 %540
  store float %288, float* %541, align 4, !tbaa !215
  %542 = getelementptr inbounds float, float* %f0.1, i64 %540
  store float %212, float* %542, align 4, !tbaa !217
  %543 = add nsw i64 %521, %indvars.iv.next199.6
  %544 = getelementptr inbounds float, float* %f0.0, i64 %543
  store float %295, float* %544, align 4, !tbaa !215
  %545 = getelementptr inbounds float, float* %f0.1, i64 %543
  store float %212, float* %545, align 4, !tbaa !217
  %546 = add nsw i64 %371, %214
  %547 = add nsw i64 %546, %229
  %548 = getelementptr inbounds float, float* %f0.0, i64 %547
  store float %246, float* %548, align 4, !tbaa !215
  %549 = getelementptr inbounds float, float* %f0.1, i64 %547
  store float %221, float* %549, align 4, !tbaa !217
  %550 = add nsw i64 %546, %indvars.iv.next199
  %551 = getelementptr inbounds float, float* %f0.0, i64 %550
  store float %253, float* %551, align 4, !tbaa !215
  %552 = getelementptr inbounds float, float* %f0.1, i64 %550
  store float %221, float* %552, align 4, !tbaa !217
  %553 = add nsw i64 %546, %indvars.iv.next199.1
  %554 = getelementptr inbounds float, float* %f0.0, i64 %553
  store float %260, float* %554, align 4, !tbaa !215
  %555 = getelementptr inbounds float, float* %f0.1, i64 %553
  store float %221, float* %555, align 4, !tbaa !217
  %556 = add nsw i64 %546, %indvars.iv.next199.2
  %557 = getelementptr inbounds float, float* %f0.0, i64 %556
  store float %267, float* %557, align 4, !tbaa !215
  %558 = getelementptr inbounds float, float* %f0.1, i64 %556
  store float %221, float* %558, align 4, !tbaa !217
  %559 = add nsw i64 %546, %indvars.iv.next199.3
  %560 = getelementptr inbounds float, float* %f0.0, i64 %559
  store float %274, float* %560, align 4, !tbaa !215
  %561 = getelementptr inbounds float, float* %f0.1, i64 %559
  store float %221, float* %561, align 4, !tbaa !217
  %562 = add nsw i64 %546, %indvars.iv.next199.4
  %563 = getelementptr inbounds float, float* %f0.0, i64 %562
  store float %281, float* %563, align 4, !tbaa !215
  %564 = getelementptr inbounds float, float* %f0.1, i64 %562
  store float %221, float* %564, align 4, !tbaa !217
  %565 = add nsw i64 %546, %indvars.iv.next199.5
  %566 = getelementptr inbounds float, float* %f0.0, i64 %565
  store float %288, float* %566, align 4, !tbaa !215
  %567 = getelementptr inbounds float, float* %f0.1, i64 %565
  store float %221, float* %567, align 4, !tbaa !217
  %568 = add nsw i64 %546, %indvars.iv.next199.6
  %569 = getelementptr inbounds float, float* %f0.0, i64 %568
  store float %295, float* %569, align 4, !tbaa !215
  %570 = getelementptr inbounds float, float* %f0.1, i64 %568
  store float %221, float* %570, align 4, !tbaa !217
  %indvars.iv.next206 = add nuw nsw i64 %indvars.iv205, 1
  %571 = icmp eq i64 %indvars.iv.next206, %101
  br i1 %571, label %"for f0.s1.v1.preheader", label %"for f0.s0.v2"

"consume f0":                                     ; preds = %"end for f0.s1.r4$x.loopexit.us.us.7", %"for f0.s1.v1.preheader"
  %.lcssa.lcssa = phi i1 [ false, %"for f0.s1.v1.preheader" ], [ true, %"end for f0.s1.r4$x.loopexit.us.us.7" ]
  call void asm sideeffect "", ""() #9, !srcloc !175
  store volatile i32 %add.i106, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  %572 = sub i64 %240, %112
  br label %"for f1.s0.v1.v16.v16"

"for f1.s0.v1.v16.v16":                           ; preds = %"consume argmin52", %"consume f0"
  %indvars.iv227 = phi i64 [ %indvars.iv.next228, %"consume argmin52" ], [ 0, %"consume f0" ]
  %573 = shl i64 %indvars.iv227, 1
  %574 = add nsw i64 %573, %151
  call void asm sideeffect "", ""() #9, !srcloc !175
  store volatile i32 %add.i116, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  br i1 %.lcssa.lcssa, label %"for argmin.s1.r4$x.preheader", label %"consume argmin", !prof !210

"for argmin.s1.r4$x.preheader":                   ; preds = %"for f1.s0.v1.v16.v16"
  %indvars.iv227.tr235 = trunc i64 %indvars.iv227 to i32
  %575 = shl i32 %indvars.iv227.tr235, 4
  br i1 %lcmp.mod314, label %"for argmin.s1.r4$x.prol", label %"for argmin.s1.r4$x.prol.loopexit"

"for argmin.s1.r4$x.prol":                        ; preds = %"for argmin.s1.r4$x.preheader"
  %t329.prol = add nsw i32 %575, 64
  %576 = sext i32 %t329.prol to i64
  %577 = getelementptr inbounds float, float* %f0.0, i64 %576
  %578 = bitcast float* %577 to <4 x float>*
  %t330.prol = load <4 x float>, <4 x float>* %578, align 32, !tbaa !215
  %579 = getelementptr inbounds float, float* %f0.1, i64 %576
  %580 = bitcast float* %579 to <4 x float>*
  %t331.prol = load <4 x float>, <4 x float>* %580, align 32, !tbaa !217
  %581 = fsub <4 x float> zeroinitializer, %t331.prol
  %582 = fmul <4 x float> %t331.prol, %581
  %583 = fadd <4 x float> %582, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %584 = fmul <4 x float> %t330.prol, %t330.prol
  %585 = fcmp ogt <4 x float> %583, %584
  %586 = shufflevector <4 x i1> %585, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp59.prol = xor <128 x i1> %586, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %587 = and <128 x i1> %tmp59.prol, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %588 = shufflevector <128 x i1> %587, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %argmin.0.value.x4.prol = zext <4 x i1> %588 to <4 x i32>
  %589 = select <128 x i1> %587, <128 x i1> %586, <128 x i1> <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %argmin.1.value.x4.prol = shufflevector <128 x i1> %589, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %590 = zext <4 x i1> %argmin.1.value.x4.prol to <4 x i8>
  br label %"for argmin.s1.r4$x.prol.loopexit"

"for argmin.s1.r4$x.prol.loopexit":               ; preds = %"for argmin.s1.r4$x.prol", %"for argmin.s1.r4$x.preheader"
  ; Join point after the optional peeled (".prol") iteration of the first
  ; argmin reduction loop. Every phi selects the state produced by the peeled
  ; iteration, or a default when control arrives directly from the preheader.
  ;
  ; The first two phis are the values handed to "consume argmin" when the
  ; loop body below is bypassed; they are undef on the preheader edge.
  ; NOTE(review): presumably %138 can only be true after the peeled iteration
  ; has run, so the undef inputs are never observed - %138 is defined outside
  ; this excerpt, confirm there.
  %argmin.0.value.x4.lcssa.unr.ph = phi <4 x i32> [ %argmin.0.value.x4.prol, %"for argmin.s1.r4$x.prol" ], [ undef, %"for argmin.s1.r4$x.preheader" ]
  %.lcssa.unr.ph = phi <4 x i8> [ %590, %"for argmin.s1.r4$x.prol" ], [ undef, %"for argmin.s1.r4$x.preheader" ]
  ; Reduction index at which the loop resumes: 2 if the peeled iteration ran,
  ; otherwise 1.
  %indvars.iv219.unr.ph = phi i64 [ 2, %"for argmin.s1.r4$x.prol" ], [ 1, %"for argmin.s1.r4$x.preheader" ]
  ; Seeds for the loop-carried state: flag lanes start at 1 and index lanes
  ; at 0 when no iteration has executed yet.
  %argmin.153.0.unr.ph = phi <4 x i8> [ %590, %"for argmin.s1.r4$x.prol" ], [ <i8 1, i8 1, i8 1, i8 1>, %"for argmin.s1.r4$x.preheader" ]
  %argmin.054.0.unr.ph = phi <4 x i32> [ %argmin.0.value.x4.prol, %"for argmin.s1.r4$x.prol" ], [ zeroinitializer, %"for argmin.s1.r4$x.preheader" ]
  ; %138 true: skip the loop and go straight to the consumer.
  br i1 %138, label %"consume argmin", label %"for argmin.s1.r4$x"

; Cleanup and latch of the %f1.s0.v0.v17 loop: releases the two buffers %234
; and %237, then advances the loop counter and either exits or branches back.
call_destructor.exit124:                          ; preds = %"consume argmin52"
  ; Each halide_free is preceded by a profiler notification carrying the
  ; same arguments (i32 2, i64 %100) for both buffers.
  ; NOTE(review): presumably 2 is the profiler func index and %100 the
  ; allocation size in bytes - both come from outside this excerpt, confirm.
  call void @halide_profiler_memory_free(i8* null, i8* %11, i32 2, i64 %100) #9
  call void @halide_free(i8* null, i8* nonnull %234) #12
  call void @halide_profiler_memory_free(i8* null, i8* %11, i32 2, i64 %100) #9
  call void @halide_free(i8* null, i8* nonnull %237) #12
  ; Loop latch: counter + 1, compared against the bound %96.
  %591 = add nuw nsw i32 %f1.s0.v0.v17, 1
  %592 = icmp eq i32 %591, %96
  ; Secondary induction variable, stepped by -8 per trip.
  %indvars.iv.next210 = add i32 %indvars.iv209, -8
  br i1 %592, label %"end for f1.s0.v0.v17.loopexit", label %"for f1.s0.v0.v17"

"for argmin.s1.r4$x":                             ; preds = %"for argmin.s1.r4$x.prol.loopexit", %"for argmin.s1.r4$x"
  ; Body of the first argmin reduction loop, unrolled by two: each trip
  ; handles reduction indices %indvars.iv219 and %indvars.iv219 + 1 for a
  ; group of four lanes.
  ;
  ; Loop-carried state:
  ;   %indvars.iv219 - current reduction index
  ;   %argmin.153.0  - per-lane flag (i8): stays 1 while the predicate below
  ;                    has held at every index so far, becomes 0 afterwards
  ;   %argmin.054.0  - per-lane index recorded when the flag dropped to 0
  %indvars.iv219 = phi i64 [ %indvars.iv.next220.1, %"for argmin.s1.r4$x" ], [ %indvars.iv219.unr.ph, %"for argmin.s1.r4$x.prol.loopexit" ]
  %argmin.153.0 = phi <4 x i8> [ %631, %"for argmin.s1.r4$x" ], [ %argmin.153.0.unr.ph, %"for argmin.s1.r4$x.prol.loopexit" ]
  %argmin.054.0 = phi <4 x i32> [ %argmin.0.value.x4.1, %"for argmin.s1.r4$x" ], [ %argmin.054.0.unr.ph, %"for argmin.s1.r4$x.prol.loopexit" ]
  ; Element offset = index * 64 + %575 (%575 is defined outside this excerpt).
  %593 = trunc i64 %indvars.iv219 to i32
  %594 = shl i32 %593, 6
  %t329 = add nsw i32 %594, %575
  %595 = sext i32 %t329 to i64
  ; Load four floats from each of %f0.0 and %f0.1 at that offset.
  %596 = getelementptr inbounds float, float* %f0.0, i64 %595
  %597 = bitcast float* %596 to <4 x float>*
  %t330 = load <4 x float>, <4 x float>* %597, align 32, !tbaa !215
  %598 = getelementptr inbounds float, float* %f0.1, i64 %595
  %599 = bitcast float* %598 to <4 x float>*
  %t331 = load <4 x float>, <4 x float>* %599, align 32, !tbaa !217
  ; Flag as a boolean vector.
  %600 = icmp ne <4 x i8> %argmin.153.0, zeroinitializer
  ; Predicate: (4.0 - t331*t331) > t330*t330, with the left side evaluated as
  ; t331 * (0 - t331) + 4.0. Algebraically this is t330^2 + t331^2 < 4.
  %601 = fsub <4 x float> zeroinitializer, %t331
  %602 = fmul <4 x float> %t331, %601
  %603 = fadd <4 x float> %602, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %604 = fmul <4 x float> %t330, %t330
  %605 = fcmp ogt <4 x float> %603, %604
  %606 = shufflevector <4 x i1> %600, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %607 = shufflevector <4 x i1> %605, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp59 = xor <128 x i1> %607, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  ; Lanes 0-3 of %608: predicate is false at this index AND the running flag
  ; is still set, i.e. the lanes whose recorded index must be written now.
  ; (%tmp59 is %607 inverted in lanes 0-3; the other 124 lanes are undef.)
  %608 = and <128 x i1> %tmp59, %606
  %609 = shufflevector <128 x i1> %608, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Broadcast the current reduction index to all four lanes.
  %610 = insertelement <4 x i32> undef, i32 %593, i32 0
  %611 = shufflevector <4 x i32> %610, <4 x i32> undef, <4 x i32> zeroinitializer
  %argmin.0.value.x4 = select <4 x i1> %609, <4 x i32> %611, <4 x i32> %argmin.054.0
  ; New flag: updated lanes take the predicate value (false there), all other
  ; lanes keep the previous flag.
  %612 = select <128 x i1> %608, <128 x i1> %607, <128 x i1> %606
  ; ---- second unrolled copy: same address/load/predicate sequence for
  ; ---- reduction index + 1
  %indvars.iv.next220 = add nuw nsw i64 %indvars.iv219, 1
  %613 = trunc i64 %indvars.iv.next220 to i32
  %614 = shl i32 %613, 6
  %t329.1 = add nsw i32 %614, %575
  %615 = sext i32 %t329.1 to i64
  %616 = getelementptr inbounds float, float* %f0.0, i64 %615
  %617 = bitcast float* %616 to <4 x float>*
  %t330.1 = load <4 x float>, <4 x float>* %617, align 32, !tbaa !215
  %618 = getelementptr inbounds float, float* %f0.1, i64 %615
  %619 = bitcast float* %618 to <4 x float>*
  %t331.1 = load <4 x float>, <4 x float>* %619, align 32, !tbaa !217
  ; (4.0 - t331.1^2) > t330.1^2, evaluated exactly as in the first copy.
  %620 = fsub <4 x float> zeroinitializer, %t331.1
  %621 = fmul <4 x float> %t331.1, %620
  %622 = fadd <4 x float> %621, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %623 = fmul <4 x float> %t330.1, %t330.1
  %624 = fcmp ogt <4 x float> %622, %623
  %625 = shufflevector <4 x i1> %624, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp59.1 = xor <128 x i1> %625, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  ; Second reduction step, chained on the flag (%612) and the index vector
  ; (%argmin.0.value.x4) produced by the first unrolled copy.
  %626 = and <128 x i1> %tmp59.1, %612
  %627 = shufflevector <128 x i1> %626, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Broadcast index + 1 and record it in the lanes selected by %627.
  %628 = insertelement <4 x i32> undef, i32 %613, i32 0
  %629 = shufflevector <4 x i32> %628, <4 x i32> undef, <4 x i32> zeroinitializer
  %argmin.0.value.x4.1 = select <4 x i1> %627, <4 x i32> %629, <4 x i32> %argmin.0.value.x4
  %630 = select <128 x i1> %626, <128 x i1> %625, <128 x i1> %612
  ; Narrow the flag to lanes 0-3 and widen to its loop-carried <4 x i8> form.
  %argmin.1.value.x4.1 = shufflevector <128 x i1> %630, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %631 = zext <4 x i1> %argmin.1.value.x4.1 to <4 x i8>
  ; Two indices were handled this trip; leave once the index equals %101.
  %indvars.iv.next220.1 = add nsw i64 %indvars.iv219, 2
  %632 = icmp eq i64 %indvars.iv.next220.1, %101
  br i1 %632, label %"consume argmin", label %"for argmin.s1.r4$x"

"consume argmin":                                 ; preds = %"for argmin.s1.r4$x.prol.loopexit", %"for argmin.s1.r4$x", %"for f1.s0.v1.v16.v16"
  ; Consumer of the first four-lane argmin reduction: converts the final
  ; (flag, index) pair into the output value and stores it.
  ;
  ; Final reduction state. On the direct edge from "for f1.s0.v1.v16.v16" no
  ; reduction step ran, so the flags are all 1 and the indices all 0.
  %argmin.153.1 = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, %"for f1.s0.v1.v16.v16" ], [ %.lcssa.unr.ph, %"for argmin.s1.r4$x.prol.loopexit" ], [ %631, %"for argmin.s1.r4$x" ]
  %argmin.054.1 = phi <4 x i32> [ zeroinitializer, %"for f1.s0.v1.v16.v16" ], [ %argmin.0.value.x4.lcssa.unr.ph, %"for argmin.s1.r4$x.prol.loopexit" ], [ %argmin.0.value.x4.1, %"for argmin.s1.r4$x" ]
  ; Volatile store of %add.i106 to %current_func.i105, bracketed by empty
  ; side-effecting inline asm.
  ; NOTE(review): presumably this switches the Halide profiler's "current
  ; func" marker and the empty asm keeps the store from being moved - the
  ; pointer and ids are defined outside this excerpt, confirm.
  call void asm sideeffect "", ""() #9, !srcloc !175
  store volatile i32 %add.i106, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  ; Output per lane: 0 where the flag is still set, otherwise the recorded
  ; reduction index.
  %633 = icmp ne <4 x i8> %argmin.153.1, zeroinitializer
  %634 = select <4 x i1> %633, <4 x i32> zeroinitializer, <4 x i32> %argmin.054.1
  ; Destination element offset = %572 + %574 * %83, relative to %113.
  %635 = mul nsw i64 %574, %83
  %636 = add i64 %572, %635
  %637 = getelementptr inbounds i32, i32* %113, i64 %636
  %638 = bitcast i32* %637 to <4 x i32>*
  store <4 x i32> %634, <4 x i32>* %638, align 4, !tbaa !219
  ; Same marker-store pattern as above, this time writing %add.i116.
  call void asm sideeffect "", ""() #9, !srcloc !175
  store volatile i32 %add.i116, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  ; Run the reduction for the next four lanes, or fall through to its
  ; consumer with the initial state when %.lcssa.lcssa is false.
  br i1 %.lcssa.lcssa, label %"for argmin.s1.r4$x33.preheader", label %"consume argmin38", !prof !210

"for argmin.s1.r4$x33.preheader":                 ; preds = %"consume argmin"
  ; Preheader of the second four-lane reduction (lanes 4-7 of the group).
  ; %639 = low 32 bits of %indvars.iv227, times 16: the base element offset
  ; used by every load in this reduction.
  %indvars.iv227.tr234 = trunc i64 %indvars.iv227 to i32
  %639 = shl i32 %indvars.iv227.tr234, 4
  ; %lcmp.mod316 decides whether one peeled iteration runs before the
  ; 2x-unrolled loop.
  br i1 %lcmp.mod316, label %"for argmin.s1.r4$x33.prol", label %"for argmin.s1.r4$x33.prol.loopexit"

"for argmin.s1.r4$x33.prol":                      ; preds = %"for argmin.s1.r4$x33.preheader"
  ; Peeled iteration for reduction index 1 of the second lane group.
  ; Offset = %639 + 64, where 64 is index 1 shifted left by 6 as in the loop
  ; body. %639 has its low four bits clear, so "or 4" adds 4 and selects
  ; elements 4..7.
  %t337.prol = add nsw i32 %639, 64
  %640 = sext i32 %t337.prol to i64
  %641 = or i64 %640, 4
  %642 = getelementptr inbounds float, float* %f0.0, i64 %641
  %643 = bitcast float* %642 to <4 x float>*
  %t338.prol = load <4 x float>, <4 x float>* %643, align 16, !tbaa !215
  %644 = getelementptr inbounds float, float* %f0.1, i64 %641
  %645 = bitcast float* %644 to <4 x float>*
  %t339.prol = load <4 x float>, <4 x float>* %645, align 16, !tbaa !217
  ; Predicate: (4.0 - t339^2) > t338^2, with the left side evaluated as
  ; t339 * (0 - t339) + 4.0.
  %646 = fsub <4 x float> zeroinitializer, %t339.prol
  %647 = fmul <4 x float> %t339.prol, %646
  %648 = fadd <4 x float> %647, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %649 = fmul <4 x float> %t338.prol, %t338.prol
  %650 = fcmp ogt <4 x float> %648, %649
  %651 = shufflevector <4 x i1> %650, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp58.prol = xor <128 x i1> %651, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %652 = and <128 x i1> %tmp58.prol, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  ; Lanes 0-3 of the update mask %652. Zero-extending the mask yields 1 in
  ; updated lanes and 0 elsewhere, i.e. reduction index 1 where the predicate
  ; failed and the initial index 0 otherwise.
  %653 = shufflevector <128 x i1> %652, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %argmin.0.value.x436.prol = zext <4 x i1> %653 to <4 x i32>
  %654 = select <128 x i1> %652, <128 x i1> %651, <128 x i1> <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  ; Keep lanes 0-3 of the new flag vector %654, widen them to i8 and continue
  ; at the join block.
  %argmin.1.value.x437.prol = shufflevector <128 x i1> %654, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %655 = zext <4 x i1> %argmin.1.value.x437.prol to <4 x i8>
  br label %"for argmin.s1.r4$x33.prol.loopexit"

"for argmin.s1.r4$x33.prol.loopexit":             ; preds = %"for argmin.s1.r4$x33.prol", %"for argmin.s1.r4$x33.preheader"
  ; Join point after the optional peeled iteration of the second reduction.
  ;
  ; The first two phis are the values "consume argmin38" reads when the loop
  ; below is bypassed; they are undef on the preheader edge.
  ; NOTE(review): presumably %140 is only true when the peeled iteration ran,
  ; so the undef inputs never reach the consumer - %140 is defined outside
  ; this excerpt, confirm there.
  %argmin.0.value.x436.lcssa.unr.ph = phi <4 x i32> [ %argmin.0.value.x436.prol, %"for argmin.s1.r4$x33.prol" ], [ undef, %"for argmin.s1.r4$x33.preheader" ]
  %.lcssa275.unr.ph = phi <4 x i8> [ %655, %"for argmin.s1.r4$x33.prol" ], [ undef, %"for argmin.s1.r4$x33.preheader" ]
  ; First index for the unrolled loop: 2 after the peeled iteration, else 1.
  %indvars.iv221.unr.ph = phi i64 [ 2, %"for argmin.s1.r4$x33.prol" ], [ 1, %"for argmin.s1.r4$x33.preheader" ]
  ; Loop-carried seeds: flags 1 / indices 0 if nothing has been reduced yet.
  %argmin.153.2.unr.ph = phi <4 x i8> [ %655, %"for argmin.s1.r4$x33.prol" ], [ <i8 1, i8 1, i8 1, i8 1>, %"for argmin.s1.r4$x33.preheader" ]
  %argmin.054.2.unr.ph = phi <4 x i32> [ %argmin.0.value.x436.prol, %"for argmin.s1.r4$x33.prol" ], [ zeroinitializer, %"for argmin.s1.r4$x33.preheader" ]
  ; %140 true: bypass the loop.
  br i1 %140, label %"consume argmin38", label %"for argmin.s1.r4$x33"

"for argmin.s1.r4$x33":                           ; preds = %"for argmin.s1.r4$x33.prol.loopexit", %"for argmin.s1.r4$x33"
  ; Body of the second argmin reduction loop (elements 4..7 of the group),
  ; unrolled by two.
  ;
  ; Loop-carried state:
  ;   %indvars.iv221 - current reduction index
  ;   %argmin.153.2  - per-lane flag (i8), 1 until the predicate first fails
  ;   %argmin.054.2  - per-lane index recorded at that first failure
  %indvars.iv221 = phi i64 [ %indvars.iv.next222.1, %"for argmin.s1.r4$x33" ], [ %indvars.iv221.unr.ph, %"for argmin.s1.r4$x33.prol.loopexit" ]
  %argmin.153.2 = phi <4 x i8> [ %696, %"for argmin.s1.r4$x33" ], [ %argmin.153.2.unr.ph, %"for argmin.s1.r4$x33.prol.loopexit" ]
  %argmin.054.2 = phi <4 x i32> [ %argmin.0.value.x436.1, %"for argmin.s1.r4$x33" ], [ %argmin.054.2.unr.ph, %"for argmin.s1.r4$x33.prol.loopexit" ]
  ; Element offset = (index * 64 + %639) + 4. Both addends have their low
  ; four bits clear, so "or 4" acts as an addition.
  %656 = trunc i64 %indvars.iv221 to i32
  %657 = shl i32 %656, 6
  %t337 = add nsw i32 %657, %639
  %658 = sext i32 %t337 to i64
  %659 = or i64 %658, 4
  %660 = getelementptr inbounds float, float* %f0.0, i64 %659
  %661 = bitcast float* %660 to <4 x float>*
  %t338 = load <4 x float>, <4 x float>* %661, align 16, !tbaa !215
  %662 = getelementptr inbounds float, float* %f0.1, i64 %659
  %663 = bitcast float* %662 to <4 x float>*
  %t339 = load <4 x float>, <4 x float>* %663, align 16, !tbaa !217
  ; Flag as a boolean vector.
  %664 = icmp ne <4 x i8> %argmin.153.2, zeroinitializer
  ; Predicate: (4.0 - t339^2) > t338^2, i.e. algebraically
  ; t338^2 + t339^2 < 4.
  %665 = fsub <4 x float> zeroinitializer, %t339
  %666 = fmul <4 x float> %t339, %665
  %667 = fadd <4 x float> %666, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %668 = fmul <4 x float> %t338, %t338
  %669 = fcmp ogt <4 x float> %667, %668
  %670 = shufflevector <4 x i1> %664, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %671 = shufflevector <4 x i1> %669, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp58 = xor <128 x i1> %671, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  ; Update mask (lanes 0-3 meaningful): predicate false here AND flag still
  ; set. %tmp58 is %671 inverted in lanes 0-3.
  %672 = and <128 x i1> %tmp58, %670
  %673 = shufflevector <128 x i1> %672, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Splat the current index and write it into the masked lanes.
  %674 = insertelement <4 x i32> undef, i32 %656, i32 0
  %675 = shufflevector <4 x i32> %674, <4 x i32> undef, <4 x i32> zeroinitializer
  %argmin.0.value.x436 = select <4 x i1> %673, <4 x i32> %675, <4 x i32> %argmin.054.2
  ; Flag after this step: predicate value in updated lanes, old flag elsewhere.
  %676 = select <128 x i1> %672, <128 x i1> %671, <128 x i1> %670
  ; ---- second unrolled copy, reduction index + 1 ----
  %indvars.iv.next222 = add nuw nsw i64 %indvars.iv221, 1
  %677 = trunc i64 %indvars.iv.next222 to i32
  %678 = shl i32 %677, 6
  %t337.1 = add nsw i32 %678, %639
  %679 = sext i32 %t337.1 to i64
  %680 = or i64 %679, 4
  %681 = getelementptr inbounds float, float* %f0.0, i64 %680
  %682 = bitcast float* %681 to <4 x float>*
  %t338.1 = load <4 x float>, <4 x float>* %682, align 16, !tbaa !215
  %683 = getelementptr inbounds float, float* %f0.1, i64 %680
  %684 = bitcast float* %683 to <4 x float>*
  %t339.1 = load <4 x float>, <4 x float>* %684, align 16, !tbaa !217
  ; Same predicate as the first copy, on the freshly loaded values.
  %685 = fsub <4 x float> zeroinitializer, %t339.1
  %686 = fmul <4 x float> %t339.1, %685
  %687 = fadd <4 x float> %686, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %688 = fmul <4 x float> %t338.1, %t338.1
  %689 = fcmp ogt <4 x float> %687, %688
  %690 = shufflevector <4 x i1> %689, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp58.1 = xor <128 x i1> %690, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  ; Second reduction step, continuing from the flag %676 and the index
  ; vector %argmin.0.value.x436 of the first copy.
  %691 = and <128 x i1> %tmp58.1, %676
  %692 = shufflevector <128 x i1> %691, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Splat index + 1 and record it in the lanes selected by %692.
  %693 = insertelement <4 x i32> undef, i32 %677, i32 0
  %694 = shufflevector <4 x i32> %693, <4 x i32> undef, <4 x i32> zeroinitializer
  %argmin.0.value.x436.1 = select <4 x i1> %692, <4 x i32> %694, <4 x i32> %argmin.0.value.x436
  %695 = select <128 x i1> %691, <128 x i1> %690, <128 x i1> %676
  ; Back to the loop-carried <4 x i8> flag representation.
  %argmin.1.value.x437.1 = shufflevector <128 x i1> %695, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %696 = zext <4 x i1> %argmin.1.value.x437.1 to <4 x i8>
  ; Advance by the unroll factor; exit when the index reaches %101.
  %indvars.iv.next222.1 = add nsw i64 %indvars.iv221, 2
  %697 = icmp eq i64 %indvars.iv.next222.1, %101
  br i1 %697, label %"consume argmin38", label %"for argmin.s1.r4$x33"

"consume argmin38":                               ; preds = %"for argmin.s1.r4$x33.prol.loopexit", %"for argmin.s1.r4$x33", %"consume argmin"
  ; Consumer of the second four-lane reduction: writes the next four i32
  ; results.
  ;
  ; Final (flag, index) state; the edge from "consume argmin" carries the
  ; untouched initial state (flags 1, indices 0) because the reduction was
  ; skipped.
  %argmin.153.3 = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, %"consume argmin" ], [ %.lcssa275.unr.ph, %"for argmin.s1.r4$x33.prol.loopexit" ], [ %696, %"for argmin.s1.r4$x33" ]
  %argmin.054.3 = phi <4 x i32> [ zeroinitializer, %"consume argmin" ], [ %argmin.0.value.x436.lcssa.unr.ph, %"for argmin.s1.r4$x33.prol.loopexit" ], [ %argmin.0.value.x436.1, %"for argmin.s1.r4$x33" ]
  ; Volatile store of %add.i106 to %current_func.i105 between two empty
  ; side-effecting asm statements.
  ; NOTE(review): presumably the profiler's current-func switch - confirm
  ; against the definitions outside this excerpt.
  call void asm sideeffect "", ""() #9, !srcloc !175
  store volatile i32 %add.i106, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  ; Result per lane: 0 if the flag is still set, else the recorded index.
  %698 = icmp ne <4 x i8> %argmin.153.3, zeroinitializer
  %699 = select <4 x i1> %698, <4 x i32> zeroinitializer, <4 x i32> %argmin.054.3
  ; Store four elements past %636, the offset computed in "consume argmin".
  %700 = add nsw i64 %636, 4
  %701 = getelementptr inbounds i32, i32* %113, i64 %700
  %702 = bitcast i32* %701 to <4 x i32>*
  store <4 x i32> %699, <4 x i32>* %702, align 4, !tbaa !219
  ; Marker store again, now with %add.i116.
  call void asm sideeffect "", ""() #9, !srcloc !175
  store volatile i32 %add.i116, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  ; Continue with the third lane group's reduction, or skip to its consumer.
  br i1 %.lcssa.lcssa, label %"for argmin.s1.r4$x40.preheader", label %"consume argmin45", !prof !210

"for argmin.s1.r4$x40.preheader":                 ; preds = %"consume argmin38"
  ; %703 = (low 32 bits of %indvars.iv227) << 4. It is the base element offset
  ; added to every address computed by the r4$x40 prologue and loop.
  %indvars.iv227.tr233 = trunc i64 %indvars.iv227 to i32
  %703 = shl i32 %indvars.iv227.tr233, 4
  ; %lcmp.mod318 selects whether one peeled iteration (".prol") runs before
  ; the two-iterations-per-trip loop body.
  br i1 %lcmp.mod318, label %"for argmin.s1.r4$x40.prol", label %"for argmin.s1.r4$x40.prol.loopexit"

"for argmin.s1.r4$x40.prol":                      ; preds = %"for argmin.s1.r4$x40.preheader"
  ; Peeled reduction step for index 1: the loop proper uses (index << 6) + %703,
  ; and 64 is 1 << 6. OR-ing with 8 adds the offset of this 4-lane group
  ; (the sibling r4$x47 blocks use 12); the low bits are clear because both
  ; terms are multiples of 16.
  %t345.prol = add nsw i32 %703, 64
  %704 = sext i32 %t345.prol to i64
  %705 = or i64 %704, 8
  ; Load four floats from each of the two arrays %f0.0 and %f0.1 at that offset.
  %706 = getelementptr inbounds float, float* %f0.0, i64 %705
  %707 = bitcast float* %706 to <4 x float>*
  %t346.prol = load <4 x float>, <4 x float>* %707, align 32, !tbaa !215
  %708 = getelementptr inbounds float, float* %f0.1, i64 %705
  %709 = bitcast float* %708 to <4 x float>*
  %t347.prol = load <4 x float>, <4 x float>* %709, align 32, !tbaa !217
  ; %712 = 4.0 - t347^2, computed as t347 * (0 - t347) + 4.0.
  %710 = fsub <4 x float> zeroinitializer, %t347.prol
  %711 = fmul <4 x float> %t347.prol, %710
  %712 = fadd <4 x float> %711, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  ; %714[lane] = (4.0 - t347^2) > t346^2. The compare is ordered, so a NaN in
  ; either operand yields false.
  %713 = fmul <4 x float> %t346.prol, %t346.prol
  %714 = fcmp ogt <4 x float> %712, %713
  %715 = shufflevector <4 x i1> %714, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp57.prol = xor <128 x i1> %715, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %716 = and <128 x i1> %tmp57.prol, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %717 = shufflevector <128 x i1> %716, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %argmin.0.value.x443.prol = zext <4 x i1> %717 to <4 x i32>
  %718 = select <128 x i1> %716, <128 x i1> %715, <128 x i1> <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %argmin.1.value.x444.prol = shufflevector <128 x i1> %718, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %719 = zext <4 x i1> %argmin.1.value.x444.prol to <4 x i8>
  br label %"for argmin.s1.r4$x40.prol.loopexit"

"for argmin.s1.r4$x40.prol.loopexit":             ; preds = %"for argmin.s1.r4$x40.prol", %"for argmin.s1.r4$x40.preheader"
  ; Join of "peeled iteration ran" and "peeled iteration skipped".
  ; The two *.lcssa*.unr.ph values are consumed by the phis in
  ; "consume argmin45" when the loop below is bypassed; they are undef on the
  ; skipped path.
  ; NOTE(review): this relies on %142 being false whenever the peeled iteration
  ; was skipped; %142 is defined outside this excerpt, so confirm there.
  %argmin.0.value.x443.lcssa.unr.ph = phi <4 x i32> [ %argmin.0.value.x443.prol, %"for argmin.s1.r4$x40.prol" ], [ undef, %"for argmin.s1.r4$x40.preheader" ]
  %.lcssa276.unr.ph = phi <4 x i8> [ %719, %"for argmin.s1.r4$x40.prol" ], [ undef, %"for argmin.s1.r4$x40.preheader" ]
  ; The loop resumes at index 2 if index 1 was handled by the peeled
  ; iteration, otherwise it starts at index 1.
  %indvars.iv223.unr.ph = phi i64 [ 2, %"for argmin.s1.r4$x40.prol" ], [ 1, %"for argmin.s1.r4$x40.preheader" ]
  ; Starting state for the loop: the peeled iteration's results, or the
  ; initial state (flag bytes = 1, indices = 0).
  %argmin.153.4.unr.ph = phi <4 x i8> [ %719, %"for argmin.s1.r4$x40.prol" ], [ <i8 1, i8 1, i8 1, i8 1>, %"for argmin.s1.r4$x40.preheader" ]
  %argmin.054.4.unr.ph = phi <4 x i32> [ %argmin.0.value.x443.prol, %"for argmin.s1.r4$x40.prol" ], [ zeroinitializer, %"for argmin.s1.r4$x40.preheader" ]
  ; %142 true skips the loop entirely and goes straight to the consumer.
  br i1 %142, label %"consume argmin45", label %"for argmin.s1.r4$x40"

"for argmin.s1.r4$x40":                           ; preds = %"for argmin.s1.r4$x40.prol.loopexit", %"for argmin.s1.r4$x40"
  ; Loop-carried state: %indvars.iv223 is the reduction index, %argmin.153.4
  ; holds one flag byte per lane, and %argmin.054.4 holds the index recorded
  ; so far for each lane.
  %indvars.iv223 = phi i64 [ %indvars.iv.next224.1, %"for argmin.s1.r4$x40" ], [ %indvars.iv223.unr.ph, %"for argmin.s1.r4$x40.prol.loopexit" ]
  %argmin.153.4 = phi <4 x i8> [ %760, %"for argmin.s1.r4$x40" ], [ %argmin.153.4.unr.ph, %"for argmin.s1.r4$x40.prol.loopexit" ]
  %argmin.054.4 = phi <4 x i32> [ %argmin.0.value.x443.1, %"for argmin.s1.r4$x40" ], [ %argmin.054.4.unr.ph, %"for argmin.s1.r4$x40.prol.loopexit" ]
  ; First of two steps per trip. Element offset = (index << 6) + %703, then
  ; OR 8 for this 4-lane group.
  %720 = trunc i64 %indvars.iv223 to i32
  %721 = shl i32 %720, 6
  %t345 = add nsw i32 %721, %703
  %722 = sext i32 %t345 to i64
  %723 = or i64 %722, 8
  ; Load four floats from each of %f0.0 and %f0.1 at that offset.
  %724 = getelementptr inbounds float, float* %f0.0, i64 %723
  %725 = bitcast float* %724 to <4 x float>*
  %t346 = load <4 x float>, <4 x float>* %725, align 32, !tbaa !215
  %726 = getelementptr inbounds float, float* %f0.1, i64 %723
  %727 = bitcast float* %726 to <4 x float>*
  %t347 = load <4 x float>, <4 x float>* %727, align 32, !tbaa !217
  ; %728[lane] is true while the carried flag byte is nonzero.
  %728 = icmp ne <4 x i8> %argmin.153.4, zeroinitializer
  ; %733[lane] = (4.0 - t347^2) > t346^2, with 4.0 - t347^2 computed as
  ; t347 * (0 - t347) + 4.0. Ordered compare: false if either side is NaN.
  %729 = fsub <4 x float> zeroinitializer, %t347
  %730 = fmul <4 x float> %t347, %729
  %731 = fadd <4 x float> %730, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %732 = fmul <4 x float> %t346, %t346
  %733 = fcmp ogt <4 x float> %731, %732
  %734 = shufflevector <4 x i1> %728, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %735 = shufflevector <4 x i1> %733, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp57 = xor <128 x i1> %735, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  ; %736 marks lanes where this step's comparison is false (%tmp57 is its
  ; complement in the four live lanes) while the carried flag is still set.
  ; Only elements 0..3 of the 128-wide vectors are ever extracted.
  %736 = and <128 x i1> %tmp57, %734
  %737 = shufflevector <128 x i1> %736, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Broadcast the current index (%720) to all four lanes; marked lanes record
  ; it, the others keep the carried index.
  %738 = insertelement <4 x i32> undef, i32 %720, i32 0
  %739 = shufflevector <4 x i32> %738, <4 x i32> undef, <4 x i32> zeroinitializer
  %argmin.0.value.x443 = select <4 x i1> %737, <4 x i32> %739, <4 x i32> %argmin.054.4
  ; Updated flag vector: the comparison result in marked lanes, the carried
  ; flag elsewhere.
  %740 = select <128 x i1> %736, <128 x i1> %735, <128 x i1> %734
  ; Second step of the trip: index + 1, same address computation, loads and
  ; comparison as the first step.
  %indvars.iv.next224 = add nuw nsw i64 %indvars.iv223, 1
  %741 = trunc i64 %indvars.iv.next224 to i32
  %742 = shl i32 %741, 6
  %t345.1 = add nsw i32 %742, %703
  %743 = sext i32 %t345.1 to i64
  %744 = or i64 %743, 8
  %745 = getelementptr inbounds float, float* %f0.0, i64 %744
  %746 = bitcast float* %745 to <4 x float>*
  %t346.1 = load <4 x float>, <4 x float>* %746, align 32, !tbaa !215
  %747 = getelementptr inbounds float, float* %f0.1, i64 %744
  %748 = bitcast float* %747 to <4 x float>*
  %t347.1 = load <4 x float>, <4 x float>* %748, align 32, !tbaa !217
  ; %753[lane] = (4.0 - t347.1^2) > t346.1^2 (ordered compare).
  %749 = fsub <4 x float> zeroinitializer, %t347.1
  %750 = fmul <4 x float> %t347.1, %749
  %751 = fadd <4 x float> %750, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %752 = fmul <4 x float> %t346.1, %t346.1
  %753 = fcmp ogt <4 x float> %751, %752
  %754 = shufflevector <4 x i1> %753, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp57.1 = xor <128 x i1> %754, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  ; Second step's update mask: comparison false (%tmp57.1) and the flag left
  ; by the first step (%740) still set.
  %755 = and <128 x i1> %tmp57.1, %740
  %756 = shufflevector <128 x i1> %755, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Marked lanes record index + 1 (%741 broadcast); others keep the value
  ; from the first step.
  %757 = insertelement <4 x i32> undef, i32 %741, i32 0
  %758 = shufflevector <4 x i32> %757, <4 x i32> undef, <4 x i32> zeroinitializer
  %argmin.0.value.x443.1 = select <4 x i1> %756, <4 x i32> %758, <4 x i32> %argmin.0.value.x443
  ; Updated flags are narrowed back to the four live lanes and widened to i8,
  ; the type of the loop-carried flag phi.
  %759 = select <128 x i1> %755, <128 x i1> %754, <128 x i1> %740
  %argmin.1.value.x444.1 = shufflevector <128 x i1> %759, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %760 = zext <4 x i1> %argmin.1.value.x444.1 to <4 x i8>
  ; Advance the index by two and leave the loop once it equals %101.
  %indvars.iv.next224.1 = add nsw i64 %indvars.iv223, 2
  %761 = icmp eq i64 %indvars.iv.next224.1, %101
  br i1 %761, label %"consume argmin45", label %"for argmin.s1.r4$x40"

"consume argmin45":                               ; preds = %"for argmin.s1.r4$x40.prol.loopexit", %"for argmin.s1.r4$x40", %"consume argmin38"
  ; Final state of the r4$x40 reduction for four lanes, chosen by predecessor:
  ; the initial state (flag bytes = 1, indices = 0) when the reduction was
  ; skipped from "consume argmin38", otherwise the values leaving the prologue
  ; exit block or the unrolled loop.
  %argmin.153.5 = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, %"consume argmin38" ], [ %.lcssa276.unr.ph, %"for argmin.s1.r4$x40.prol.loopexit" ], [ %760, %"for argmin.s1.r4$x40" ]
  %argmin.054.5 = phi <4 x i32> [ zeroinitializer, %"consume argmin38" ], [ %argmin.0.value.x443.lcssa.unr.ph, %"for argmin.s1.r4$x40.prol.loopexit" ], [ %argmin.0.value.x443.1, %"for argmin.s1.r4$x40" ]
  ; Volatile write of %add.i106 to %current_func.i105, bracketed by empty
  ; side-effecting inline asm (presumably an ordering fence for the optimizer).
  ; NOTE(review): the slot looks like the Halide profiler's "current function"
  ; id; confirm where %current_func.i105 is defined.
  call void asm sideeffect "", ""() #9, !srcloc !175
  store volatile i32 %add.i106, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  ; Lanes whose flag byte is still nonzero produce 0; the other lanes produce
  ; the index recorded in %argmin.054.5.
  %762 = icmp ne <4 x i8> %argmin.153.5, zeroinitializer
  %763 = select <4 x i1> %762, <4 x i32> zeroinitializer, <4 x i32> %argmin.054.5
  ; Destination element offset in %113 is %572 + (%574 + 1) * %83.
  ; NOTE(review): this reads as "row %574 + 1 with stride %83"; confirm
  ; against the definitions of %572, %574 and %83.
  %764 = add nsw i64 %574, 1
  %765 = mul nsw i64 %764, %83
  %766 = add i64 %572, %765
  %767 = getelementptr inbounds i32, i32* %113, i64 %766
  %768 = bitcast i32* %767 to <4 x i32>*
  store <4 x i32> %763, <4 x i32>* %768, align 4, !tbaa !219
  ; Same volatile slot is rewritten, this time with %add.i116.
  call void asm sideeffect "", ""() #9, !srcloc !175
  store volatile i32 %add.i116, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  ; Run the next reduction (r4$x47) only when %.lcssa.lcssa is true; otherwise
  ; continue at "consume argmin52".
  br i1 %.lcssa.lcssa, label %"for argmin.s1.r4$x47.preheader", label %"consume argmin52", !prof !210

"for argmin.s1.r4$x47.preheader":                 ; preds = %"consume argmin45"
  ; %769 = (low 32 bits of %indvars.iv227) << 4. It is the base element offset
  ; added to every address computed by the r4$x47 prologue and loop.
  %indvars.iv227.tr = trunc i64 %indvars.iv227 to i32
  %769 = shl i32 %indvars.iv227.tr, 4
  ; %lcmp.mod320 selects whether one peeled iteration (".prol") runs before
  ; the two-iterations-per-trip loop body.
  br i1 %lcmp.mod320, label %"for argmin.s1.r4$x47.prol", label %"for argmin.s1.r4$x47.prol.loopexit"

"for argmin.s1.r4$x47.prol":                      ; preds = %"for argmin.s1.r4$x47.preheader"
  ; Peeled reduction step for index 1: the loop proper uses (index << 6) + %769,
  ; and 64 is 1 << 6. OR-ing with 12 adds the offset of this 4-lane group
  ; (the sibling r4$x40 blocks use 8); the low bits are clear because both
  ; terms are multiples of 16.
  %t353.prol = add nsw i32 %769, 64
  %770 = sext i32 %t353.prol to i64
  %771 = or i64 %770, 12
  ; Load four floats from each of %f0.0 and %f0.1 at that offset. These loads
  ; claim 16-byte alignment, where the r4$x40 loads claim 32.
  %772 = getelementptr inbounds float, float* %f0.0, i64 %771
  %773 = bitcast float* %772 to <4 x float>*
  %t354.prol = load <4 x float>, <4 x float>* %773, align 16, !tbaa !215
  %774 = getelementptr inbounds float, float* %f0.1, i64 %771
  %775 = bitcast float* %774 to <4 x float>*
  %t355.prol = load <4 x float>, <4 x float>* %775, align 16, !tbaa !217
  ; %778 = 4.0 - t355^2, computed as t355 * (0 - t355) + 4.0.
  %776 = fsub <4 x float> zeroinitializer, %t355.prol
  %777 = fmul <4 x float> %t355.prol, %776
  %778 = fadd <4 x float> %777, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  ; %780[lane] = (4.0 - t355^2) > t354^2. The compare is ordered, so a NaN in
  ; either operand yields false.
  %779 = fmul <4 x float> %t354.prol, %t354.prol
  %780 = fcmp ogt <4 x float> %778, %779
  %781 = shufflevector <4 x i1> %780, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp.prol = xor <128 x i1> %781, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %782 = and <128 x i1> %tmp.prol, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %783 = shufflevector <128 x i1> %782, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %argmin.0.value.x450.prol = zext <4 x i1> %783 to <4 x i32>
  %784 = select <128 x i1> %782, <128 x i1> %781, <128 x i1> <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %argmin.1.value.x451.prol = shufflevector <128 x i1> %784, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %785 = zext <4 x i1> %argmin.1.value.x451.prol to <4 x i8>
  br label %"for argmin.s1.r4$x47.prol.loopexit"

"for argmin.s1.r4$x47.prol.loopexit":             ; preds = %"for argmin.s1.r4$x47.prol", %"for argmin.s1.r4$x47.preheader"
  ; Join of "peeled iteration ran" and "peeled iteration skipped".
  ; The two *.lcssa*.unr.ph values are undef on the skipped path.
  ; NOTE(review): they are presumably read by "consume argmin52" when the loop
  ; is bypassed, which would require %144 to be false whenever the peeled
  ; iteration was skipped; both are outside this excerpt, so confirm there.
  %argmin.0.value.x450.lcssa.unr.ph = phi <4 x i32> [ %argmin.0.value.x450.prol, %"for argmin.s1.r4$x47.prol" ], [ undef, %"for argmin.s1.r4$x47.preheader" ]
  %.lcssa277.unr.ph = phi <4 x i8> [ %785, %"for argmin.s1.r4$x47.prol" ], [ undef, %"for argmin.s1.r4$x47.preheader" ]
  ; The loop resumes at index 2 if index 1 was handled by the peeled
  ; iteration, otherwise it starts at index 1.
  %indvars.iv225.unr.ph = phi i64 [ 2, %"for argmin.s1.r4$x47.prol" ], [ 1, %"for argmin.s1.r4$x47.preheader" ]
  ; Starting state for the loop: the peeled iteration's results, or the
  ; initial state (flag bytes = 1, indices = 0).
  %argmin.153.6.unr.ph = phi <4 x i8> [ %785, %"for argmin.s1.r4$x47.prol" ], [ <i8 1, i8 1, i8 1, i8 1>, %"for argmin.s1.r4$x47.preheader" ]
  %argmin.054.6.unr.ph = phi <4 x i32> [ %argmin.0.value.x450.prol, %"for argmin.s1.r4$x47.prol" ], [ zeroinitializer, %"for argmin.s1.r4$x47.preheader" ]
  ; %144 true skips the loop entirely and goes straight to the consumer.
  br i1 %144, label %"consume argmin52", label %"for argmin.s1.r4$x47"

"for argmin.s1.r4$x47":                           ; preds = %"for argmin.s1.r4$x47.prol.loopexit", %"for argmin.s1.r4$x47"
  %indvars.iv225 = phi i64 [ %indvars.iv.next226.1, %"for argmin.s1.r4$x47" ], [ %indvars.iv225.unr.ph, %"for argmin.s1.r4$x47.prol.loopexit" ]
  %argmin.153.6 = phi <4 x i8> [ %826, %"for argmin.s1.r4$x47" ], [ %argmin.153.6.unr.ph, %"for argmin.s1.r4$x47.prol.loopexit" ]
  %argmin.054.6 = phi <4 x i32> [ %argmin.0.value.x450.1, %"for argmin.s1.r4$x47" ], [ %argmin.054.6.unr.ph, %"for argmin.s1.r4$x47.prol.loopexit" ]
  %786 = trunc i64 %indvars.iv225 to i32
  %787 = shl i32 %786, 6
  %t353 = add nsw i32 %787, %769
  %788 = sext i32 %t353 to i64
  %789 = or i64 %788, 12
  %790 = getelementptr inbounds float, float* %f0.0, i64 %789
  %791 = bitcast float* %790 to <4 x float>*
  %t354 = load <4 x float>, <4 x float>* %791, align 16, !tbaa !215
  %792 = getelementptr inbounds float, float* %f0.1, i64 %789
  %793 = bitcast float* %792 to <4 x float>*
  %t355 = load <4 x float>, <4 x float>* %793, align 16, !tbaa !217
  %794 = icmp ne <4 x i8> %argmin.153.6, zeroinitializer
  %795 = fsub <4 x float> zeroinitializer, %t355
  %796 = fmul <4 x float> %t355, %795
  %797 = fadd <4 x float> %796, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %798 = fmul <4 x float> %t354, %t354
  %799 = fcmp ogt <4 x float> %797, %798
  %800 = shufflevector <4 x i1> %794, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %801 = shufflevector <4 x i1> %799, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp = xor <128 x i1> %801, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %802 = and <128 x i1> %tmp, %800
  %803 = shufflevector <128 x i1> %802, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %804 = insertelement <4 x i32> undef, i32 %786, i32 0
  %805 = shufflevector <4 x i32> %804, <4 x i32> undef, <4 x i32> zeroinitializer
  %argmin.0.value.x450 = select <4 x i1> %803, <4 x i32> %805, <4 x i32> %argmin.054.6
  %806 = select <128 x i1> %802, <128 x i1> %801, <128 x i1> %800
  %indvars.iv.next226 = add nuw nsw i64 %indvars.iv225, 1
  %807 = trunc i64 %indvars.iv.next226 to i32
  %808 = shl i32 %807, 6
  %t353.1 = add nsw i32 %808, %769
  %809 = sext i32 %t353.1 to i64
  %810 = or i64 %809, 12
  %811 = getelementptr inbounds float, float* %f0.0, i64 %810
  %812 = bitcast float* %811 to <4 x float>*
  %t354.1 = load <4 x float>, <4 x float>* %812, align 16, !tbaa !215
  %813 = getelementptr inbounds float, float* %f0.1, i64 %810
  %814 = bitcast float* %813 to <4 x float>*
  %t355.1 = load <4 x float>, <4 x float>* %814, align 16, !tbaa !217
  %815 = fsub <4 x float> zeroinitializer, %t355.1
  %816 = fmul <4 x float> %t355.1, %815
  %817 = fadd <4 x float> %816, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
  %818 = fmul <4 x float> %t354.1, %t354.1
  %819 = fcmp ogt <4 x float> %817, %818
  %820 = shufflevector <4 x i1> %819, <4 x i1> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp.1 = xor <128 x i1> %820, <i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>
  %821 = and <128 x i1> %tmp.1, %806
  %822 = shufflevector <128 x i1> %821, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %823 = insertelement <4 x i32> undef, i32 %807, i32 0
  %824 = shufflevector <4 x i32> %823, <4 x i32> undef, <4 x i32> zeroinitializer
  %argmin.0.value.x450.1 = select <4 x i1> %822, <4 x i32> %824, <4 x i32> %argmin.0.value.x450
  %825 = select <128 x i1> %821, <128 x i1> %820, <128 x i1> %806
  %argmin.1.value.x451.1 = shufflevector <128 x i1> %825, <128 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %826 = zext <4 x i1> %argmin.1.value.x451.1 to <4 x i8>
  %indvars.iv.next226.1 = add nsw i64 %indvars.iv225, 2
  %827 = icmp eq i64 %indvars.iv.next226.1, %101
  br i1 %827, label %"consume argmin52", label %"for argmin.s1.r4$x47"

"consume argmin52":                               ; preds = %"for argmin.s1.r4$x47.prol.loopexit", %"for argmin.s1.r4$x47", %"consume argmin45"
  ; Join point for the argmin reduction of one 4-lane group. The phis pick the
  ; per-lane flag bytes (%argmin.153.7) and per-lane i32 indices (%argmin.054.7)
  ; from the incoming edge: constants when arriving from "consume argmin45",
  ; otherwise the values produced by the loop prologue exit or the main loop.
  %argmin.153.7 = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, %"consume argmin45" ], [ %.lcssa277.unr.ph, %"for argmin.s1.r4$x47.prol.loopexit" ], [ %826, %"for argmin.s1.r4$x47" ]
  %argmin.054.7 = phi <4 x i32> [ zeroinitializer, %"consume argmin45" ], [ %argmin.0.value.x450.lcssa.unr.ph, %"for argmin.s1.r4$x47.prol.loopexit" ], [ %argmin.0.value.x450.1, %"for argmin.s1.r4$x47" ]
  ; Volatile store of %add.i106 into %current_func.i105, bracketed by two empty
  ; side-effecting inline-asm calls.
  ; NOTE(review): presumably the Halide profiler's "current func" marker, with
  ; the empty asm acting as compiler barriers - confirm against the runtime.
  call void asm sideeffect "", ""() #9, !srcloc !175
  store volatile i32 %add.i106, i32* %current_func.i105, align 4, !tbaa !38
  call void asm sideeffect "", ""() #9, !srcloc !176
  ; Lanes whose flag byte is non-zero yield 0; the remaining lanes keep the
  ; index carried in %argmin.054.7.
  %828 = icmp ne <4 x i8> %argmin.153.7, zeroinitializer
  %829 = select <4 x i1> %828, <4 x i32> zeroinitializer, <4 x i32> %argmin.054.7
  ; Store the 4 x i32 result at element offset %766 + 4 from %113. The store is
  ; only 4-byte aligned, not vector aligned.
  %830 = add nsw i64 %766, 4
  %831 = getelementptr inbounds i32, i32* %113, i64 %830
  %832 = bitcast i32* %831 to <4 x i32>*
  store <4 x i32> %829, <4 x i32>* %832, align 4, !tbaa !219
  ; Advance the enclosing counter; leave once the incremented value equals 4,
  ; otherwise branch back to "for f1.s0.v1.v16.v16".
  %indvars.iv.next228 = add nuw nsw i64 %indvars.iv227, 1
  %833 = icmp eq i64 %indvars.iv.next228, 4
  br i1 %833, label %call_destructor.exit124, label %"for f1.s0.v1.v16.v16"

"for f0.s1.r4$x.us.us.1":                         ; preds = %"for f0.s1.r4$x.us.us.1.prol.loopexit", %"for f0.s1.r4$x.us.us.1"
  ; Main loop of the f0 update for the copy with suffix ".1"; the body holds two
  ; steps per trip (counter iv, then iv + 1). Each step computes
  ;   re' = re*re - im*im + %c_real   -> stored to f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> stored to f0.1[dst]
  ; with re = f0.0[src], im = f0.1[src],
  ;   src = sext(%316 + (trunc(iv) << 6)) - 64   and   dst = %317 + (iv << 6).
  %indvars.iv207.1 = phi i64 [ %indvars.iv.next208.1.1, %"for f0.s1.r4$x.us.us.1" ], [ %indvars.iv207.1.unr.ph, %"for f0.s1.r4$x.us.us.1.prol.loopexit" ]
  %834 = trunc i64 %indvars.iv207.1 to i32
  %835 = shl i32 %834, 6
  %t325.us.us.1 = add i32 %316, %835
  %836 = sext i32 %t325.us.us.1 to i64
  %837 = add nsw i64 %836, -64
  %838 = getelementptr inbounds float, float* %f0.0, i64 %837
  %t326.us.us.1 = load float, float* %838, align 4, !tbaa !215
  %839 = getelementptr inbounds float, float* %f0.1, i64 %837
  %t327.us.us.1 = load float, float* %839, align 4, !tbaa !217
  %840 = fmul float %t327.us.us.1, %t327.us.us.1
  %841 = fmul float %t326.us.us.1, %t326.us.us.1
  %842 = fsub float %841, %840
  %843 = fadd float %842, %c_real
  %844 = shl nsw i64 %indvars.iv207.1, 6
  %845 = add nuw nsw i64 %317, %844
  %846 = getelementptr inbounds float, float* %f0.0, i64 %845
  store float %843, float* %846, align 4, !tbaa !215
  ; f0.0[src] is loaded a second time here, after the store above, rather than
  ; reusing the first load. NOTE(review): presumably kept because the store to
  ; f0.0 (same !tbaa tag) could not be proven not to alias - confirm.
  %847 = load float, float* %838, align 4, !tbaa !215
  %848 = fmul float %t327.us.us.1, %847
  %849 = fmul float %848, 2.000000e+00
  %850 = fadd float %849, %c_imag
  %851 = getelementptr inbounds float, float* %f0.1, i64 %845
  store float %850, float* %851, align 4, !tbaa !217
  ; Second step of the trip: identical computation using iv + 1.
  %indvars.iv.next208.1 = add nuw nsw i64 %indvars.iv207.1, 1
  %852 = trunc i64 %indvars.iv.next208.1 to i32
  %853 = shl i32 %852, 6
  %t325.us.us.1.1 = add i32 %316, %853
  %854 = sext i32 %t325.us.us.1.1 to i64
  %855 = add nsw i64 %854, -64
  %856 = getelementptr inbounds float, float* %f0.0, i64 %855
  %t326.us.us.1.1 = load float, float* %856, align 4, !tbaa !215
  %857 = getelementptr inbounds float, float* %f0.1, i64 %855
  %t327.us.us.1.1 = load float, float* %857, align 4, !tbaa !217
  %858 = fmul float %t327.us.us.1.1, %t327.us.us.1.1
  %859 = fmul float %t326.us.us.1.1, %t326.us.us.1.1
  %860 = fsub float %859, %858
  %861 = fadd float %860, %c_real
  %862 = shl nsw i64 %indvars.iv.next208.1, 6
  %863 = add nuw nsw i64 %317, %862
  %864 = getelementptr inbounds float, float* %f0.0, i64 %863
  store float %861, float* %864, align 4, !tbaa !215
  %865 = load float, float* %856, align 4, !tbaa !215
  %866 = fmul float %t327.us.us.1.1, %865
  %867 = fmul float %866, 2.000000e+00
  %868 = fadd float %867, %c_imag
  %869 = getelementptr inbounds float, float* %f0.1, i64 %863
  store float %868, float* %869, align 4, !tbaa !217
  ; The counter advances by 2 per trip; the loop exits once it equals %101.
  %indvars.iv.next208.1.1 = add nsw i64 %indvars.iv207.1, 2
  %870 = icmp eq i64 %indvars.iv.next208.1.1, %101
  br i1 %870, label %"end for f0.s1.r4$x.loopexit.us.us.1", label %"for f0.s1.r4$x.us.us.1"

"end for f0.s1.r4$x.loopexit.us.us.1":            ; preds = %"for f0.s1.r4$x.us.us.1", %"for f0.s1.r4$x.us.us.1.prol.loopexit"
  ; Exit of the ".1" loop and setup for the ".2" copy: %871 (= %298 | 2) is the
  ; 32-bit base used for read indices and %872 (= %299 + %indvars.iv.next213.1)
  ; the 64-bit base used for write indices. %lcmp.mod302 selects whether the
  ; single peeled step in the ".2.prol" block runs before the main loop.
  ; NOTE(review): presumably the odd-trip-count remainder of the unroll by 2 - confirm.
  %871 = or i32 %298, 2
  %872 = add nsw i64 %299, %indvars.iv.next213.1
  br i1 %lcmp.mod302, label %"for f0.s1.r4$x.us.us.2.prol", label %"for f0.s1.r4$x.us.us.2.prol.loopexit"

"for f0.s1.r4$x.us.us.2.prol":                    ; preds = %"end for f0.s1.r4$x.loopexit.us.us.1"
  ; Peeled single step of the ".2" copy for counter value 1 (1 << 6 == 64):
  ;   re' = re*re - im*im + %c_real   -> f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> f0.1[dst]
  ; with src = sext(%871 + 64) - 64 and dst = %872 + 64.
  %t325.us.us.2.prol = add i32 %871, 64
  %873 = sext i32 %t325.us.us.2.prol to i64
  %874 = add nsw i64 %873, -64
  %875 = getelementptr inbounds float, float* %f0.0, i64 %874
  %t326.us.us.2.prol = load float, float* %875, align 4, !tbaa !215
  %876 = getelementptr inbounds float, float* %f0.1, i64 %874
  %t327.us.us.2.prol = load float, float* %876, align 4, !tbaa !217
  %877 = fmul float %t327.us.us.2.prol, %t327.us.us.2.prol
  %878 = fmul float %t326.us.us.2.prol, %t326.us.us.2.prol
  %879 = fsub float %878, %877
  %880 = fadd float %879, %c_real
  %881 = add nuw nsw i64 %872, 64
  %882 = getelementptr inbounds float, float* %f0.0, i64 %881
  store float %880, float* %882, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %883 = load float, float* %875, align 4, !tbaa !215
  %884 = fmul float %t327.us.us.2.prol, %883
  %885 = fmul float %884, 2.000000e+00
  %886 = fadd float %885, %c_imag
  %887 = getelementptr inbounds float, float* %f0.1, i64 %881
  store float %886, float* %887, align 4, !tbaa !217
  br label %"for f0.s1.r4$x.us.us.2.prol.loopexit"

"for f0.s1.r4$x.us.us.2.prol.loopexit":           ; preds = %"for f0.s1.r4$x.us.us.2.prol", %"end for f0.s1.r4$x.loopexit.us.us.1"
  ; Starting counter for the unrolled ".2" loop: 2 if the peeled step ran, 1 if
  ; it was skipped. When %126 (defined earlier in the function) is true the
  ; main loop is bypassed entirely.
  %indvars.iv207.2.unr.ph = phi i64 [ 2, %"for f0.s1.r4$x.us.us.2.prol" ], [ 1, %"end for f0.s1.r4$x.loopexit.us.us.1" ]
  br i1 %126, label %"end for f0.s1.r4$x.loopexit.us.us.2", label %"for f0.s1.r4$x.us.us.2"

"for f0.s1.r4$x.us.us.2":                         ; preds = %"for f0.s1.r4$x.us.us.2.prol.loopexit", %"for f0.s1.r4$x.us.us.2"
  ; Main loop of the f0 update for the copy with suffix ".2"; the body holds two
  ; steps per trip (counter iv, then iv + 1). Each step computes
  ;   re' = re*re - im*im + %c_real   -> stored to f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> stored to f0.1[dst]
  ; with re = f0.0[src], im = f0.1[src],
  ;   src = sext(%871 + (trunc(iv) << 6)) - 64   and   dst = %872 + (iv << 6).
  %indvars.iv207.2 = phi i64 [ %indvars.iv.next208.2.1, %"for f0.s1.r4$x.us.us.2" ], [ %indvars.iv207.2.unr.ph, %"for f0.s1.r4$x.us.us.2.prol.loopexit" ]
  %888 = trunc i64 %indvars.iv207.2 to i32
  %889 = shl i32 %888, 6
  %t325.us.us.2 = add i32 %871, %889
  %890 = sext i32 %t325.us.us.2 to i64
  %891 = add nsw i64 %890, -64
  %892 = getelementptr inbounds float, float* %f0.0, i64 %891
  %t326.us.us.2 = load float, float* %892, align 4, !tbaa !215
  %893 = getelementptr inbounds float, float* %f0.1, i64 %891
  %t327.us.us.2 = load float, float* %893, align 4, !tbaa !217
  %894 = fmul float %t327.us.us.2, %t327.us.us.2
  %895 = fmul float %t326.us.us.2, %t326.us.us.2
  %896 = fsub float %895, %894
  %897 = fadd float %896, %c_real
  %898 = shl nsw i64 %indvars.iv207.2, 6
  %899 = add nuw nsw i64 %872, %898
  %900 = getelementptr inbounds float, float* %f0.0, i64 %899
  store float %897, float* %900, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %901 = load float, float* %892, align 4, !tbaa !215
  %902 = fmul float %t327.us.us.2, %901
  %903 = fmul float %902, 2.000000e+00
  %904 = fadd float %903, %c_imag
  %905 = getelementptr inbounds float, float* %f0.1, i64 %899
  store float %904, float* %905, align 4, !tbaa !217
  ; Second step of the trip: identical computation using iv + 1.
  %indvars.iv.next208.2 = add nuw nsw i64 %indvars.iv207.2, 1
  %906 = trunc i64 %indvars.iv.next208.2 to i32
  %907 = shl i32 %906, 6
  %t325.us.us.2.1 = add i32 %871, %907
  %908 = sext i32 %t325.us.us.2.1 to i64
  %909 = add nsw i64 %908, -64
  %910 = getelementptr inbounds float, float* %f0.0, i64 %909
  %t326.us.us.2.1 = load float, float* %910, align 4, !tbaa !215
  %911 = getelementptr inbounds float, float* %f0.1, i64 %909
  %t327.us.us.2.1 = load float, float* %911, align 4, !tbaa !217
  %912 = fmul float %t327.us.us.2.1, %t327.us.us.2.1
  %913 = fmul float %t326.us.us.2.1, %t326.us.us.2.1
  %914 = fsub float %913, %912
  %915 = fadd float %914, %c_real
  %916 = shl nsw i64 %indvars.iv.next208.2, 6
  %917 = add nuw nsw i64 %872, %916
  %918 = getelementptr inbounds float, float* %f0.0, i64 %917
  store float %915, float* %918, align 4, !tbaa !215
  %919 = load float, float* %910, align 4, !tbaa !215
  %920 = fmul float %t327.us.us.2.1, %919
  %921 = fmul float %920, 2.000000e+00
  %922 = fadd float %921, %c_imag
  %923 = getelementptr inbounds float, float* %f0.1, i64 %917
  store float %922, float* %923, align 4, !tbaa !217
  ; The counter advances by 2 per trip; the loop exits once it equals %101.
  %indvars.iv.next208.2.1 = add nsw i64 %indvars.iv207.2, 2
  %924 = icmp eq i64 %indvars.iv.next208.2.1, %101
  br i1 %924, label %"end for f0.s1.r4$x.loopexit.us.us.2", label %"for f0.s1.r4$x.us.us.2"

"end for f0.s1.r4$x.loopexit.us.us.2":            ; preds = %"for f0.s1.r4$x.us.us.2", %"for f0.s1.r4$x.us.us.2.prol.loopexit"
  ; Exit of the ".2" loop and setup for the ".3" copy: %925 (= %298 | 3) is the
  ; 32-bit base used for read indices and %926 (= %299 + %indvars.iv.next213.2)
  ; the 64-bit base used for write indices. %lcmp.mod304 selects whether the
  ; single peeled step in the ".3.prol" block runs before the main loop.
  %925 = or i32 %298, 3
  %926 = add nsw i64 %299, %indvars.iv.next213.2
  br i1 %lcmp.mod304, label %"for f0.s1.r4$x.us.us.3.prol", label %"for f0.s1.r4$x.us.us.3.prol.loopexit"

"for f0.s1.r4$x.us.us.3.prol":                    ; preds = %"end for f0.s1.r4$x.loopexit.us.us.2"
  ; Peeled single step of the ".3" copy for counter value 1 (1 << 6 == 64):
  ;   re' = re*re - im*im + %c_real   -> f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> f0.1[dst]
  ; with src = sext(%925 + 64) - 64 and dst = %926 + 64.
  %t325.us.us.3.prol = add i32 %925, 64
  %927 = sext i32 %t325.us.us.3.prol to i64
  %928 = add nsw i64 %927, -64
  %929 = getelementptr inbounds float, float* %f0.0, i64 %928
  %t326.us.us.3.prol = load float, float* %929, align 4, !tbaa !215
  %930 = getelementptr inbounds float, float* %f0.1, i64 %928
  %t327.us.us.3.prol = load float, float* %930, align 4, !tbaa !217
  %931 = fmul float %t327.us.us.3.prol, %t327.us.us.3.prol
  %932 = fmul float %t326.us.us.3.prol, %t326.us.us.3.prol
  %933 = fsub float %932, %931
  %934 = fadd float %933, %c_real
  %935 = add nuw nsw i64 %926, 64
  %936 = getelementptr inbounds float, float* %f0.0, i64 %935
  store float %934, float* %936, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %937 = load float, float* %929, align 4, !tbaa !215
  %938 = fmul float %t327.us.us.3.prol, %937
  %939 = fmul float %938, 2.000000e+00
  %940 = fadd float %939, %c_imag
  %941 = getelementptr inbounds float, float* %f0.1, i64 %935
  store float %940, float* %941, align 4, !tbaa !217
  br label %"for f0.s1.r4$x.us.us.3.prol.loopexit"

"for f0.s1.r4$x.us.us.3.prol.loopexit":           ; preds = %"for f0.s1.r4$x.us.us.3.prol", %"end for f0.s1.r4$x.loopexit.us.us.2"
  ; Starting counter for the unrolled ".3" loop: 2 if the peeled step ran, 1 if
  ; it was skipped. When %128 (defined earlier in the function) is true the
  ; main loop is bypassed entirely.
  %indvars.iv207.3.unr.ph = phi i64 [ 2, %"for f0.s1.r4$x.us.us.3.prol" ], [ 1, %"end for f0.s1.r4$x.loopexit.us.us.2" ]
  br i1 %128, label %"end for f0.s1.r4$x.loopexit.us.us.3", label %"for f0.s1.r4$x.us.us.3"

"for f0.s1.r4$x.us.us.3":                         ; preds = %"for f0.s1.r4$x.us.us.3.prol.loopexit", %"for f0.s1.r4$x.us.us.3"
  ; Main loop of the f0 update for the copy with suffix ".3"; the body holds two
  ; steps per trip (counter iv, then iv + 1). Each step computes
  ;   re' = re*re - im*im + %c_real   -> stored to f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> stored to f0.1[dst]
  ; with re = f0.0[src], im = f0.1[src],
  ;   src = sext(%925 + (trunc(iv) << 6)) - 64   and   dst = %926 + (iv << 6).
  %indvars.iv207.3 = phi i64 [ %indvars.iv.next208.3.1, %"for f0.s1.r4$x.us.us.3" ], [ %indvars.iv207.3.unr.ph, %"for f0.s1.r4$x.us.us.3.prol.loopexit" ]
  %942 = trunc i64 %indvars.iv207.3 to i32
  %943 = shl i32 %942, 6
  %t325.us.us.3 = add i32 %925, %943
  %944 = sext i32 %t325.us.us.3 to i64
  %945 = add nsw i64 %944, -64
  %946 = getelementptr inbounds float, float* %f0.0, i64 %945
  %t326.us.us.3 = load float, float* %946, align 4, !tbaa !215
  %947 = getelementptr inbounds float, float* %f0.1, i64 %945
  %t327.us.us.3 = load float, float* %947, align 4, !tbaa !217
  %948 = fmul float %t327.us.us.3, %t327.us.us.3
  %949 = fmul float %t326.us.us.3, %t326.us.us.3
  %950 = fsub float %949, %948
  %951 = fadd float %950, %c_real
  %952 = shl nsw i64 %indvars.iv207.3, 6
  %953 = add nuw nsw i64 %926, %952
  %954 = getelementptr inbounds float, float* %f0.0, i64 %953
  store float %951, float* %954, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %955 = load float, float* %946, align 4, !tbaa !215
  %956 = fmul float %t327.us.us.3, %955
  %957 = fmul float %956, 2.000000e+00
  %958 = fadd float %957, %c_imag
  %959 = getelementptr inbounds float, float* %f0.1, i64 %953
  store float %958, float* %959, align 4, !tbaa !217
  ; Second step of the trip: identical computation using iv + 1.
  %indvars.iv.next208.3 = add nuw nsw i64 %indvars.iv207.3, 1
  %960 = trunc i64 %indvars.iv.next208.3 to i32
  %961 = shl i32 %960, 6
  %t325.us.us.3.1 = add i32 %925, %961
  %962 = sext i32 %t325.us.us.3.1 to i64
  %963 = add nsw i64 %962, -64
  %964 = getelementptr inbounds float, float* %f0.0, i64 %963
  %t326.us.us.3.1 = load float, float* %964, align 4, !tbaa !215
  %965 = getelementptr inbounds float, float* %f0.1, i64 %963
  %t327.us.us.3.1 = load float, float* %965, align 4, !tbaa !217
  %966 = fmul float %t327.us.us.3.1, %t327.us.us.3.1
  %967 = fmul float %t326.us.us.3.1, %t326.us.us.3.1
  %968 = fsub float %967, %966
  %969 = fadd float %968, %c_real
  %970 = shl nsw i64 %indvars.iv.next208.3, 6
  %971 = add nuw nsw i64 %926, %970
  %972 = getelementptr inbounds float, float* %f0.0, i64 %971
  store float %969, float* %972, align 4, !tbaa !215
  %973 = load float, float* %964, align 4, !tbaa !215
  %974 = fmul float %t327.us.us.3.1, %973
  %975 = fmul float %974, 2.000000e+00
  %976 = fadd float %975, %c_imag
  %977 = getelementptr inbounds float, float* %f0.1, i64 %971
  store float %976, float* %977, align 4, !tbaa !217
  ; The counter advances by 2 per trip; the loop exits once it equals %101.
  %indvars.iv.next208.3.1 = add nsw i64 %indvars.iv207.3, 2
  %978 = icmp eq i64 %indvars.iv.next208.3.1, %101
  br i1 %978, label %"end for f0.s1.r4$x.loopexit.us.us.3", label %"for f0.s1.r4$x.us.us.3"

"end for f0.s1.r4$x.loopexit.us.us.3":            ; preds = %"for f0.s1.r4$x.us.us.3", %"for f0.s1.r4$x.us.us.3.prol.loopexit"
  ; Exit of the ".3" loop and setup for the ".4" copy: %979 (= %298 | 4) is the
  ; 32-bit base used for read indices and %980 (= %299 + %indvars.iv.next213.3)
  ; the 64-bit base used for write indices. %lcmp.mod306 selects whether the
  ; single peeled step in the ".4.prol" block runs before the main loop.
  %979 = or i32 %298, 4
  %980 = add nsw i64 %299, %indvars.iv.next213.3
  br i1 %lcmp.mod306, label %"for f0.s1.r4$x.us.us.4.prol", label %"for f0.s1.r4$x.us.us.4.prol.loopexit"

"for f0.s1.r4$x.us.us.4.prol":                    ; preds = %"end for f0.s1.r4$x.loopexit.us.us.3"
  ; Peeled single step of the ".4" copy for counter value 1 (1 << 6 == 64):
  ;   re' = re*re - im*im + %c_real   -> f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> f0.1[dst]
  ; with src = sext(%979 + 64) - 64 and dst = %980 + 64.
  %t325.us.us.4.prol = add i32 %979, 64
  %981 = sext i32 %t325.us.us.4.prol to i64
  %982 = add nsw i64 %981, -64
  %983 = getelementptr inbounds float, float* %f0.0, i64 %982
  %t326.us.us.4.prol = load float, float* %983, align 4, !tbaa !215
  %984 = getelementptr inbounds float, float* %f0.1, i64 %982
  %t327.us.us.4.prol = load float, float* %984, align 4, !tbaa !217
  %985 = fmul float %t327.us.us.4.prol, %t327.us.us.4.prol
  %986 = fmul float %t326.us.us.4.prol, %t326.us.us.4.prol
  %987 = fsub float %986, %985
  %988 = fadd float %987, %c_real
  %989 = add nuw nsw i64 %980, 64
  %990 = getelementptr inbounds float, float* %f0.0, i64 %989
  store float %988, float* %990, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %991 = load float, float* %983, align 4, !tbaa !215
  %992 = fmul float %t327.us.us.4.prol, %991
  %993 = fmul float %992, 2.000000e+00
  %994 = fadd float %993, %c_imag
  %995 = getelementptr inbounds float, float* %f0.1, i64 %989
  store float %994, float* %995, align 4, !tbaa !217
  br label %"for f0.s1.r4$x.us.us.4.prol.loopexit"

"for f0.s1.r4$x.us.us.4.prol.loopexit":           ; preds = %"for f0.s1.r4$x.us.us.4.prol", %"end for f0.s1.r4$x.loopexit.us.us.3"
  ; Starting counter for the unrolled ".4" loop: 2 if the peeled step ran, 1 if
  ; it was skipped. When %130 (defined earlier in the function) is true the
  ; main loop is bypassed entirely.
  %indvars.iv207.4.unr.ph = phi i64 [ 2, %"for f0.s1.r4$x.us.us.4.prol" ], [ 1, %"end for f0.s1.r4$x.loopexit.us.us.3" ]
  br i1 %130, label %"end for f0.s1.r4$x.loopexit.us.us.4", label %"for f0.s1.r4$x.us.us.4"

"for f0.s1.r4$x.us.us.4":                         ; preds = %"for f0.s1.r4$x.us.us.4.prol.loopexit", %"for f0.s1.r4$x.us.us.4"
  ; Main loop of the f0 update for the copy with suffix ".4"; the body holds two
  ; steps per trip (counter iv, then iv + 1). Each step computes
  ;   re' = re*re - im*im + %c_real   -> stored to f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> stored to f0.1[dst]
  ; with re = f0.0[src], im = f0.1[src],
  ;   src = sext(%979 + (trunc(iv) << 6)) - 64   and   dst = %980 + (iv << 6).
  %indvars.iv207.4 = phi i64 [ %indvars.iv.next208.4.1, %"for f0.s1.r4$x.us.us.4" ], [ %indvars.iv207.4.unr.ph, %"for f0.s1.r4$x.us.us.4.prol.loopexit" ]
  %996 = trunc i64 %indvars.iv207.4 to i32
  %997 = shl i32 %996, 6
  %t325.us.us.4 = add i32 %979, %997
  %998 = sext i32 %t325.us.us.4 to i64
  %999 = add nsw i64 %998, -64
  %1000 = getelementptr inbounds float, float* %f0.0, i64 %999
  %t326.us.us.4 = load float, float* %1000, align 4, !tbaa !215
  %1001 = getelementptr inbounds float, float* %f0.1, i64 %999
  %t327.us.us.4 = load float, float* %1001, align 4, !tbaa !217
  %1002 = fmul float %t327.us.us.4, %t327.us.us.4
  %1003 = fmul float %t326.us.us.4, %t326.us.us.4
  %1004 = fsub float %1003, %1002
  %1005 = fadd float %1004, %c_real
  %1006 = shl nsw i64 %indvars.iv207.4, 6
  %1007 = add nuw nsw i64 %980, %1006
  %1008 = getelementptr inbounds float, float* %f0.0, i64 %1007
  store float %1005, float* %1008, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %1009 = load float, float* %1000, align 4, !tbaa !215
  %1010 = fmul float %t327.us.us.4, %1009
  %1011 = fmul float %1010, 2.000000e+00
  %1012 = fadd float %1011, %c_imag
  %1013 = getelementptr inbounds float, float* %f0.1, i64 %1007
  store float %1012, float* %1013, align 4, !tbaa !217
  ; Second step of the trip: identical computation using iv + 1.
  %indvars.iv.next208.4 = add nuw nsw i64 %indvars.iv207.4, 1
  %1014 = trunc i64 %indvars.iv.next208.4 to i32
  %1015 = shl i32 %1014, 6
  %t325.us.us.4.1 = add i32 %979, %1015
  %1016 = sext i32 %t325.us.us.4.1 to i64
  %1017 = add nsw i64 %1016, -64
  %1018 = getelementptr inbounds float, float* %f0.0, i64 %1017
  %t326.us.us.4.1 = load float, float* %1018, align 4, !tbaa !215
  %1019 = getelementptr inbounds float, float* %f0.1, i64 %1017
  %t327.us.us.4.1 = load float, float* %1019, align 4, !tbaa !217
  %1020 = fmul float %t327.us.us.4.1, %t327.us.us.4.1
  %1021 = fmul float %t326.us.us.4.1, %t326.us.us.4.1
  %1022 = fsub float %1021, %1020
  %1023 = fadd float %1022, %c_real
  %1024 = shl nsw i64 %indvars.iv.next208.4, 6
  %1025 = add nuw nsw i64 %980, %1024
  %1026 = getelementptr inbounds float, float* %f0.0, i64 %1025
  store float %1023, float* %1026, align 4, !tbaa !215
  %1027 = load float, float* %1018, align 4, !tbaa !215
  %1028 = fmul float %t327.us.us.4.1, %1027
  %1029 = fmul float %1028, 2.000000e+00
  %1030 = fadd float %1029, %c_imag
  %1031 = getelementptr inbounds float, float* %f0.1, i64 %1025
  store float %1030, float* %1031, align 4, !tbaa !217
  ; The counter advances by 2 per trip; the loop exits once it equals %101.
  %indvars.iv.next208.4.1 = add nsw i64 %indvars.iv207.4, 2
  %1032 = icmp eq i64 %indvars.iv.next208.4.1, %101
  br i1 %1032, label %"end for f0.s1.r4$x.loopexit.us.us.4", label %"for f0.s1.r4$x.us.us.4"

"end for f0.s1.r4$x.loopexit.us.us.4":            ; preds = %"for f0.s1.r4$x.us.us.4", %"for f0.s1.r4$x.us.us.4.prol.loopexit"
  ; Exit of the ".4" loop and setup for the ".5" copy: %1033 (= %298 | 5) is the
  ; 32-bit base used for read indices and %1034 (= %299 + %indvars.iv.next213.4)
  ; the 64-bit base used for write indices. %lcmp.mod308 selects whether the
  ; single peeled step in the ".5.prol" block runs before the main loop.
  %1033 = or i32 %298, 5
  %1034 = add nsw i64 %299, %indvars.iv.next213.4
  br i1 %lcmp.mod308, label %"for f0.s1.r4$x.us.us.5.prol", label %"for f0.s1.r4$x.us.us.5.prol.loopexit"

"for f0.s1.r4$x.us.us.5.prol":                    ; preds = %"end for f0.s1.r4$x.loopexit.us.us.4"
  ; Peeled single step of the ".5" copy for counter value 1 (1 << 6 == 64):
  ;   re' = re*re - im*im + %c_real   -> f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> f0.1[dst]
  ; with src = sext(%1033 + 64) - 64 and dst = %1034 + 64.
  %t325.us.us.5.prol = add i32 %1033, 64
  %1035 = sext i32 %t325.us.us.5.prol to i64
  %1036 = add nsw i64 %1035, -64
  %1037 = getelementptr inbounds float, float* %f0.0, i64 %1036
  %t326.us.us.5.prol = load float, float* %1037, align 4, !tbaa !215
  %1038 = getelementptr inbounds float, float* %f0.1, i64 %1036
  %t327.us.us.5.prol = load float, float* %1038, align 4, !tbaa !217
  %1039 = fmul float %t327.us.us.5.prol, %t327.us.us.5.prol
  %1040 = fmul float %t326.us.us.5.prol, %t326.us.us.5.prol
  %1041 = fsub float %1040, %1039
  %1042 = fadd float %1041, %c_real
  %1043 = add nuw nsw i64 %1034, 64
  %1044 = getelementptr inbounds float, float* %f0.0, i64 %1043
  store float %1042, float* %1044, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %1045 = load float, float* %1037, align 4, !tbaa !215
  %1046 = fmul float %t327.us.us.5.prol, %1045
  %1047 = fmul float %1046, 2.000000e+00
  %1048 = fadd float %1047, %c_imag
  %1049 = getelementptr inbounds float, float* %f0.1, i64 %1043
  store float %1048, float* %1049, align 4, !tbaa !217
  br label %"for f0.s1.r4$x.us.us.5.prol.loopexit"

"for f0.s1.r4$x.us.us.5.prol.loopexit":           ; preds = %"for f0.s1.r4$x.us.us.5.prol", %"end for f0.s1.r4$x.loopexit.us.us.4"
  ; Starting counter for the unrolled ".5" loop: 2 if the peeled step ran, 1 if
  ; it was skipped. When %132 (defined earlier in the function) is true the
  ; main loop is bypassed entirely.
  %indvars.iv207.5.unr.ph = phi i64 [ 2, %"for f0.s1.r4$x.us.us.5.prol" ], [ 1, %"end for f0.s1.r4$x.loopexit.us.us.4" ]
  br i1 %132, label %"end for f0.s1.r4$x.loopexit.us.us.5", label %"for f0.s1.r4$x.us.us.5"

"for f0.s1.r4$x.us.us.5":                         ; preds = %"for f0.s1.r4$x.us.us.5.prol.loopexit", %"for f0.s1.r4$x.us.us.5"
  ; Main loop of the f0 update for the copy with suffix ".5"; the body holds two
  ; steps per trip (counter iv, then iv + 1). Each step computes
  ;   re' = re*re - im*im + %c_real   -> stored to f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> stored to f0.1[dst]
  ; with re = f0.0[src], im = f0.1[src],
  ;   src = sext(%1033 + (trunc(iv) << 6)) - 64   and   dst = %1034 + (iv << 6).
  %indvars.iv207.5 = phi i64 [ %indvars.iv.next208.5.1, %"for f0.s1.r4$x.us.us.5" ], [ %indvars.iv207.5.unr.ph, %"for f0.s1.r4$x.us.us.5.prol.loopexit" ]
  %1050 = trunc i64 %indvars.iv207.5 to i32
  %1051 = shl i32 %1050, 6
  %t325.us.us.5 = add i32 %1033, %1051
  %1052 = sext i32 %t325.us.us.5 to i64
  %1053 = add nsw i64 %1052, -64
  %1054 = getelementptr inbounds float, float* %f0.0, i64 %1053
  %t326.us.us.5 = load float, float* %1054, align 4, !tbaa !215
  %1055 = getelementptr inbounds float, float* %f0.1, i64 %1053
  %t327.us.us.5 = load float, float* %1055, align 4, !tbaa !217
  %1056 = fmul float %t327.us.us.5, %t327.us.us.5
  %1057 = fmul float %t326.us.us.5, %t326.us.us.5
  %1058 = fsub float %1057, %1056
  %1059 = fadd float %1058, %c_real
  %1060 = shl nsw i64 %indvars.iv207.5, 6
  %1061 = add nuw nsw i64 %1034, %1060
  %1062 = getelementptr inbounds float, float* %f0.0, i64 %1061
  store float %1059, float* %1062, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %1063 = load float, float* %1054, align 4, !tbaa !215
  %1064 = fmul float %t327.us.us.5, %1063
  %1065 = fmul float %1064, 2.000000e+00
  %1066 = fadd float %1065, %c_imag
  %1067 = getelementptr inbounds float, float* %f0.1, i64 %1061
  store float %1066, float* %1067, align 4, !tbaa !217
  ; Second step of the trip: identical computation using iv + 1.
  %indvars.iv.next208.5 = add nuw nsw i64 %indvars.iv207.5, 1
  %1068 = trunc i64 %indvars.iv.next208.5 to i32
  %1069 = shl i32 %1068, 6
  %t325.us.us.5.1 = add i32 %1033, %1069
  %1070 = sext i32 %t325.us.us.5.1 to i64
  %1071 = add nsw i64 %1070, -64
  %1072 = getelementptr inbounds float, float* %f0.0, i64 %1071
  %t326.us.us.5.1 = load float, float* %1072, align 4, !tbaa !215
  %1073 = getelementptr inbounds float, float* %f0.1, i64 %1071
  %t327.us.us.5.1 = load float, float* %1073, align 4, !tbaa !217
  %1074 = fmul float %t327.us.us.5.1, %t327.us.us.5.1
  %1075 = fmul float %t326.us.us.5.1, %t326.us.us.5.1
  %1076 = fsub float %1075, %1074
  %1077 = fadd float %1076, %c_real
  %1078 = shl nsw i64 %indvars.iv.next208.5, 6
  %1079 = add nuw nsw i64 %1034, %1078
  %1080 = getelementptr inbounds float, float* %f0.0, i64 %1079
  store float %1077, float* %1080, align 4, !tbaa !215
  %1081 = load float, float* %1072, align 4, !tbaa !215
  %1082 = fmul float %t327.us.us.5.1, %1081
  %1083 = fmul float %1082, 2.000000e+00
  %1084 = fadd float %1083, %c_imag
  %1085 = getelementptr inbounds float, float* %f0.1, i64 %1079
  store float %1084, float* %1085, align 4, !tbaa !217
  ; The counter advances by 2 per trip; the loop exits once it equals %101.
  %indvars.iv.next208.5.1 = add nsw i64 %indvars.iv207.5, 2
  %1086 = icmp eq i64 %indvars.iv.next208.5.1, %101
  br i1 %1086, label %"end for f0.s1.r4$x.loopexit.us.us.5", label %"for f0.s1.r4$x.us.us.5"

"end for f0.s1.r4$x.loopexit.us.us.5":            ; preds = %"for f0.s1.r4$x.us.us.5", %"for f0.s1.r4$x.us.us.5.prol.loopexit"
  ; Exit of the ".5" loop and setup for the ".6" copy: %1087 (= %298 | 6) is the
  ; 32-bit base used for read indices and %1088 (= %299 + %indvars.iv.next213.5)
  ; the 64-bit base used for write indices. %lcmp.mod310 selects whether the
  ; single peeled step in the ".6.prol" block runs before the main loop.
  %1087 = or i32 %298, 6
  %1088 = add nsw i64 %299, %indvars.iv.next213.5
  br i1 %lcmp.mod310, label %"for f0.s1.r4$x.us.us.6.prol", label %"for f0.s1.r4$x.us.us.6.prol.loopexit"

"for f0.s1.r4$x.us.us.6.prol":                    ; preds = %"end for f0.s1.r4$x.loopexit.us.us.5"
  ; Peeled single step of the ".6" copy for counter value 1 (1 << 6 == 64):
  ;   re' = re*re - im*im + %c_real   -> f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> f0.1[dst]
  ; with src = sext(%1087 + 64) - 64 and dst = %1088 + 64.
  %t325.us.us.6.prol = add i32 %1087, 64
  %1089 = sext i32 %t325.us.us.6.prol to i64
  %1090 = add nsw i64 %1089, -64
  %1091 = getelementptr inbounds float, float* %f0.0, i64 %1090
  %t326.us.us.6.prol = load float, float* %1091, align 4, !tbaa !215
  %1092 = getelementptr inbounds float, float* %f0.1, i64 %1090
  %t327.us.us.6.prol = load float, float* %1092, align 4, !tbaa !217
  %1093 = fmul float %t327.us.us.6.prol, %t327.us.us.6.prol
  %1094 = fmul float %t326.us.us.6.prol, %t326.us.us.6.prol
  %1095 = fsub float %1094, %1093
  %1096 = fadd float %1095, %c_real
  %1097 = add nuw nsw i64 %1088, 64
  %1098 = getelementptr inbounds float, float* %f0.0, i64 %1097
  store float %1096, float* %1098, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %1099 = load float, float* %1091, align 4, !tbaa !215
  %1100 = fmul float %t327.us.us.6.prol, %1099
  %1101 = fmul float %1100, 2.000000e+00
  %1102 = fadd float %1101, %c_imag
  %1103 = getelementptr inbounds float, float* %f0.1, i64 %1097
  store float %1102, float* %1103, align 4, !tbaa !217
  br label %"for f0.s1.r4$x.us.us.6.prol.loopexit"

"for f0.s1.r4$x.us.us.6.prol.loopexit":           ; preds = %"for f0.s1.r4$x.us.us.6.prol", %"end for f0.s1.r4$x.loopexit.us.us.5"
  ; Starting counter for the unrolled ".6" loop: 2 if the peeled step ran, 1 if
  ; it was skipped. When %134 (defined earlier in the function) is true the
  ; main loop is bypassed entirely.
  %indvars.iv207.6.unr.ph = phi i64 [ 2, %"for f0.s1.r4$x.us.us.6.prol" ], [ 1, %"end for f0.s1.r4$x.loopexit.us.us.5" ]
  br i1 %134, label %"end for f0.s1.r4$x.loopexit.us.us.6", label %"for f0.s1.r4$x.us.us.6"

"for f0.s1.r4$x.us.us.6":                         ; preds = %"for f0.s1.r4$x.us.us.6.prol.loopexit", %"for f0.s1.r4$x.us.us.6"
  ; Main loop of the f0 update for the copy with suffix ".6"; the body holds two
  ; steps per trip (counter iv, then iv + 1). Each step computes
  ;   re' = re*re - im*im + %c_real   -> stored to f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> stored to f0.1[dst]
  ; with re = f0.0[src], im = f0.1[src],
  ;   src = sext(%1087 + (trunc(iv) << 6)) - 64   and   dst = %1088 + (iv << 6).
  %indvars.iv207.6 = phi i64 [ %indvars.iv.next208.6.1, %"for f0.s1.r4$x.us.us.6" ], [ %indvars.iv207.6.unr.ph, %"for f0.s1.r4$x.us.us.6.prol.loopexit" ]
  %1104 = trunc i64 %indvars.iv207.6 to i32
  %1105 = shl i32 %1104, 6
  %t325.us.us.6 = add i32 %1087, %1105
  %1106 = sext i32 %t325.us.us.6 to i64
  %1107 = add nsw i64 %1106, -64
  %1108 = getelementptr inbounds float, float* %f0.0, i64 %1107
  %t326.us.us.6 = load float, float* %1108, align 4, !tbaa !215
  %1109 = getelementptr inbounds float, float* %f0.1, i64 %1107
  %t327.us.us.6 = load float, float* %1109, align 4, !tbaa !217
  %1110 = fmul float %t327.us.us.6, %t327.us.us.6
  %1111 = fmul float %t326.us.us.6, %t326.us.us.6
  %1112 = fsub float %1111, %1110
  %1113 = fadd float %1112, %c_real
  %1114 = shl nsw i64 %indvars.iv207.6, 6
  %1115 = add nuw nsw i64 %1088, %1114
  %1116 = getelementptr inbounds float, float* %f0.0, i64 %1115
  store float %1113, float* %1116, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %1117 = load float, float* %1108, align 4, !tbaa !215
  %1118 = fmul float %t327.us.us.6, %1117
  %1119 = fmul float %1118, 2.000000e+00
  %1120 = fadd float %1119, %c_imag
  %1121 = getelementptr inbounds float, float* %f0.1, i64 %1115
  store float %1120, float* %1121, align 4, !tbaa !217
  ; Second step of the trip: identical computation using iv + 1.
  %indvars.iv.next208.6 = add nuw nsw i64 %indvars.iv207.6, 1
  %1122 = trunc i64 %indvars.iv.next208.6 to i32
  %1123 = shl i32 %1122, 6
  %t325.us.us.6.1 = add i32 %1087, %1123
  %1124 = sext i32 %t325.us.us.6.1 to i64
  %1125 = add nsw i64 %1124, -64
  %1126 = getelementptr inbounds float, float* %f0.0, i64 %1125
  %t326.us.us.6.1 = load float, float* %1126, align 4, !tbaa !215
  %1127 = getelementptr inbounds float, float* %f0.1, i64 %1125
  %t327.us.us.6.1 = load float, float* %1127, align 4, !tbaa !217
  %1128 = fmul float %t327.us.us.6.1, %t327.us.us.6.1
  %1129 = fmul float %t326.us.us.6.1, %t326.us.us.6.1
  %1130 = fsub float %1129, %1128
  %1131 = fadd float %1130, %c_real
  %1132 = shl nsw i64 %indvars.iv.next208.6, 6
  %1133 = add nuw nsw i64 %1088, %1132
  %1134 = getelementptr inbounds float, float* %f0.0, i64 %1133
  store float %1131, float* %1134, align 4, !tbaa !215
  %1135 = load float, float* %1126, align 4, !tbaa !215
  %1136 = fmul float %t327.us.us.6.1, %1135
  %1137 = fmul float %1136, 2.000000e+00
  %1138 = fadd float %1137, %c_imag
  %1139 = getelementptr inbounds float, float* %f0.1, i64 %1133
  store float %1138, float* %1139, align 4, !tbaa !217
  ; The counter advances by 2 per trip; the loop exits once it equals %101.
  %indvars.iv.next208.6.1 = add nsw i64 %indvars.iv207.6, 2
  %1140 = icmp eq i64 %indvars.iv.next208.6.1, %101
  br i1 %1140, label %"end for f0.s1.r4$x.loopexit.us.us.6", label %"for f0.s1.r4$x.us.us.6"

"end for f0.s1.r4$x.loopexit.us.us.6":            ; preds = %"for f0.s1.r4$x.us.us.6", %"for f0.s1.r4$x.us.us.6.prol.loopexit"
  ; Exit of the ".6" loop and setup for the ".7" copy: %1141 (= %298 | 7) is the
  ; 32-bit base used for read indices and %1142 (= %299 + %indvars.iv.next213.6)
  ; the 64-bit base used for write indices. %lcmp.mod312 selects whether the
  ; single peeled step in the ".7.prol" block runs before the main loop.
  %1141 = or i32 %298, 7
  %1142 = add nsw i64 %299, %indvars.iv.next213.6
  br i1 %lcmp.mod312, label %"for f0.s1.r4$x.us.us.7.prol", label %"for f0.s1.r4$x.us.us.7.prol.loopexit"

"for f0.s1.r4$x.us.us.7.prol":                    ; preds = %"end for f0.s1.r4$x.loopexit.us.us.6"
  ; Peeled single step of the ".7" copy for counter value 1 (1 << 6 == 64):
  ;   re' = re*re - im*im + %c_real   -> f0.0[dst]
  ;   im' = 2 * im * re   + %c_imag   -> f0.1[dst]
  ; with src = sext(%1141 + 64) - 64 and dst = %1142 + 64.
  %t325.us.us.7.prol = add i32 %1141, 64
  %1143 = sext i32 %t325.us.us.7.prol to i64
  %1144 = add nsw i64 %1143, -64
  %1145 = getelementptr inbounds float, float* %f0.0, i64 %1144
  %t326.us.us.7.prol = load float, float* %1145, align 4, !tbaa !215
  %1146 = getelementptr inbounds float, float* %f0.1, i64 %1144
  %t327.us.us.7.prol = load float, float* %1146, align 4, !tbaa !217
  %1147 = fmul float %t327.us.us.7.prol, %t327.us.us.7.prol
  %1148 = fmul float %t326.us.us.7.prol, %t326.us.us.7.prol
  %1149 = fsub float %1148, %1147
  %1150 = fadd float %1149, %c_real
  %1151 = add nuw nsw i64 %1142, 64
  %1152 = getelementptr inbounds float, float* %f0.0, i64 %1151
  store float %1150, float* %1152, align 4, !tbaa !215
  ; f0.0[src] is re-read after the store above instead of reusing the first load.
  %1153 = load float, float* %1145, align 4, !tbaa !215
  %1154 = fmul float %t327.us.us.7.prol, %1153
  %1155 = fmul float %1154, 2.000000e+00
  %1156 = fadd float %1155, %c_imag
  %1157 = getelementptr inbounds float, float* %f0.1, i64 %1151
  store float %1156, float* %1157, align 4, !tbaa !217
  br label %"for f0.s1.r4$x.us.us.7.prol.loopexit"

"for f0.s1.r4$x.us.us.7.prol.loopexit":           ; preds = %"for f0.s1.r4$x.us.us.7.prol", %"end for f0.s1.r4$x.loopexit.us.us.6"
  ; Starting induction value for the main ".7" loop: 2 if the ".7.prol" block
  ; already handled value 1, otherwise 1.
  %indvars.iv207.7.unr.ph = phi i64 [ 2, %"for f0.s1.r4$x.us.us.7.prol" ], [ 1, %"end for f0.s1.r4$x.loopexit.us.us.6" ]
  ; %136 (defined outside this view) being true skips the main ".7" loop entirely.
  br i1 %136, label %"end for f0.s1.r4$x.loopexit.us.us.7", label %"for f0.s1.r4$x.us.us.7"

"for f0.s1.r4$x.us.us.7":                         ; preds = %"for f0.s1.r4$x.us.us.7.prol.loopexit", %"for f0.s1.r4$x.us.us.7"
  ; Main ".7" loop. Each trip performs the update twice -- once for induction
  ; value iv and once for iv + 1 -- then advances iv by 2 and exits when the
  ; advanced value equals %101 (defined outside this view).
  ;
  ; For an induction value k, one update is:
  ;   r       = sext(%1141 + (trunc(k) << 6)) - 64        ; read index
  ;   w       = %1142 + (k << 6)                           ; write index
  ;   f0.0[w] = f0.0[r]*f0.0[r] - f0.1[r]*f0.1[r] + c_real
  ;   f0.1[w] = (f0.1[r] * f0.0[r] reloaded after the store) * 2.0 + c_imag
  %indvars.iv207.7 = phi i64 [ %indvars.iv.next208.7.1, %"for f0.s1.r4$x.us.us.7" ], [ %indvars.iv207.7.unr.ph, %"for f0.s1.r4$x.us.us.7.prol.loopexit" ]
  ; ---- first update: k = iv ----
  ; The read index is formed in i32 (trunc, shl, add) and only then sign-extended.
  %1158 = trunc i64 %indvars.iv207.7 to i32
  %1159 = shl i32 %1158, 6
  %t325.us.us.7 = add i32 %1141, %1159
  %1160 = sext i32 %t325.us.us.7 to i64
  %1161 = add nsw i64 %1160, -64
  %1162 = getelementptr inbounds float, float* %f0.0, i64 %1161
  %t326.us.us.7 = load float, float* %1162, align 4, !tbaa !215
  %1163 = getelementptr inbounds float, float* %f0.1, i64 %1161
  %t327.us.us.7 = load float, float* %1163, align 4, !tbaa !217
  %1164 = fmul float %t327.us.us.7, %t327.us.us.7
  %1165 = fmul float %t326.us.us.7, %t326.us.us.7
  %1166 = fsub float %1165, %1164
  %1167 = fadd float %1166, %c_real
  ; The write index is formed directly in i64.
  %1168 = shl nsw i64 %indvars.iv207.7, 6
  %1169 = add nuw nsw i64 %1142, %1168
  %1170 = getelementptr inbounds float, float* %f0.0, i64 %1169
  store float %1167, float* %1170, align 4, !tbaa !215
  ; Same address as %t326.us.us.7, loaded again after the store above --
  ; presumably kept because aliasing with the store could not be ruled out; confirm.
  %1171 = load float, float* %1162, align 4, !tbaa !215
  %1172 = fmul float %t327.us.us.7, %1171
  %1173 = fmul float %1172, 2.000000e+00
  %1174 = fadd float %1173, %c_imag
  %1175 = getelementptr inbounds float, float* %f0.1, i64 %1169
  store float %1174, float* %1175, align 4, !tbaa !217
  ; ---- second update: k = iv + 1 ----
  %indvars.iv.next208.7 = add nuw nsw i64 %indvars.iv207.7, 1
  %1176 = trunc i64 %indvars.iv.next208.7 to i32
  %1177 = shl i32 %1176, 6
  %t325.us.us.7.1 = add i32 %1141, %1177
  %1178 = sext i32 %t325.us.us.7.1 to i64
  %1179 = add nsw i64 %1178, -64
  %1180 = getelementptr inbounds float, float* %f0.0, i64 %1179
  %t326.us.us.7.1 = load float, float* %1180, align 4, !tbaa !215
  %1181 = getelementptr inbounds float, float* %f0.1, i64 %1179
  %t327.us.us.7.1 = load float, float* %1181, align 4, !tbaa !217
  %1182 = fmul float %t327.us.us.7.1, %t327.us.us.7.1
  %1183 = fmul float %t326.us.us.7.1, %t326.us.us.7.1
  %1184 = fsub float %1183, %1182
  %1185 = fadd float %1184, %c_real
  %1186 = shl nsw i64 %indvars.iv.next208.7, 6
  %1187 = add nuw nsw i64 %1142, %1186
  %1188 = getelementptr inbounds float, float* %f0.0, i64 %1187
  store float %1185, float* %1188, align 4, !tbaa !215
  ; Reload of f0.0[r] after the store, as in the first update.
  %1189 = load float, float* %1180, align 4, !tbaa !215
  %1190 = fmul float %t327.us.us.7.1, %1189
  %1191 = fmul float %1190, 2.000000e+00
  %1192 = fadd float %1191, %c_imag
  %1193 = getelementptr inbounds float, float* %f0.1, i64 %1187
  store float %1192, float* %1193, align 4, !tbaa !217
  ; ---- advance by two and test the exit condition ----
  %indvars.iv.next208.7.1 = add nsw i64 %indvars.iv207.7, 2
  %1194 = icmp eq i64 %indvars.iv.next208.7.1, %101
  br i1 %1194, label %"end for f0.s1.r4$x.loopexit.us.us.7", label %"for f0.s1.r4$x.us.us.7"

"end for f0.s1.r4$x.loopexit.us.us.7":            ; preds = %"for f0.s1.r4$x.us.us.7", %"for f0.s1.r4$x.us.us.7.prol.loopexit"
  ; Latch of the enclosing loop whose header is "for f0.s1.v1.us" (outside this view).
  ; The incremented value is not used in this block -- presumably it feeds the
  ; header's phi for %indvars.iv217; confirm against the header.
  %indvars.iv.next218 = add nsw i64 %indvars.iv217, 1
  ; The exit test compares the current (not the incremented) induction value,
  ; truncated to i32, against %152 (defined outside this view).
  %1195 = trunc i64 %indvars.iv217 to i32
  %1196 = icmp eq i32 %152, %1195
  ; Equal: leave the loop for "consume f0"; otherwise run another outer iteration.
  br i1 %1196, label %"consume f0", label %"for f0.s1.v1.us"
}

memory_profiler_mandelbrot.generator: /home/ubuntu/Parallel-IR/lib/Transforms/Tapir/CilkABI.cpp:1056: bool llvm::cilk::populateDetachedCFG(const llvm::DetachInst&, llvm::DominatorTree&, llvm::SmallPtrSetImpl<llvm::BasicBlock*>&, llvm::SmallVectorImpl<llvm::BasicBlock*>&, llvm::SmallPtrSetImpl<llvm::BasicBlock*>&, bool, bool): Assertion `!isa<ReturnInst>(BB->getTerminator()) && "EH block terminated by return."' failed.