Advertisement
Guest User

Untitled

a guest
Feb 24th, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 21.88 KB | None | 0 0
  1. /*
  2. **
  3. ** LINPACK.C Linpack benchmark, calculates FLOPS.
  4. ** (FLoating Point Operations Per Second)
  5. **
  6. ** Translated to C by Bonnie Toy 5/88
  7. **
  8. ** Modified by Will Menninger, 10/93, with these features:
  9. ** (modified on 2/25/94 to fix a problem with daxpy for
  10. ** unequal increments or equal increments not equal to 1.
  11. ** Jack Dongarra)
  12. **
  13. ** - Defaults to double precision.
  14. ** - Averages ROLLed and UNROLLed performance.
  15. ** - User selectable array sizes.
  16. ** - Automatically does enough repetitions to take at least 10 CPU seconds.
  17. ** - Prints machine precision.
  18. ** - ANSI prototyping.
  19. **
  20. ** To compile: cc -O -o linpack linpack.c -lm
  21. **
  22. **
  23. */
  24.  
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <math.h>
  28. #include <time.h>
  29. #include <float.h>
  30.  
  31. #define DP
  32.  
  33. #ifdef SP
  34. #define ZERO 0.0
  35. #define ONE 1.0
  36. #define PREC "Single"
  37. #define BASE10DIG FLT_DIG
  38.  
  39. typedef float REAL;
  40. #endif
  41.  
  42. #ifdef DP
  43. #define ZERO 0.0e0
  44. #define ONE 1.0e0
  45. #define PREC "Double"
  46. #define BASE10DIG DBL_DIG
  47.  
  48. typedef double REAL;
  49. #endif
  50.  
  51. static REAL linpack (long nreps,int arsize);
  52. static void matgen (REAL *a,int lda,int n,REAL *b,REAL *norma);
  53. static void dgefa (REAL *a,int lda,int n,int *ipvt,int *info,int roll);
  54. static void dgesl (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll);
  55. static void daxpy_r (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
  56. static REAL ddot_r (int n,REAL *dx,int incx,REAL *dy,int incy);
  57. static void dscal_r (int n,REAL da,REAL *dx,int incx);
  58. static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
  59. static REAL ddot_ur (int n,REAL *dx,int incx,REAL *dy,int incy);
  60. static void dscal_ur (int n,REAL da,REAL *dx,int incx);
  61. static int idamax (int n,REAL *dx,int incx);
  62. static REAL second (void);
  63.  
  64. static void *mempool;
  65.  
  66.  
  67. void main(void)
  68.  
  69. {
  70. char buf[80];
  71. int arsize;
  72. long arsize2d,memreq,nreps;
  73. size_t malloc_arg;
  74.  
  75. time_t start, end;
  76. double elapsed; // seconds
  77. start = time(NULL);
  78. int terminate = 1;
  79.  
  80. //while (1)
  81. while (terminate)
  82. {
  83. end = time(NULL);
  84. elapsed = difftime(end, start);
  85. if (elapsed >= 120.0 /* seconds */)
  86. {
  87. terminate = 0;
  88. }
  89. else
  90. /*printf("Enter array size (q to quit) [200]: ");
  91. fgets(buf,79,stdin);
  92. if (buf[0]=='q' || buf[0]=='Q')
  93. break;
  94. if (buf[0]=='\0' || buf[0]=='\n')
  95. arsize=200;
  96. else
  97. arsize=atoi(buf);*/
  98. {
  99. arsize=200;
  100. arsize/=2;
  101. arsize*=2;
  102. if (arsize<10)
  103. {
  104. printf("Too small.\n");
  105. continue;
  106. }
  107. arsize2d = (long)arsize*(long)arsize;
  108. memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int);
  109. printf("Memory required: %ldK.\n",(memreq+512L)>>10);
  110. malloc_arg=(size_t)memreq;
  111. if (malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL)
  112. {
  113. printf("Not enough memory available for given array size.\n\n");
  114. continue;
  115. }
  116. printf("\n\nLINPACK benchmark, %s precision.\n",PREC);
  117. printf("Machine precision: %d digits.\n",BASE10DIG);
  118. printf("Array size %d X %d.\n",arsize,arsize);
  119. printf("Average rolled and unrolled performance:\n\n");
  120. printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n");
  121. printf("----------------------------------------------------\n");
  122. nreps=1;
  123. while (linpack(nreps,arsize)<10.)
  124. nreps*=2;
  125. free(mempool);
  126. printf("\n");
  127. }
  128. }
  129. }
  130.  
  131.  
  132. static REAL linpack(long nreps,int arsize)
  133.  
  134. {
  135. REAL *a,*b;
  136. REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops;
  137. int *ipvt,n,info,lda;
  138. long i,arsize2d;
  139.  
  140. lda = arsize;
  141. n = arsize/2;
  142. arsize2d = (long)arsize*(long)arsize;
  143. ops=((2.0*n*n*n)/3.0+2.0*n*n);
  144. a=(REAL *)mempool;
  145. b=a+arsize2d;
  146. ipvt=(int *)&b[arsize];
  147. tdgesl=0;
  148. tdgefa=0;
  149. totalt=second();
  150. for (i=0;i<nreps;i++)
  151. {
  152. matgen(a,lda,n,b,&norma);
  153. t1 = second();
  154. dgefa(a,lda,n,ipvt,&info,1);
  155. tdgefa += second()-t1;
  156. t1 = second();
  157. dgesl(a,lda,n,ipvt,b,0,1);
  158. tdgesl += second()-t1;
  159. }
  160. for (i=0;i<nreps;i++)
  161. {
  162. matgen(a,lda,n,b,&norma);
  163. t1 = second();
  164. dgefa(a,lda,n,ipvt,&info,0);
  165. tdgefa += second()-t1;
  166. t1 = second();
  167. dgesl(a,lda,n,ipvt,b,0,0);
  168. tdgesl += second()-t1;
  169. }
  170. totalt=second()-totalt;
  171. if (totalt<0.5 || tdgefa+tdgesl<0.2)
  172. return(0.);
  173. kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl));
  174. toverhead=totalt-tdgefa-tdgesl;
  175. if (tdgefa<0.)
  176. tdgefa=0.;
  177. if (tdgesl<0.)
  178. tdgesl=0.;
  179. if (toverhead<0.)
  180. toverhead=0.;
  181. printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
  182. nreps,totalt,100.*tdgefa/totalt,
  183. 100.*tdgesl/totalt,100.*toverhead/totalt,
  184. kflops);
  185. return(totalt);
  186. }
  187.  
  188.  
  189. /*
  190. ** For matgen,
  191. ** We would like to declare a[][lda], but c does not allow it. In this
  192. ** function, references to a[i][j] are written a[lda*i+j].
  193. */
  194. static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma)
  195.  
  196. {
  197. int init,i,j;
  198.  
  199. init = 1325;
  200. *norma = 0.0;
  201. for (j = 0; j < n; j++)
  202. for (i = 0; i < n; i++)
  203. {
  204. init = (int)((long)3125*(long)init % 65536L);
  205. a[lda*j+i] = (init - 32768.0)/16384.0;
  206. *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma;
  207. }
  208. for (i = 0; i < n; i++)
  209. b[i] = 0.0;
  210. for (j = 0; j < n; j++)
  211. for (i = 0; i < n; i++)
  212. b[i] = b[i] + a[lda*j+i];
  213. }
  214.  
  215.  
  216. /*
  217. **
  218. ** DGEFA benchmark
  219. **
  220. ** We would like to declare a[][lda], but c does not allow it. In this
  221. ** function, references to a[i][j] are written a[lda*i+j].
  222. **
  223. ** dgefa factors a double precision matrix by gaussian elimination.
  224. **
  225. ** dgefa is usually called by dgeco, but it can be called
  226. ** directly with a saving in time if rcond is not needed.
  227. ** (time for dgeco) = (1 + 9/n)*(time for dgefa) .
  228. **
  229. ** on entry
  230. **
  231. ** a REAL precision[n][lda]
  232. ** the matrix to be factored.
  233. **
  234. ** lda integer
  235. ** the leading dimension of the array a .
  236. **
  237. ** n integer
  238. ** the order of the matrix a .
  239. **
  240. ** on return
  241. **
  242. ** a an upper triangular matrix and the multipliers
  243. ** which were used to obtain it.
  244. ** the factorization can be written a = l*u where
  245. ** l is a product of permutation and unit lower
  246. ** triangular matrices and u is upper triangular.
  247. **
  248. ** ipvt integer[n]
  249. ** an integer vector of pivot indices.
  250. **
  251. ** info integer
  252. ** = 0 normal value.
  253. ** = k if u[k][k] .eq. 0.0 . this is not an error
  254. ** condition for this subroutine, but it does
  255. ** indicate that dgesl or dgedi will divide by zero
  256. ** if called. use rcond in dgeco for a reliable
  257. ** indication of singularity.
  258. **
  259. ** linpack. this version dated 08/14/78 .
  260. ** cleve moler, university of New Mexico, argonne national lab.
  261. **
  262. ** functions
  263. **
  264. ** blas daxpy,dscal,idamax
  265. **
  266. */
  267. static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll)
  268.  
  269. {
  270. REAL t;
  271. int idamax(),j,k,kp1,l,nm1;
  272.  
  273. /* gaussian elimination with partial pivoting */
  274.  
  275. if (roll)
  276. {
  277. *info = 0;
  278. nm1 = n - 1;
  279. if (nm1 >= 0)
  280. for (k = 0; k < nm1; k++)
  281. {
  282. kp1 = k + 1;
  283.  
  284. /* find l = pivot index */
  285.  
  286. l = idamax(n-k,&a[lda*k+k],1) + k;
  287. ipvt[k] = l;
  288.  
  289. /* zero pivot implies this column already
  290. triangularized */
  291.  
  292. if (a[lda*k+l] != ZERO)
  293. {
  294.  
  295. /* interchange if necessary */
  296.  
  297. if (l != k)
  298. {
  299. t = a[lda*k+l];
  300. a[lda*k+l] = a[lda*k+k];
  301. a[lda*k+k] = t;
  302. }
  303.  
  304. /* compute multipliers */
  305.  
  306. t = -ONE/a[lda*k+k];
  307. dscal_r(n-(k+1),t,&a[lda*k+k+1],1);
  308.  
  309. /* row elimination with column indexing */
  310.  
  311. for (j = kp1; j < n; j++)
  312. {
  313. t = a[lda*j+l];
  314. if (l != k)
  315. {
  316. a[lda*j+l] = a[lda*j+k];
  317. a[lda*j+k] = t;
  318. }
  319. daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
  320. }
  321. }
  322. else
  323. (*info) = k;
  324. }
  325. ipvt[n-1] = n-1;
  326. if (a[lda*(n-1)+(n-1)] == ZERO)
  327. (*info) = n-1;
  328. }
  329. else
  330. {
  331. *info = 0;
  332. nm1 = n - 1;
  333. if (nm1 >= 0)
  334. for (k = 0; k < nm1; k++)
  335. {
  336. kp1 = k + 1;
  337.  
  338. /* find l = pivot index */
  339.  
  340. l = idamax(n-k,&a[lda*k+k],1) + k;
  341. ipvt[k] = l;
  342.  
  343. /* zero pivot implies this column already
  344. triangularized */
  345.  
  346. if (a[lda*k+l] != ZERO)
  347. {
  348.  
  349. /* interchange if necessary */
  350.  
  351. if (l != k)
  352. {
  353. t = a[lda*k+l];
  354. a[lda*k+l] = a[lda*k+k];
  355. a[lda*k+k] = t;
  356. }
  357.  
  358. /* compute multipliers */
  359.  
  360. t = -ONE/a[lda*k+k];
  361. dscal_ur(n-(k+1),t,&a[lda*k+k+1],1);
  362.  
  363. /* row elimination with column indexing */
  364.  
  365. for (j = kp1; j < n; j++)
  366. {
  367. t = a[lda*j+l];
  368. if (l != k)
  369. {
  370. a[lda*j+l] = a[lda*j+k];
  371. a[lda*j+k] = t;
  372. }
  373. daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&a[lda*j+k+1],1);
  374. }
  375. }
  376. else
  377. (*info) = k;
  378. }
  379. ipvt[n-1] = n-1;
  380. if (a[lda*(n-1)+(n-1)] == ZERO)
  381. (*info) = n-1;
  382. }
  383. }
  384.  
  385.  
  386. /*
  387. **
  388. ** DGESL benchmark
  389. **
  390. ** We would like to declare a[][lda], but c does not allow it. In this
  391. ** function, references to a[i][j] are written a[lda*i+j].
  392. **
  393. ** dgesl solves the double precision system
  394. ** a * x = b or trans(a) * x = b
  395. ** using the factors computed by dgeco or dgefa.
  396. **
  397. ** on entry
  398. **
  399. ** a double precision[n][lda]
  400. ** the output from dgeco or dgefa.
  401. **
  402. ** lda integer
  403. ** the leading dimension of the array a .
  404. **
  405. ** n integer
  406. ** the order of the matrix a .
  407. **
  408. ** ipvt integer[n]
  409. ** the pivot vector from dgeco or dgefa.
  410. **
  411. ** b double precision[n]
  412. ** the right hand side vector.
  413. **
  414. ** job integer
  415. ** = 0 to solve a*x = b ,
  416. ** = nonzero to solve trans(a)*x = b where
  417. ** trans(a) is the transpose.
  418. **
  419. ** on return
  420. **
  421. ** b the solution vector x .
  422. **
  423. ** error condition
  424. **
  425. ** a division by zero will occur if the input factor contains a
  426. ** zero on the diagonal. technically this indicates singularity
  427. ** but it is often caused by improper arguments or improper
  428. ** setting of lda . it will not occur if the subroutines are
  429. ** called correctly and if dgeco has set rcond .gt. 0.0
  430. ** or dgefa has set info .eq. 0 .
  431. **
  432. ** to compute inverse(a) * c where c is a matrix
  433. ** with p columns
  434. ** dgeco(a,lda,n,ipvt,rcond,z)
  435. ** if (!rcond is too small){
  436. ** for (j=0,j<p,j++)
  437. ** dgesl(a,lda,n,ipvt,c[j][0],0);
  438. ** }
  439. **
  440. ** linpack. this version dated 08/14/78 .
  441. ** cleve moler, university of new mexico, argonne national lab.
  442. **
  443. ** functions
  444. **
  445. ** blas daxpy,ddot
  446. */
  447. static void dgesl(REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll)
  448.  
  449. {
  450. REAL t;
  451. int k,kb,l,nm1;
  452.  
  453. if (roll)
  454. {
  455. nm1 = n - 1;
  456. if (job == 0)
  457. {
  458.  
  459. /* job = 0 , solve a * x = b */
  460. /* first solve l*y = b */
  461.  
  462. if (nm1 >= 1)
  463. for (k = 0; k < nm1; k++)
  464. {
  465. l = ipvt[k];
  466. t = b[l];
  467. if (l != k)
  468. {
  469. b[l] = b[k];
  470. b[k] = t;
  471. }
  472. daxpy_r(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
  473. }
  474.  
  475. /* now solve u*x = y */
  476.  
  477. for (kb = 0; kb < n; kb++)
  478. {
  479. k = n - (kb + 1);
  480. b[k] = b[k]/a[lda*k+k];
  481. t = -b[k];
  482. daxpy_r(k,t,&a[lda*k+0],1,&b[0],1);
  483. }
  484. }
  485. else
  486. {
  487.  
  488. /* job = nonzero, solve trans(a) * x = b */
  489. /* first solve trans(u)*y = b */
  490.  
  491. for (k = 0; k < n; k++)
  492. {
  493. t = ddot_r(k,&a[lda*k+0],1,&b[0],1);
  494. b[k] = (b[k] - t)/a[lda*k+k];
  495. }
  496.  
  497. /* now solve trans(l)*x = y */
  498.  
  499. if (nm1 >= 1)
  500. for (kb = 1; kb < nm1; kb++)
  501. {
  502. k = n - (kb+1);
  503. b[k] = b[k] + ddot_r(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
  504. l = ipvt[k];
  505. if (l != k)
  506. {
  507. t = b[l];
  508. b[l] = b[k];
  509. b[k] = t;
  510. }
  511. }
  512. }
  513. }
  514. else
  515. {
  516. nm1 = n - 1;
  517. if (job == 0)
  518. {
  519.  
  520. /* job = 0 , solve a * x = b */
  521. /* first solve l*y = b */
  522.  
  523. if (nm1 >= 1)
  524. for (k = 0; k < nm1; k++)
  525. {
  526. l = ipvt[k];
  527. t = b[l];
  528. if (l != k)
  529. {
  530. b[l] = b[k];
  531. b[k] = t;
  532. }
  533. daxpy_ur(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1);
  534. }
  535.  
  536. /* now solve u*x = y */
  537.  
  538. for (kb = 0; kb < n; kb++)
  539. {
  540. k = n - (kb + 1);
  541. b[k] = b[k]/a[lda*k+k];
  542. t = -b[k];
  543. daxpy_ur(k,t,&a[lda*k+0],1,&b[0],1);
  544. }
  545. }
  546. else
  547. {
  548.  
  549. /* job = nonzero, solve trans(a) * x = b */
  550. /* first solve trans(u)*y = b */
  551.  
  552. for (k = 0; k < n; k++)
  553. {
  554. t = ddot_ur(k,&a[lda*k+0],1,&b[0],1);
  555. b[k] = (b[k] - t)/a[lda*k+k];
  556. }
  557.  
  558. /* now solve trans(l)*x = y */
  559.  
  560. if (nm1 >= 1)
  561. for (kb = 1; kb < nm1; kb++)
  562. {
  563. k = n - (kb+1);
  564. b[k] = b[k] + ddot_ur(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
  565. l = ipvt[k];
  566. if (l != k)
  567. {
  568. t = b[l];
  569. b[l] = b[k];
  570. b[k] = t;
  571. }
  572. }
  573. }
  574. }
  575. }
  576.  
  577.  
  578.  
  579. /*
  580. ** Constant times a vector plus a vector.
  581. ** Jack Dongarra, linpack, 3/11/78.
  582. ** ROLLED version
  583. */
  584. static void daxpy_r(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
  585.  
  586. {
  587. int i,ix,iy;
  588.  
  589. if (n <= 0)
  590. return;
  591. if (da == ZERO)
  592. return;
  593.  
  594. if (incx != 1 || incy != 1)
  595. {
  596.  
  597. /* code for unequal increments or equal increments != 1 */
  598.  
  599. ix = 1;
  600. iy = 1;
  601. if(incx < 0) ix = (-n+1)*incx + 1;
  602. if(incy < 0)iy = (-n+1)*incy + 1;
  603. for (i = 0;i < n; i++)
  604. {
  605. dy[iy] = dy[iy] + da*dx[ix];
  606. ix = ix + incx;
  607. iy = iy + incy;
  608. }
  609. return;
  610. }
  611.  
  612. /* code for both increments equal to 1 */
  613.  
  614. for (i = 0;i < n; i++)
  615. dy[i] = dy[i] + da*dx[i];
  616. }
  617.  
  618.  
  619. /*
  620. ** Forms the dot product of two vectors.
  621. ** Jack Dongarra, linpack, 3/11/78.
  622. ** ROLLED version
  623. */
  624. static REAL ddot_r(int n,REAL *dx,int incx,REAL *dy,int incy)
  625.  
  626. {
  627. REAL dtemp;
  628. int i,ix,iy;
  629.  
  630. dtemp = ZERO;
  631.  
  632. if (n <= 0)
  633. return(ZERO);
  634.  
  635. if (incx != 1 || incy != 1)
  636. {
  637.  
  638. /* code for unequal increments or equal increments != 1 */
  639.  
  640. ix = 0;
  641. iy = 0;
  642. if (incx < 0) ix = (-n+1)*incx;
  643. if (incy < 0) iy = (-n+1)*incy;
  644. for (i = 0;i < n; i++)
  645. {
  646. dtemp = dtemp + dx[ix]*dy[iy];
  647. ix = ix + incx;
  648. iy = iy + incy;
  649. }
  650. return(dtemp);
  651. }
  652.  
  653. /* code for both increments equal to 1 */
  654.  
  655. for (i=0;i < n; i++)
  656. dtemp = dtemp + dx[i]*dy[i];
  657. return(dtemp);
  658. }
  659.  
  660.  
  661. /*
  662. ** Scales a vector by a constant.
  663. ** Jack Dongarra, linpack, 3/11/78.
  664. ** ROLLED version
  665. */
  666. static void dscal_r(int n,REAL da,REAL *dx,int incx)
  667.  
  668. {
  669. int i,nincx;
  670.  
  671. if (n <= 0)
  672. return;
  673. if (incx != 1)
  674. {
  675.  
  676. /* code for increment not equal to 1 */
  677.  
  678. nincx = n*incx;
  679. for (i = 0; i < nincx; i = i + incx)
  680. dx[i] = da*dx[i];
  681. return;
  682. }
  683.  
  684. /* code for increment equal to 1 */
  685.  
  686. for (i = 0; i < n; i++)
  687. dx[i] = da*dx[i];
  688. }
  689.  
  690.  
  691. /*
  692. ** constant times a vector plus a vector.
  693. ** Jack Dongarra, linpack, 3/11/78.
  694. ** UNROLLED version
  695. */
  696. static void daxpy_ur(int n,REAL da,REAL *dx,int incx,REAL *dy,int incy)
  697.  
  698. {
  699. int i,ix,iy,m;
  700.  
  701. if (n <= 0)
  702. return;
  703. if (da == ZERO)
  704. return;
  705.  
  706. if (incx != 1 || incy != 1)
  707. {
  708.  
  709. /* code for unequal increments or equal increments != 1 */
  710.  
  711. ix = 1;
  712. iy = 1;
  713. if(incx < 0) ix = (-n+1)*incx + 1;
  714. if(incy < 0)iy = (-n+1)*incy + 1;
  715. for (i = 0;i < n; i++)
  716. {
  717. dy[iy] = dy[iy] + da*dx[ix];
  718. ix = ix + incx;
  719. iy = iy + incy;
  720. }
  721. return;
  722. }
  723.  
  724. /* code for both increments equal to 1 */
  725.  
  726. m = n % 4;
  727. if ( m != 0)
  728. {
  729. for (i = 0; i < m; i++)
  730. dy[i] = dy[i] + da*dx[i];
  731. if (n < 4)
  732. return;
  733. }
  734. for (i = m; i < n; i = i + 4)
  735. {
  736. dy[i] = dy[i] + da*dx[i];
  737. dy[i+1] = dy[i+1] + da*dx[i+1];
  738. dy[i+2] = dy[i+2] + da*dx[i+2];
  739. dy[i+3] = dy[i+3] + da*dx[i+3];
  740. }
  741. }
  742.  
  743.  
  744. /*
  745. ** Forms the dot product of two vectors.
  746. ** Jack Dongarra, linpack, 3/11/78.
  747. ** UNROLLED version
  748. */
  749. static REAL ddot_ur(int n,REAL *dx,int incx,REAL *dy,int incy)
  750.  
  751. {
  752. REAL dtemp;
  753. int i,ix,iy,m;
  754.  
  755. dtemp = ZERO;
  756.  
  757. if (n <= 0)
  758. return(ZERO);
  759.  
  760. if (incx != 1 || incy != 1)
  761. {
  762.  
  763. /* code for unequal increments or equal increments != 1 */
  764.  
  765. ix = 0;
  766. iy = 0;
  767. if (incx < 0) ix = (-n+1)*incx;
  768. if (incy < 0) iy = (-n+1)*incy;
  769. for (i = 0;i < n; i++)
  770. {
  771. dtemp = dtemp + dx[ix]*dy[iy];
  772. ix = ix + incx;
  773. iy = iy + incy;
  774. }
  775. return(dtemp);
  776. }
  777.  
  778. /* code for both increments equal to 1 */
  779.  
  780. m = n % 5;
  781. if (m != 0)
  782. {
  783. for (i = 0; i < m; i++)
  784. dtemp = dtemp + dx[i]*dy[i];
  785. if (n < 5)
  786. return(dtemp);
  787. }
  788. for (i = m; i < n; i = i + 5)
  789. {
  790. dtemp = dtemp + dx[i]*dy[i] +
  791. dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
  792. dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
  793. }
  794. return(dtemp);
  795. }
  796.  
  797.  
  798. /*
  799. ** Scales a vector by a constant.
  800. ** Jack Dongarra, linpack, 3/11/78.
  801. ** UNROLLED version
  802. */
  803. static void dscal_ur(int n,REAL da,REAL *dx,int incx)
  804.  
  805. {
  806. int i,m,nincx;
  807.  
  808. if (n <= 0)
  809. return;
  810. if (incx != 1)
  811. {
  812.  
  813. /* code for increment not equal to 1 */
  814.  
  815. nincx = n*incx;
  816. for (i = 0; i < nincx; i = i + incx)
  817. dx[i] = da*dx[i];
  818. return;
  819. }
  820.  
  821. /* code for increment equal to 1 */
  822.  
  823. m = n % 5;
  824. if (m != 0)
  825. {
  826. for (i = 0; i < m; i++)
  827. dx[i] = da*dx[i];
  828. if (n < 5)
  829. return;
  830. }
  831. for (i = m; i < n; i = i + 5)
  832. {
  833. dx[i] = da*dx[i];
  834. dx[i+1] = da*dx[i+1];
  835. dx[i+2] = da*dx[i+2];
  836. dx[i+3] = da*dx[i+3];
  837. dx[i+4] = da*dx[i+4];
  838. }
  839. }
  840.  
  841.  
  842. /*
  843. ** Finds the index of element having max. absolute value.
  844. ** Jack Dongarra, linpack, 3/11/78.
  845. */
  846. static int idamax(int n,REAL *dx,int incx)
  847.  
  848. {
  849. REAL dmax;
  850. int i, ix, itemp;
  851.  
  852. if (n < 1)
  853. return(-1);
  854. if (n ==1 )
  855. return(0);
  856. if(incx != 1)
  857. {
  858.  
  859. /* code for increment not equal to 1 */
  860.  
  861. ix = 1;
  862. dmax = fabs((double)dx[0]);
  863. ix = ix + incx;
  864. for (i = 1; i < n; i++)
  865. {
  866. if(fabs((double)dx[ix]) > dmax)
  867. {
  868. itemp = i;
  869. dmax = fabs((double)dx[ix]);
  870. }
  871. ix = ix + incx;
  872. }
  873. }
  874. else
  875. {
  876.  
  877. /* code for increment equal to 1 */
  878.  
  879. itemp = 0;
  880. dmax = fabs((double)dx[0]);
  881. for (i = 1; i < n; i++)
  882. if(fabs((double)dx[i]) > dmax)
  883. {
  884. itemp = i;
  885. dmax = fabs((double)dx[i]);
  886. }
  887. }
  888. return (itemp);
  889. }
  890.  
  891.  
  892. static REAL second(void)
  893.  
  894. {
  895. return ((REAL)((REAL)clock()/(REAL)CLOCKS_PER_SEC));
  896. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement