Advertisement
medmab

c to cuda help

Oct 25th, 2014
191
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 14.54 KB
  1. hi
  2. I want to convert this code in cuda, but I'm new to programming cuda. then who can help me.
  3.  
  4.  
  5. //
  6. // Author:  VA
  7. // Email:   contact@vashe.org
  8. // Version: 0.1
  9. // File:    turbo_example.c
  10. //
  11. // Rate 1/3 turbo coder simple example (for educational purposes)
  12. //
  13. // This program is free software; you can redistribute it and/or
  14. // modify it under the terms of the GNU General Public License
  15. // as published by the Free Software Foundation; either version 2
  16. // of the License, or (at your option) any later version.
  17. //
  18. // This program is distributed in the hope that it will be useful,
  19. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21. // GNU General Public License for more details.
  22. //
  23. // References:
  24. //
  25. //   1.S.A. Barbulescu, "Iterative Decoding of Turbo Codes and Other
  26. //     Concatenated Codes"
  27. //     http://www.itr.unisa.edu.au/~steven/thesis/sab.ps.gz
  28. //
  29. //   2.W.E. Ryan,"A Turbo Code Tutorial" New Mexico State University
  30. //     http://vada.skku.ac.kr/ClassInfo/digital-com2000/slides/turbo2c.pdf
  31. //
  32. //   3.M.C. Reed, S.S. Pietrobon, "Turbo-code termination schemes and a
  33. //     novel alternative for short frames" IEEE Int. Symp. on Personal,
  34. //     Indoor and Mobile Radio Commun., Taipei, Taiwan, pp. 354-358,
  35. //     Oct. 1996.
  36. //     http://www.itr.unisa.edu.au/~steven/turbo/PIMRC96.ps.gz
  37. //
  38. // Revisions: 0.1 fixed typo - La missing from modified_bcjr args
  39. //
  40. //
  41. #include <math.h>
  42. #include <stdio.h>
  43. #include <stdlib.h>
  44.  
  45. #define DEBUG     1   //set level of debug visibility [0=>off,1=>min,2=>max]
  46. #define NOISEOFF  0   //set to suppress noise in channel
  47.  
  48. #define N_ITERATION 4 //no. of turbo decoder iterations
  49. #define TBD -1        //trellis termination bits (inserted by encoder #1)
  50.  
  51. //  Block length in our example (6 databits + 2 termination bits)
  52. //  A practical turbo code must use a much larger block length to
  53. //  approach the Shannon limit.
  54. //
  55. #define N    8
  56.  
  57. //  Pseudo random permutation (of length = the block length).
  58. //  In our example we use a PN sequence with 0 appended
  59. //  See chapter 3 in [1] for some other possibilities.
  60. //
  61. int permutation[N] = {7,5,1,2,4,3,6,0};
  62.  
  63.  
  64. // tables for trellis are global (simple example from pp 38 in [1])
  65. //
  66. #define M  4      //no. of trellis states
  67.  
  68. int from[M][2];   //from[m][i] = next state (from state m with databit = i)
  69. int to[M][2];     //to[m][i] = previous state (to state m with databit = i)
  70. int parity[M][2]; //parity bit associated with transition from state m
  71. int term[M][2];   //term[m] = pair of data bits required to terminate trellis
  72.  
  73.  
  74. //  Normally distributed number generator (ubiquitous Box-Muller method)
  75. //
  76. double normal(void)
  77. {
  78. double x, y, rr, randn;
  79. do{
  80.       x  = (double) 2*rand()/RAND_MAX - 1.0; //uniform in range [-1,1]
  81.       y  = (double) 2*rand()/RAND_MAX - 1.0; //uniform in range [-1,1]
  82.       rr = x*x + y*y;
  83.   } while( rr >= 1 );
  84.   randn = x*sqrt((-2.0*log(rr))/rr);
  85. return(randn);
  86. }
  87.  
  88. //  modified BCJR algorithm (MAP decoder)
  89. //
  90. void modified_bcjr
  91. (
  92. int    is_term,      //indicates if trellis terminated
  93. double Lc,           //Lc = 2/(sigma*sigma) = channel reliability
  94. double La[N],        //apriori likelihood of each info bit
  95. double x_d[N],       //noisy data
  96. double p_d[N],       //noisy parity
  97. double Le[N]         //extrinsic log likelihood
  98. )
  99. {
  100. int    k, m, i;
  101. double xk_h, pk_h;      //databit & parity associated with a branch
  102. double gamma[N][M][2];  //gammas for total likelihoods
  103. double gammae[N][M][2]; //gammas for extrinsic likelihoods
  104. double pr0, pr1;        //extrinsic likelihood = pr1/pr0
  105. double alpha[N+1][M];   //probability of entering branch via state m
  106. double beta[N+1][M];    //probability of exiting branch via state m
  107. double total;           //used for normalising alpha's and beta's
  108.  
  109.   //calculate branch gamma's
  110.    for(k = 0; k < N; k++) //for each trellis stage
  111.    {
  112. for(m = 0; m < M; m++) //for each state
  113. {
  114. for(i = 0; i < 2; i++) //for each databit
  115. {
  116. //data associated with branch
  117. xk_h = i ? +1 : -1;            //map databit to PAM symbol
  118.  
  119. //parity associated with branch
  120. pk_h = parity[m][i] ? +1 : -1; //map parity bit to PAM symbol
  121.  
  122.                //used later to calculate alpha's and beta's
  123. gamma[k][m][i] = exp(0.5*(La[k] * xk_h +
  124.                          Lc * x_d[k] * xk_h +
  125.                          Lc * p_d[k] * pk_h));
  126.  
  127.                //used later to calculate extrinsic likelihood
  128. gammae[k][m][i] = exp(0.5*(Lc * p_d[k] * pk_h));
  129. }
  130. }
  131. }
  132.  
  133. //  Calculate state alpha's
  134. //
  135.    //  As the likelihood ratio for each stage k has a linear combination
  136.    //  of alpha terms in both the numerator and the denominator, we can
  137.    //  scale all the alpha's by any convenient scaling constant.
  138.    //
  139.    //  To help avoid underflow/overflow we normalise the alphas at each
  140.    //  stage so that sum across all states is unity.
  141.    //
  142.  
  143.    //  The encoders always start in state 0
  144. alpha[0][0] = 1;
  145. for(m = 1; m < M; m++)
  146. alpha[0][m] = 0;
  147.  
  148. for(k = 1; k <= N; k++)
  149. {
  150. total = 0;
  151.  
  152.    for(m = 0; m < M; m++)
  153.    {
  154. alpha[k][m] = alpha[k-1][to[m][0]] * gamma[k-1][to[m][0]][0] +
  155.              alpha[k-1][to[m][1]] * gamma[k-1][to[m][1]][1];
  156.  
  157. total += alpha[k][m];
  158. }
  159.  
  160. //normalise
  161. for(m = 0; m < M; m++)
  162. alpha[k][m] /= total;
  163. }
  164.  
  165. //  Calculate state beta's
  166. //
  167.    //  As the likelihood ratio for each stage k has a linear combination
  168.    //  of beta terms in both the numerator and the denominator, we can
  169.    //  scale all the beta's by any convenient scaling constant.
  170.    //
  171.    //  To help avoid underflow/overflow we normalise the betas at each
  172.    //  stage so that sum across all states is unity.
  173.    //
  174.  
  175. if(is_term)                 //if trellis terminated
  176. {
  177. //we know for sure the final state is 0
  178.    beta[N][0] = 1;
  179.    for(m = 1; m < M; m++)
  180.     beta[N][m] = 0;
  181. }
  182. else                       //else trellis not terminated
  183. {
  184. //we haven't a clue which is final state
  185. //so the best we can do is say they're all equally likely
  186.    for(m = 0; m < M; m++)
  187.     beta[N][m] = 1.0 / (double) M;
  188. }
  189.  
  190.    //iterate backwards through trellis
  191. for(k = N-1; k >= 0; k--)
  192. {
  193. total = 0;
  194. for(m = 0; m < 4; m++)
  195. {
  196. beta[k][m] = beta[k+1][from[m][0]] * gamma[k][m][0] +
  197.         beta[k+1][from[m][1]] * gamma[k][m][1];
  198.  
  199.  
  200. total += beta[k][m];
  201. }
  202.  
  203.        //normalise
  204. for(m = 0; m < 4; m++)
  205. beta[k][m] /= total;
  206. }
  207.  
  208.    //  Calculate extrinsic likelihood
  209.    //
  210. //  This is the information "gleaned" from the parity check
  211. //  Note the Ck's in equation 20 in [2] are different in the
  212. //  numerator and denominator. This is why the channel and
  213. //  apriori likelihoods can be brought out.
  214. //
  215. for(k = 0; k < N; k++)
  216. {
  217. pr0 = pr1 = 0;
  218. for(m = 0; m < 4; m++)
  219. {
  220. //we use gammae rather than gamma as we want the
  221. //extrinsic component of the overall likelihood
  222. pr1 += (alpha[k][m] * gammae[k][m][1] * beta[k+1][from[m][1]]);
  223. pr0 += (alpha[k][m] * gammae[k][m][0] * beta[k+1][from[m][0]]);
  224. }
  225. Le[k] = log(pr1 / pr0); //extrinsic likelihood
  226. }
  227.  
  228.    #if DEBUG > 1
  229.    for(k = 0; k < N; k++)
  230.    {
  231. for(m = 0; m < M; m++)
  232. {
  233. for(i = 0; i < 2; i++)
  234. {
  235. printf("gamma[%i][%i][%i]  = %f\t", k, m, i, gamma[k][m][i]);
  236. printf("gammae[%i][%i][%i] = %f\n", k, m, i, gammae[k][m][i]);
  237. }
  238. }
  239. printf("\n");
  240. }
  241.  
  242. for(k = 0; k <= N; k++)
  243. {
  244.    for(m = 0; m < M; m++)
  245. printf("alpha[%i][%i] = %f\n", k, m, alpha[k][m]);
  246. printf("\n");
  247. }
  248. for(k = 0; k <= N; k++)
  249. {
  250.    for(m = 0; m < M; m++)
  251. printf("beta[%i][%i] = %f\n", k, m, beta[k][m]);
  252. printf("\n");
  253. }
  254.    #endif
  255.  
  256. }
  257.  
  258. //
  259. //      +--------------------------> Xk
  260. //      |  fb
  261. //      |  +---------(+)-------+
  262. //      |  |          |        |
  263. //  Xk--+-(+)-+->[D]----->[D]--+
  264. //            |                |
  265. //            +--------------(+)---> Pk
  266. //
  267. //
  268. void gen_tab(void)
  269. {
  270. int m, i, b0, b1, fb, state;
  271.  
  272.    //generate tables for 4 state RSC encoder
  273. for(m = 0; m < M; m++) //for each starting state
  274. for(i = 0; i < 2; i++) //for each possible databit
  275. {
  276. b0 = (m >> 0) & 1; //bit 0 of state
  277. b1 = (m >> 1) & 1; //bit 1 of state
  278.  
  279. //parity from state m with databit i
  280. parity[m][i] = b0 ^ i;
  281.  
  282. //from[m][i] = next state from state m with databit i
  283. from[m][i]   = (b0<<1) + (i ^ b0 ^ b1);
  284. }
  285.  
  286.    //to[m][i] = previous state to state m with databit i
  287.    for(m = 0; m < M; m++)
  288.     for(i = 0; i < 2; i++)
  289. to[from[m][i]][i] = m;
  290.  
  291. //  Generate table of data bit pairs which terminate
  292. //  the trellis for a given state m
  293. //
  294. //  We simply set Xk equal to the feedback value to force
  295. //  the delay line to fill up with zeros.
  296. //
  297. for(m = 0; m < M; m++) //for each state
  298. {
  299. state = m;
  300. b0 = (state >> 0) & 1; //bit 0 of state
  301. b1 = (state >> 1) & 1; //bit 1 of state
  302. fb = b0 ^ b1;          //feedback bit
  303. term[m][0] = fb;       //will set X[N-2] = fb
  304.  
  305. state = from[m][fb];   //advance from state m with databit=fb
  306. b0 = (state >> 0) & 1; //bit 0 of state
  307. b1 = (state >> 1) & 1; //bit 1 of state
  308. fb = b0 ^ b1;          //feedback bit
  309. term[m][1] = fb;       //will set X[N-1] = fb
  310. }
  311. }
  312.  
  313. //
  314. //       +-----------> Xk
  315. //       |
  316. //       |
  317. //       |
  318. //  Xk---+--[E1]-----> P1k
  319. //       |
  320. //      [P]
  321. //       |
  322. //       +--[E2]-----> P2k
  323. //
  324. //
  325. void turbo_encode
  326. (
  327. int X[N],   //block of N-2 information bits + 2 to_be_decided bits
  328. int P1[N],  //encoder #1 parity bits
  329. int P2[N]   //encoder #2 parity bits
  330. )
  331. {
  332. int    k;      //trellis stage
  333. int    state;  //encoder state
  334. int    X_p[N]; //X_permuted = permuted bits
  335.  
  336. //encoder #1
  337. state = 0; //encoder always starts in state 0
  338. for(k = 0; k < N-2; k++)
  339. {
  340. P1[k] = parity[state][X[k]];
  341. state = from[state][X[k]];
  342. //printf("s[%i] = %i\n", k, state);
  343. }
  344.  
  345. //terminate encoder #1 trellis to state 0
  346. X[N-2]  = term[state][0];  //databit to feed a 0 into delay line
  347. X[N-1]  = term[state][1];  //databit to feed another 0 into delay line
  348.  
  349. P1[N-2] = parity[state][X[N-2]]; //parity from state with databitX[N-2]
  350. state   = from[state][X[N-2]];   //next state from current state
  351.    P1[N-1] = parity[state][X[N-1]]; //parity from state with databit=X[N-1]
  352. state   = from[state][X[N-1]];   //next state from current state
  353.  
  354. if(state != 0)
  355. {
  356. //should never get here
  357. printf("Error: Could not terminate encoder #1 trellis\n");
  358. exit(1);
  359. }
  360.  
  361. //permute tx databits for encoder #2
  362. for(k = 0; k < N; k++)
  363. X_p[k] = X[permutation[k]];
  364.  
  365. //encoder #2
  366. state = 0; //encoder always starts in state 0
  367. for(k = 0; k < N; k++)
  368. {
  369. P2[k] = parity[state][X_p[k]]; //parity from state with databit=X_p[k]
  370. state = from[state][X_p[k]];   //next state from current state
  371. }
  372.  
  373. //for(k = 0; k < N; k++)
  374. // printf("%i %i %i %i\n", X[k], P1[k], X_p[k], P2[k]);
  375.  
  376. }
  377.  
  378. void turbo_decode(
  379. double sigma,   //channel noise standard deviation
  380. double x_d[N],  //x_dash  = noisy data symbol
  381. double p1_d[N], //p1_dash = noisy parity#1 symbol
  382. double p2_d[N], //p2_dash = noisy parity#2 symbol
  383. double L_h[N],  //L_hat = likelihood of databit given entire observation
  384. int    X_h[N]   //X_hat = sliced MAP decisions
  385. )
  386. {
  387. int i, k;
  388.  
  389. double Le1[N];    //decoder #1 extrinsic likelihood
  390. double Le1_p[N];  //decoder #1 extrinsic likelihood permuted
  391. double Le2[N];    //decoder #2 extrinsic likelihood
  392. double Le2_ip[N]; //decoder #2 extrinsic likelihood inverse permuted
  393.    double Lc;        //channel reliability value
  394.  
  395.    Lc = 2.0 / (sigma*sigma); //requires sigma to be non-trivial
  396.  
  397.    //zero apriori information into very first iteration of BCJR
  398.    for(k = 0; k < N; k++)
  399. Le2_ip[k] = 0;
  400.  
  401.    for(i = 0; i < N_ITERATION; i++)
  402.    {
  403.     modified_bcjr(1, Lc, Le2_ip, x_d, p1_d, Le1);
  404.  
  405.        //permute decoder#1 likelihoods to match decoder#2 order
  406.     for(k = 0; k < N; k++)
  407.     Le1_p[k] = Le1[permutation[k]];
  408.  
  409.     modified_bcjr(0, Lc, Le1_p,  x_d, p2_d, Le2);
  410.  
  411.        //inverse permute decoder#2 likelihoods to match decoder#1 order
  412.     for(k = 0; k < N; k++)
  413.     Le2_ip[permutation[k]] = Le2[k];
  414.  
  415.        #if DEBUG > 0
  416. for(k = 0; k < N; k++)
  417. {
  418. printf("Le1[%i] = %f\t", k, Le1[k]);
  419. printf("Le2_ip[%i] = %f\t", k, Le2_ip[k]);
  420. //printf("Lc*x_d[%i] = %f", k, Lc*x_d[k]);
  421. printf("\n");
  422. }
  423. printf("\n");
  424. #endif
  425. }
  426.  
  427.    //calculate overall likelihoods and then slice'em
  428.    for(k = 0; k < N; k++)
  429.    {
  430. L_h[k] = Lc*x_d[k] + Le1[k] + Le2_ip[k]; //soft decision
  431. X_h[k] = (L_h[k] > 0.0) ? 1 : 0;         //hard decision
  432. }
  433. }
  434.  
  435. /*
  436. gcc turbo_example.c -lm -o t; t
  437. */
  438.  
  439. int main(void)
  440. {
  441.    //  Transmit bits for our simple example
  442.    //  The two last bits will be inserted later by encoder#1
  443.    //  to park it at state 0
  444.    //
  445.    int X[N] = {1,1,0,0,1,0,TBD,TBD};
  446.  
  447.    //noise standard deviation
  448. double sigma = 1.0;
  449.  
  450. int    P1[N];     //encoder #1 parity bits
  451. int    P2[N];     //encoder #2 parity bits
  452. double x[N];      //databit mapped to symbol
  453. double p1[N];     //encoder #1 parity bit mapped to symbol
  454. double p2[N];     //encoder #2 parity bit mapped to symbol
  455. double x_d[N];    //x_dash  = noisy data symbol
  456. double p1_d[N];   //p1_dash = noisy parity#1 symbol
  457. double p2_d[N];   //p2_dash = noisy parity#2 symbol
  458. double L_h[N];    //L_hat = likelihood of databit given entire observation
  459. int    X_h[N];    //X_hat = sliced MAP decisions
  460. int    k;         //databit index (trellis stage)
  461.  
  462.    /********************************
  463.     *         INITIALISE           *
  464.     ********************************/
  465.  
  466. srand(1);    //init random number generator
  467.    gen_tab();   //generate trellis tables
  468. sigma  = 1;  //noise std deviation
  469.  
  470.    /********************************
  471.     *           ENCODER            *
  472.     ********************************/
  473.  
  474.    turbo_encode(X, P1, P2);
  475.  
  476.    //map bits to symbols
  477. for(k = 0; k < N; k++) //for entire block length
  478. {
  479. x[k]  = X[k]  ? +1 : -1;  //map databit to symbol
  480. p1[k] = P1[k] ? +1 : -1;  //map parity #1 to symbol
  481. p2[k] = P2[k] ? +1 : -1;  //map parity #2 to symbol
  482. }
  483.  
  484.    /********************************
  485.     *           CHANNEL            *
  486.     ********************************/
  487.  
  488.    //add some AWGN
  489. for(k = 0; k < N; k++)
  490. {
  491. #if NOISEOFF
  492. x_d[k]  = x[k];
  493. p1_d[k] = p1[k];
  494. p2_d[k] = p2[k];
  495. #else
  496. x_d[k]  = x[k]  + sigma*normal();
  497. p1_d[k] = p1[k] + sigma*normal();
  498. p2_d[k] = p2[k] + sigma*normal();
  499. #endif
  500. }
  501.  
  502.    #if DEBUG > 1
  503. for(k = 0; k < N; k++)
  504. printf("%f \t%f \t%f\n", x_d[k], p1_d[k], p2_d[k]);
  505. #endif
  506.  
  507.    /********************************
  508.     *           DECODER            *
  509.     ********************************/
  510.  
  511.    turbo_decode(sigma, x_d, p1_d, p2_d, L_h, X_h);
  512.  
  513.    //print soft decisions
  514. for(k = 0; k < N ; k++)
  515. printf("L_h[%i] = %f\n", k, L_h[k]);
  516. printf("\n");
  517.  
  518.    //print hard decisions
  519. printf("X_h = ");
  520.    for(k = 0; k < N; k++)
  521.     printf("%i", X_h[k]);
  522. printf("\n");
  523.  
  524. return 0;
  525. }
  526. // end of file: turbo_example.c
Advertisement
RAW Paste Data Copied
Advertisement