Advertisement
Guest User

Untitled

a guest
May 22nd, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 19.56 KB | None | 0 0
  1. #include <vector>
  2. #include <string>
  3. #include <cmath>
  4. #include <iostream>
  5. #include <sndfile.h>
  6.  
  7. #ifdef WITH_RLIMIT
  8. #include <sys/resource.h>
  9. #endif
  10.  
  11.  
  12. //// Common types ////
  13.  
  14. typedef float sample_type;
  15. typedef std::vector < sample_type > samples;
  16.  
  17.  
  18. //// Math functions ////
  19.  
  20. static float const pi = 3.1415926535f;
  21.  
  22. unsigned int gcd(unsigned int p_first, unsigned int p_second)
  23. {
  24. if (p_first == 0) return p_second;
  25. if (p_second == 0) return p_first;
  26.  
  27. do
  28. {
  29. unsigned int temp = p_first % p_second;
  30. p_first = p_second;
  31. p_second = temp;
  32. }
  33. while (p_second != 0);
  34.  
  35. return p_first;
  36. }
  37.  
  38. unsigned int lcm(unsigned int p_first, unsigned int p_second)
  39. {
  40. unsigned int temp = gcd(p_first, p_second);
  41. return (temp != 0) ? (p_first * p_second / temp) : 0;
  42. }
  43.  
  44. float sinc(float f)
  45. {
  46. return (f == 0.0f) ? 1.0f : (std::sin(f * pi) / (f * pi));
  47. }
  48.  
  49. float hanning_func(int n, int N)
  50. {
  51. return 0.5f * (1.0f - std::cos(2.0f * pi * float(n) / float(N - 1)));
  52. }
  53.  
  54. samples compute_hanning_window(std::size_t const p_window_size)
  55. {
  56. samples window(p_window_size, 0);
  57. for (std::size_t i = 0; i < p_window_size; ++i)
  58. window[i] = hanning_func(i, p_window_size);
  59. return window;
  60. }
  61.  
  62. sample_type convolve(sample_type const *p_first_samples, sample_type const *p_second_samples, std::size_t const p_num_samples)
  63. {
  64. sample_type result = 0.0f;
  65. int num_samples = int(p_num_samples);
  66.  
  67. for (int first_index = 0; first_index < num_samples; ++first_index)
  68. {
  69. int second_index = num_samples - 1 - first_index;
  70.  
  71. sample_type first_sample = p_first_samples[first_index];
  72. sample_type second_sample = p_second_samples[second_index];
  73.  
  74. result += first_sample * second_sample;
  75. }
  76.  
  77. return result;
  78. }
  79.  
  80. samples compute_polyphase_filter_bank(std::size_t const p_upsampling_factor, std::size_t const p_downsampling_factor, std::size_t const p_filter_size)
  81. {
  82. // Create the window function that will be applied on top of the
  83. // stretched sinc. The window size equals the filter size plus the
  84. // upsampling factor, since we need to account for the extra nullsamples that
  85. // get inserted during zerostuffing.
  86. samples window = compute_hanning_window(p_filter_size * p_upsampling_factor);
  87.  
  88. samples filter(window.size(), 0);
  89.  
  90. // We need to create a low pass filter that cuts off frequencies at half of
  91. // either the input or the output sample rate, whichever is lower. However,
  92. // it turns out that for the filter calculation, we don't really need the
  93. // sample rates directly. Instead, we only need to "stretch" sinc by a
  94. // specific factor. A factor of 1 means no change. A factor of 2 means that
  95. // half of the original frequency range is cut off etc.
  96. //
  97. // If we only want to upsample by a factor of 2, and don't want to downsample,
  98. // then it is sufficient to stretch the sinc by a factor of 2. This is because
  99. // when we upsample, we apply zero stuffing, which creates unwantd spectral
  100. // images above the desired range. So, if for example we upsample by a factor
  101. // of 2, this means we insert 2-1 = 1 nullsample in between the original
  102. // samples (this is the zero-stuffing part). Like this:
  103. //
  104. // a b c d e f ... -> a 0 b 0 c 0 d 0 e 0 f 0 ...
  105. //
  106. // The lower 50% of the spectrum contains the original spectral image. So, we
  107. // need to get rid of anything except the lower half in the zero-stuffed signal.
  108. //
  109. // Another example: Upsampling by a factor of 3, no downsampling. Here,
  110. // we have 2 unwanted spectral images above the original one, and 3-1 = 2
  111. // nullsamples were stuffed in between the original samples. Like this:
  112. //
  113. // a b c d ... -> a 0 0 b 0 0 c 0 0 d 0 0 ...
  114. //
  115. // Since we only want the original image, and its spectrum only makes 1/3rd
  116. // of the spectrum of the zerostuffed signal, we stretch sinc by a factor of
  117. // 3, which causes it to lowpass-filter anything except the lowest 33,3% of
  118. // the spectrum.
  119. //
  120. // If we also downsample, then it depends by how much. If the downsampling
  121. // factor is higher than the upsampling, it means that the downsampled
  122. // signal will have a Nyquist frequency that is *lower* than that of the
  123. // original signal. Example: converting from 24000 Hz to 22050 Hz. Here,
  124. // we need to lowpass-filter to make sure we get rid of all frequencies
  125. // above 22050/2 = 11025 Hz, otherwise aliasing will occur in the downsampled
  126. // signal.
  127. //
  128. // If instead we downsample to a rate that is *higher* than the original
  129. // one, we lowpass-filter with the original signal's Nyquist frequency.
  130. // If we convert from 24000 Hz to 44100 Hz, we lowpass-filter to get rid
  131. // of all frequencies above 24000 / 2 = 12000 Hz.
  132. //
  133. // Again, the actual sample rate does not matter, only the factors do.
  134. // We simply pick the higher of the two (up/downsampling factors). This
  135. // corresponds to picking the lower of the two sample rates (input/output
  136. // sample rates).
  137. float scale_factor = float(std::max(p_downsampling_factor, p_upsampling_factor));
  138.  
  139. // The polyphase filter bank is stored as a one-dimensional array.
  140. // The polyphase filters are arranged in the array as shown in
  141. // this example:
  142. //
  143. // AAABBBCCCDDD
  144. //
  145. // Where A,B,C,D are the coefficients of polyphase filters A,B,C,D.
  146. // Upsampling factor is 4 (that's why there are four filters). Each
  147. // filter has 3 taps in this example.
  148.  
  149. // NOTE: It would be useful to reverse the filters, that is,
  150. // coefficients 1234 -> 4321, to make convolution easier, because
  151. // then it can be done using a simple multiply-and-add operation.
  152. // (Convolution does multiply-and-add, but with one of the inputs
  153. // reversed, so by pre-reversing one, it devolves into a simple
  154. // multiply-and-add.)
  155.  
  156. std::size_t num_filters = p_upsampling_factor;
  157. std::size_t num_filter_taps = window.size() / num_filters;
  158.  
  159. for (std::size_t i = 0; i < window.size(); ++i)
  160. {
  161. // Note that we do _not_ scale t by the window size here. So, we do not
  162. // normalize it in any way. This is intentional: The filter size defines
  163. // the _quality_ of the filtering. Larger filter means more sinc coefficients,
  164. // or in other words, we tap a larger range of sinc. If we normalized t,
  165. // we would always tap the _same_ range of the sinc function (the area
  166. // around the main lobe).
  167. // We do however offset t to make sure it ranges from -w/2 to w/2-1 instead of
  168. // 0 to w-1 . Otherwise we don't get a symmetric sinc tap.
  169. // TODO: Should it be from -w to +w instead?
  170. float t = int(i) - int(window.size() / 2);
  171.  
  172. // Compute stretched sinc.
  173. float f = sinc(t / scale_factor);
  174.  
  175. std::size_t polyphase_filter_index = i % num_filters;
  176. std::size_t offset_in_polyphase_filter = i / num_filters;
  177. std::size_t filter_array_idx = polyphase_filter_index * num_filter_taps + offset_in_polyphase_filter;
  178.  
  179. // Apply the window function on top of the sinc
  180. // function to produce filter coefficients.
  181. filter[filter_array_idx] = f * window[i];
  182. }
  183.  
  184. // TODO: It is unclear why this is necessary, but without this,
  185. // the output signal may be incorrectly amplified.
  186. if (p_downsampling_factor > p_upsampling_factor)
  187. {
  188. float dampening_factor = float(p_upsampling_factor) / p_downsampling_factor;
  189. for (auto & filter_coefficient : filter)
  190. filter_coefficient *= dampening_factor;
  191. }
  192.  
  193. return filter;
  194. }
  195.  
  196.  
  197. //// Sound I/O functionality ////
  198.  
  199. class sound_file
  200. {
  201. public:
  202. sound_file()
  203. : m_sndfile(nullptr)
  204. , m_num_samples(0)
  205. , m_sample_rate(0)
  206. {
  207. }
  208.  
  209. bool open_for_reading(std::string const &p_filename)
  210. {
  211. if (m_sndfile != nullptr)
  212. {
  213. std::cerr << "A file has already been opened\n";
  214. return false;
  215. }
  216.  
  217. SF_INFO info;
  218. info.format = 0;
  219.  
  220. if (!open_internal(p_filename, true, info))
  221. return false;
  222.  
  223. if (info.channels != 1)
  224. {
  225. sf_close(m_sndfile);
  226. m_sndfile = nullptr;
  227. std::cerr << "File \"" << p_filename << "\" has " << info.channels << " channels; only mono is supported" << std::endl;
  228. return false;
  229. }
  230.  
  231. // Technically, this is wrong, since 1 frame is a collection of N
  232. // samples, with N being the number of channels. But, since we
  233. // anyway only support mono in this example, it does not matter.
  234. m_num_samples = info.frames;
  235. m_sample_rate = info.samplerate;
  236.  
  237. return true;
  238. }
  239.  
  240. bool open_for_writing(std::string const &p_filename, unsigned int const p_sample_rate)
  241. {
  242. if (m_sndfile != nullptr)
  243. {
  244. std::cerr << "A file has already been opened\n";
  245. return false;
  246. }
  247.  
  248. SF_INFO info;
  249. info.format = SF_FORMAT_WAV | SF_FORMAT_FLOAT;
  250. info.samplerate = p_sample_rate;
  251. info.channels = 1;
  252.  
  253. if (!open_internal(p_filename, false, info))
  254. return false;
  255.  
  256. m_num_samples = 0;
  257. m_sample_rate = p_sample_rate;
  258.  
  259. return true;
  260. }
  261.  
  262. ~sound_file()
  263. {
  264. if (m_sndfile != nullptr)
  265. sf_close(m_sndfile);
  266. }
  267.  
  268. std::size_t get_total_num_input_samples() const
  269. {
  270. return m_num_samples;
  271. }
  272.  
  273. unsigned int get_sample_rate() const
  274. {
  275. return m_sample_rate;
  276. }
  277.  
  278. std::size_t read_all_samples(samples &p_samples)
  279. {
  280. return read_samples(p_samples, m_num_samples);
  281. }
  282.  
  283. std::size_t read_samples(samples &p_samples, std::size_t const p_num_samples_to_read)
  284. {
  285. if (p_num_samples_to_read > p_samples.size())
  286. p_samples.resize(p_num_samples_to_read);
  287.  
  288. return sf_read_float(m_sndfile, &p_samples[0], p_num_samples_to_read);
  289. }
  290.  
  291. void write_samples(samples const &p_samples, std::size_t p_num_samples_to_write)
  292. {
  293. if (p_num_samples_to_write > p_samples.size())
  294. p_num_samples_to_write = p_samples.size();
  295.  
  296. sf_write_float(m_sndfile, &p_samples[0], p_num_samples_to_write);
  297. }
  298.  
  299.  
  300. private:
  301. bool open_internal(std::string const &p_filename, bool const p_read, SF_INFO &p_info)
  302. {
  303. m_sndfile = sf_open(p_filename.c_str(), p_read ? SFM_READ : SFM_WRITE, &p_info);
  304.  
  305. if (m_sndfile == nullptr)
  306. {
  307. std::cerr << "Could not open file \"" << p_filename << "\" for " << (p_read ? "reading" : "writing") << std::endl;
  308. }
  309.  
  310. return (m_sndfile != nullptr);
  311. }
  312.  
  313. SNDFILE *m_sndfile;
  314. std::size_t m_num_samples;
  315. unsigned int m_sample_rate;
  316. };
  317.  
  318.  
  319. //// main ////
  320.  
  321. int main(int argc, char *argv[])
  322. {
  323. // Get the command line arguments.
  324. if (argc < 5)
  325. {
  326. std::cerr << "Usage: " << argv[0] << " <input filename> <output WAV filename> <output sample rate> <filter size>\n";
  327. return -1;
  328. }
  329.  
  330. std::string input_filename = argv[1];
  331. std::string output_filename = argv[2];
  332. unsigned int output_sample_rate = std::stoul(argv[3]);
  333. std::size_t filter_size = std::stoul(argv[4]);
  334.  
  335.  
  336. #ifdef WITH_RLIMIT
  337. // Limit the amount of memory this process can allocate to 1 GB to
  338. // prevent lots of swap activity in case we allocate too much. Since
  339. // this example just loads the entirety of the input file to memory,
  340. // this can happen with long tracks. With this rlimit, the process
  341. // is killed as soon as the limit is hit.
  342. struct rlimit memlim;
  343. getrlimit(RLIMIT_AS, &memlim);
  344. memlim.rlim_cur = std::min(std::size_t(1024*1024*1024), memlim.rlim_cur);
  345. setrlimit(RLIMIT_AS, &memlim);
  346. #endif
  347.  
  348.  
  349. // Open the input file.
  350. // NOTE: In this example, we read and sample rate convert the entire
  351. // input file at once. This is for sake of clarity and simplicity.
  352. // In production, the conversion would not be done that way. Instead,
  353. // the input samples would be streamed in and converted on the fly.
  354. // This saves a ton of memory, and would also work with live streams.
  355.  
  356. sound_file input_sound_file;
  357. if (!input_sound_file.open_for_reading(input_filename))
  358. return -1;
  359. samples input_samples;
  360. input_sound_file.read_all_samples(input_samples);
  361. if (input_samples.empty())
  362. {
  363. std::cerr << "Did not get any input samples\n";
  364. return -1;
  365. }
  366.  
  367. unsigned int input_sample_rate = input_sound_file.get_sample_rate();
  368.  
  369.  
  370. // Sample rate conversion works by first interpolating the signal
  371. // to a higher sample rate, then decimating to a lower sample rate.
  372. //
  373. // Interpolation means that new samples are introduced in between
  374. // the original input samples, followed by a lowpass filter that is
  375. // applied on that augmented input signal. The signal is augmented
  376. // by inserting zeros in between the original samples. This is
  377. // called "zero-stuffing". Zero-stuffing introduces copies of the
  378. // original spectrum, _above_ the original spectrum. This is where
  379. // the low-pass filter comes in - it cuts off these unwanted copies,
  380. // leaving us with an upsampled version of the original signal.
  381. //
  382. // So, if we want to upsample the signal by an integer factor N,
  383. // we insert N-1 nullsamples between the original samples.
  384. //
  385. // Decimation can then be applied. This simply involves picking
  386. // every Mth sample, M being the decimation factor. M=1 means no
  387. // decimation.
  388. //
  389. // In short, first we upsample by N by applying interpolation,
  390. // then we downsample by M by applying decimation. The M/N ratio
  391. // is the overall sample rate conversion ratio. Upsampling factor
  392. // N is also the interpolation factor N. Downsampling factor M
  393. // is also the decimation factor M.
  394. //
  395. // The up/downsampling factors are derived from the input and
  396. // output sample rates. To that end, the least common denominator
  397. // of the sample rates is determined. That's because the up- and
  398. // downsampling factors influence filter sizes, and we want filters
  399. // to not be unnecessarily large. For example, input sample rate
  400. // 48000 Hz, output sample rate 44100 Hz, that's a ratio of
  401. // 48000/44100. We could use N=48000, M=44100, but for better
  402. // efficiency (as explained above), we reduce this and come up
  403. // with an equal ratio of 160/147.
  404. //
  405. // Once the factors are known, we could in theory get the input
  406. // signal's samples and stuff in nullsamples between these samples.
  407. // However, that would be wasteful (in the example above it would
  408. // increase the input signal size by a factor of 160). We can
  409. // make the observation that during convolution, these nullsamples
  410. // don't contribute to the output, because these nullsamples are
  411. // multiplied with filter coefficients and added. So, an equation
  412. // like:
  413. //
  414. // output = sample * coeff1 + 0 * coeff2 + 0 * coeff3 ...
  415. //
  416. // will only really be influenced by the original samples, not
  417. // by the added nullsamples.
  418. //
  419. // The idea then is to simply omit these nullsamples and only
  420. // pick the coefficients that would actually be applied (in the
  421. // example above, coeff1 would be applied, while coeff2 and
  422. // coeff3 would not). We observe that in a zerostuffed signal,
  423. // the original samples appear in every Nth position. So, in this
  424. // zerostuffed signal:
  425. //
  426. // a 0 0 b 0 0 c ..
  427. //
  428. // every 3rd sample is an original sample. If we have a filter
  429. // with 12 taps, then its coefficients can be decomposed into
  430. // subfilters. The number of filters equal the upsampling factor.
  431. // In this example, the decomposition would be:
  432. //
  433. // Original filter: h1 h2 h3 h4 h5 h6 h7 h8 h9 h10 h11 h12
  434. // Subfilter 1: h1 h4 h7 h10
  435. // Subfilter 2: h2 h5 h8 h11
  436. // Subfilter 3: h3 h6 h9 h12
  437. //
  438. // During decimation, we would normally pick a sample from the
  439. // interpolated signal. Suppose that we didn't use polyphase
  440. // filters, but instead actually did use zerostuffing and
  441. // filtered that zerostuffed signal. Suppose upsampling factor
  442. // N is 3, downsampling factor M is 2.
  443. //
  444. // Original signal: i1 i2 i3 i4 i5 i6 i7 ..
  445. // Zerostuffed signal: i1 0 0 i2 0 0 i3 0 0 i4 0 0 i5 0 0 ..
  446. // Interpolated signal: i1 a b i2 c d i3 e f i4 g h i6 i j ..
  447. // (a-j are newly interpolated samples that resulted from
  448. // convolving the filter with the zerostuffed signal)
  449. //
  450. // Decimation factor M = 2 means we pick every 2nd
  451. // interpolated sample, and get the output signal
  452. // i1 b c i3 f g i6 ..
  453. //
  454. // We optimize this by using polyphase filters, eliminating
  455. // the need for an intermediate zerostuffed signal. Instead,
  456. // we pick the subfilter that would produce the sample that
  457. // we pick during decimation.
  458. //
  459. // From the example above, suppose we are about to pick
  460. // sample "b". to compute "b", we pick the appropriate
  461. // subfilter. We find this subfilter by computing the
  462. // position in the interpolated signal. This we call the
  463. // "interpolated position". In the case of "b", this position
  464. // would be 2. (Positions start at 0.) We then apply modulo 3
  465. // (the number of subfilters) to pick the appropriate subfilter.
  466. // That's subfilter (2 mod 3) = 2 in this case.
  467. //
  468. // Then we need the position in the original input signal that
  469. // corresponds to the "b" sample". This we get by calculating
  470. // position_in_original_signal = position_in_output_signal * M / N
  471. // So, in this case, "b" is at position #1 in the output signal,
  472. // and 1*2/3 = 0. Now we have the position in the input signal
  473. // where we get the input samples that we want to convolve with
  474. // the subfilter #2 we picked earlier. This means that this
  475. // subfilter is convolved with input samples i1 to i4.
  476. //
  477. // Another example would be if we wanted to compute sample "g",
  478. // which is at "interpolated position" position 10 and at output
  479. // position 5. 10 mod 3 = 1, and 5*2/3 = 3. So, we would convolve
  480. // input samples i4 to i7 with subfilter #1.
  481.  
  482.  
  483. // Compute the up- and downsampling factors.
  484. unsigned int sample_rate_lcm = lcm(input_sample_rate, output_sample_rate);
  485. // This is the interpolation factor N mentioned above.
  486. unsigned int upsampling_factor = sample_rate_lcm / input_sample_rate;
  487. // This is the decimation factor M mentioned above.
  488. unsigned int downsampling_factor = sample_rate_lcm / output_sample_rate;
  489.  
  490.  
  491. // Print some info.
  492. std::cerr << "Input / output sample rate: " << input_sample_rate << " Hz / " << output_sample_rate << " Hz\n";
  493. std::cerr << "Up / downsampling factors: " << upsampling_factor << " / " << downsampling_factor << "\n";
  494. std::cerr << "Filter size: " << filter_size << "\n";
  495.  
  496.  
  497. // Prepare the polyphase filter and the output samples buffer.
  498. samples polyphase_filter_bank = compute_polyphase_filter_bank(upsampling_factor, downsampling_factor, filter_size);
  499. samples output_samples(input_samples.size() * upsampling_factor / downsampling_factor, 0);
  500.  
  501. std::size_t num_polyphase_filters = upsampling_factor;
  502. std::size_t polyphase_filter_size = polyphase_filter_bank.size() / num_polyphase_filters;
  503.  
  504.  
  505. // Open the output file.
  506. sound_file output_sound_file;
  507. if (!output_sound_file.open_for_writing(output_filename, output_sample_rate))
  508. return -1;
  509.  
  510.  
  511. // Now perform the sample rate conversion.
  512. std::size_t output_position = 0;
  513. std::size_t interpolated_position = 0;
  514. std::size_t last_progress = 0;
  515. // Don't iterate over the last filter_size samples. That's because
  516. // convolve() will access all samples from output_position to
  517. // (output_position + polyphase_filter_size - 1).
  518. for (; output_position < output_samples.size() - filter_size; ++output_position, interpolated_position += downsampling_factor)
  519. {
  520. // Print some progress. We keep track of the last computed
  521. // progress to check if we should actually print something.
  522. // Otherwise, the constant console output could actually
  523. // slow down this code (and it floods the console with
  524. // lines of course).
  525. std::size_t progress = output_position / 100000;
  526. if (progress != last_progress)
  527. {
  528. std::cerr << output_position << "/" << output_samples.size() << "(" << (float(output_position) / float(output_samples.size()) * 100.0f) << "%)\n";
  529. last_progress = progress;
  530. }
  531.  
  532. // Pick the right subfilter.
  533. std::size_t polyphase_filter_index = interpolated_position % num_polyphase_filters;
  534. sample_type const *polyphase_filter_coefficients = &(polyphase_filter_bank[polyphase_filter_index * polyphase_filter_size]);
  535.  
  536. // Pick what input samples we need to convolve with the
  537. // chosen subfilter.
  538. sample_type const *input_samples_coefficients = &(input_samples[output_position * downsampling_factor / upsampling_factor]);
  539.  
  540. // Perform the convolution, producing the sample rate
  541. // converted output.
  542. output_samples[output_position] = convolve(input_samples_coefficients, polyphase_filter_coefficients, polyphase_filter_size);
  543. }
  544.  
  545.  
  546. // Write the result.
  547. output_sound_file.write_samples(output_samples, output_samples.size());
  548.  
  549.  
  550. return 0;
  551. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement