Guest User

wasm cg-conv

a guest
Jun 9th, 2025
10
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.29 KB | None | 0 0
  /**
   * Convert text between CG-3 stream formats using libcg3's format converter.
   *
   * The input is written to a temporary file in the Emscripten virtual
   * filesystem, converted file-to-file by `cg3_sconverter_run_fns`, and the
   * result is read back as a string. Temporary files and the native converter
   * are released even when the conversion fails.
   *
   * @param {string} input_stream - Text to convert.
   * @param {object} [options]
   * @param {string} [options.input_format='auto'] - One of 'auto', 'cg',
   *   'niceline', 'apertium', 'fst', 'plain', 'jsonl'. 'auto' asks libcg3 to
   *   detect the format from the input itself.
   * @param {string} [options.output_format='cg'] - One of 'cg', 'niceline',
   *   'apertium', 'fst', 'plain', 'jsonl'.
   *   Other cg-conv style keys (unicode_tags, pipe_deleted, no_break,
   *   parse_dep, add_tags, sub_ltr, mapping_prefix, sub_delimiter, fst_wtag,
   *   fst_wfactor) may be passed but are currently ignored: the wrapped C API
   *   calls used here take no such settings.
   * @returns {Promise<string>} The converted text.
   * @throws {Error} If the module is not initialized, a format name is
   *   unknown, auto-detection fails, or the converter cannot be created.
   *   Because the function is `async`, these surface as promise rejections.
   */
  async function cgConv(input_stream, options = {}) {
    // `typeof` guard: a bare `!cg3` raises ReferenceError (not the message
    // below) when the binding does not exist at all.
    if (typeof cg3 === 'undefined' || !cg3) {
      throw new Error('CG3 module not initialized');
    }

    const { input_format = 'auto', output_format = 'cg' } = options;

    // Format name -> cg3_sformat enum value. A Map (rather than a plain
    // object) keeps inherited keys such as 'toString' or 'constructor' from
    // being accepted as format names.
    // NOTE(review): value 4 is not exposed here; confirm the numbering
    // against the cg3.h that the module was built from.
    const formatCodes = new Map([
      ['cg', 1], // CG3SF_CG
      ['niceline', 2], // CG3SF_NICELINE
      ['apertium', 3], // CG3SF_APERTIUM
      ['fst', 5], // CG3SF_FST
      ['plain', 6], // CG3SF_PLAIN
      ['jsonl', 7], // CG3SF_JSONL
    ]);

    // Wrap libcg3 functions
    const detectFormat = cg3.cwrap('cg3_detect_sformat_buffer', 'number', ['string', 'number']);
    const createConverter = cg3.cwrap('cg3_sconverter_create', 'number', ['number', 'number']);
    const runConverter = cg3.cwrap('cg3_sconverter_run_fns', null, ['number', 'string', 'string']);
    // NOTE(review): the converter is released through cg3_applicator_free;
    // confirm against cg3.h that this is the intended release function.
    const freeConverter = cg3.cwrap('cg3_applicator_free', null, ['number']);

    // Determine input format
    let input_fmt;
    if (input_format === 'auto') {
      // The 'string' argument is marshalled as UTF-8, so the length handed to
      // C must be the UTF-8 byte count, not the UTF-16 `String#length`.
      const byteLength = new TextEncoder().encode(input_stream).length;
      input_fmt = detectFormat(input_stream, byteLength);
      if (input_fmt === 0) { // CG3SF_INVALID
        throw new Error('Could not detect input format');
      }
    } else {
      input_fmt = formatCodes.get(input_format);
      if (input_fmt === undefined) {
        throw new Error(`Unknown input format: ${input_format}`);
      }
    }

    // Determine output format
    const output_fmt = formatCodes.get(output_format);
    if (output_fmt === undefined) {
      throw new Error(`Unknown output format: ${output_format}`);
    }

    // Create format converter
    const converter = createConverter(input_fmt, output_fmt);
    if (converter === 0) {
      throw new Error('Failed to create format converter');
    }

    // Timestamp plus random suffix keeps calls from reusing each other's files.
    const uniqueTag = `${Date.now()}-${Math.random()}`;
    const inputFile = `/tmp/${uniqueTag}.in`;
    const outputFile = `/tmp/${uniqueTag}.out`;

    try {
      cg3.FS.writeFile(inputFile, input_stream, { encoding: 'utf8' });
      runConverter(converter, inputFile, outputFile);
      return cg3.FS.readFile(outputFile, { encoding: 'utf8' });
    } finally {
      for (const tempPath of [inputFile, outputFile]) {
        try {
          cg3.FS.unlink(tempPath);
        } catch (cleanupError) {
          // Best-effort cleanup: the file may never have been created (e.g.
          // the conversion failed first), and a cleanup failure must not
          // mask the error that is already propagating.
        }
      }
      freeConverter(converter);
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment