kyuurzy

YouTube Transcript

Oct 8th, 2025 (edited)
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
JavaScript 1.95 KB | Source Code | 0 0
  1. const fetch = require("node-fetch")
  2.  
  /**
   * Request the captions of a YouTube video from the remote caption endpoint
   * and turn them into a transcript.
   *
   * The video URL is sent as the `videoUrl` field of a JSON POST body. The
   * `selectedCaptions` field of the JSON response is passed to
   * `extractTranscript`; `videoID` and `defaultLanguage` are copied through
   * unchanged.
   *
   * @param {string} videoUrl - URL of the video, forwarded verbatim to the endpoint.
   * @returns {Promise<{videoID: *, defaultLanguage: *, transcript: *}>}
   *   `videoID` and `defaultLanguage` as reported by the endpoint, plus whatever
   *   `extractTranscript` returns for the caption text.
   * @throws {Error} When the endpoint answers with a non-2xx HTTP status.
   * @throws {TypeError} When the response JSON carries no string `selectedCaptions`.
   */
  async function getYouTubeCaptions(videoUrl) {
    const response = await fetch('https://website-tools-dot-maestro-218920.uk.r.appspot.com/getYoutubeCaptions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Mobile Safari/537.36',
        'Referer': 'https://maestra.ai/tools/video-to-text/youtube-transcript-generator?v=RY1zQHjUG_Q',
      },
      body: JSON.stringify({ videoUrl }),
    });

    // Fail early with the HTTP status instead of trying to parse an error page.
    if (!response.ok) {
      throw new Error(`Caption request failed: ${response.status} ${response.statusText}`);
    }

    const result = await response.json();

    // Without this guard a missing field surfaces later as an opaque
    // "cannot read properties of undefined" error inside extractTranscript.
    if (typeof result?.selectedCaptions !== 'string') {
      throw new TypeError('Caption response does not contain a "selectedCaptions" string');
    }

    return {
      videoID: result.videoID,
      defaultLanguage: result.defaultLanguage,
      transcript: extractTranscript(result.selectedCaptions),
    };
  }
  25.  
  /**
   * Convert WebVTT caption text into an array of plain-text cue strings.
   *
   * Header lines (`WEBVTT`, `Kind:`, `Language:`) and lines containing `-->`,
   * `align:` or `position:` are skipped. Inline tags such as `<c>` are
   * stripped. Consecutive text lines are joined with a single space, and a
   * blank line closes the current cue. Cues that consist solely of `[Music]`
   * or `[Musik]` are removed from the result.
   *
   * @param {string} webvttText - Raw WebVTT document.
   * @returns {string[]} One trimmed string per non-empty cue, in document order.
   */
  function extractTranscript(webvttText) {
    const headerPrefixes = ['WEBVTT', 'Kind:', 'Language:'];
    const ignoredCues = new Set(['[Musik]', '[Music]']);

    const transcript = [];
    let currentText = '';

    // Close the cue being accumulated, keeping it only if it has content.
    const flushCue = () => {
      const cue = currentText.trim();
      if (cue !== '') {
        transcript.push(cue);
      }
      currentText = '';
    };

    // Blank lines are deliberately kept while iterating: they are the cue
    // separators. Filtering them out up front would merge every cue into a
    // single string and make the music-marker filter below ineffective.
    for (const line of webvttText.split('\n')) {
      if (headerPrefixes.some((prefix) => line.startsWith(prefix))) {
        continue;
      }

      if (line.includes('-->') || line.includes('align:') || line.includes('position:')) {
        continue;
      }

      if (line.trim() === '') {
        flushCue();
        continue;
      }

      const cleanLine = line.replace(/<[^>]+>/g, '').trim();
      if (cleanLine !== '') {
        currentText += currentText === '' ? cleanLine : ` ${cleanLine}`;
      }
    }

    // The document may end without a trailing blank line.
    flushCue();

    return transcript.filter((text) => !ignoredCues.has(text));
  }
  66.  
  // Example usage.
  // This file loads its dependency with require(), i.e. it runs as CommonJS,
  // where top-level `await` is a syntax error. The call is therefore wrapped in
  // an async function, which also lets failures be reported instead of ending
  // up as an unhandled promise rejection.
  (async () => {
    try {
      const captions = await getYouTubeCaptions("https://youtu.be/FtHaMWRn8Mc?si=G503YW_gdLl4NWL_");
      console.log(JSON.stringify(captions, null, 2));
    } catch (error) {
      console.error(error);
      process.exitCode = 1;
    }
  })();
Tags: Scrape
Advertisement
Add Comment
Please, Sign In to add comment