Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import html
import re
import sys
from urllib.parse import parse_qs, urlparse

import requests
from youtube_transcript_api import YouTubeTranscriptApi
def extract_video_id(input_str):
    """Extract the video ID from a YouTube URL, or return a bare ID unchanged.

    Recognised URL shapes:
      * ``https://www.youtube.com/watch?v=<id>`` (any position of ``v`` in the query)
      * ``https://youtu.be/<id>`` (short links, with or without a query string)
      * ``https://www.youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    Args:
        input_str: A YouTube URL (with or without scheme) or a bare video ID.

    Returns:
        The video ID as a string. Input that does not mention a YouTube host
        is assumed to already be a video ID and is returned stripped of
        surrounding whitespace.
    """
    candidate = input_str.strip()
    if "youtube.com" not in candidate and "youtu.be" not in candidate:
        return candidate  # Assume it's a video ID

    # urlparse only identifies the host when a scheme is present.
    url = candidate if "://" in candidate else f"https://{candidate}"
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    # Short links carry the ID as the first path segment, not in ``v=``.
    if (host == "youtu.be" or host.endswith(".youtu.be")) and segments:
        return segments[0]

    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    # Unknown URL shape: fall back to the legacy split-based extraction.
    return candidate.split("v=")[-1].split("&")[0].split("?")[0]
def get_video_title(video_id):
    """Fetch a video's title by scraping its YouTube watch page (no pytube).

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        The text of the page's ``<title>`` element with the trailing
        " - YouTube" removed and HTML entities decoded, or the string
        "UnknownTitle" when the request fails, the status is not 200, or the
        title pattern is not found.
    """
    url = f"https://www.youtube.com/watch?v={video_id}"
    try:
        # Without a timeout a stalled connection would hang the script forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as e:
        print(f"Warning: Failed to fetch title ({e})")
        return "UnknownTitle"  # Fallback if title cannot be fetched

    if response.status_code == 200:
        match = re.search(r'<title>(.*?) - YouTube</title>', response.text)
        if match:
            # The title sits in raw HTML, so entities such as "&amp;" or
            # "&#39;" must be decoded to get the human-readable text.
            return html.unescape(match.group(1)).strip()
    return "UnknownTitle"  # Fallback if title cannot be fetched
def get_youtube_transcript(video_id):
    """Return the transcript of a YouTube video as newline-separated text.

    Each transcript entry's "text" field becomes one line of the result.
    This function never raises: on any failure it returns a string of the
    form "Error retrieving transcript: <reason>" instead.
    """
    # NOTE(review): newer youtube-transcript-api releases appear to replace
    # this static call with an instance-level fetch method — confirm against
    # the installed version.
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        text_lines = (segment["text"] for segment in segments)
        return "\n".join(text_lines)
    except Exception as err:
        return f"Error retrieving transcript: {err}"
def sanitize_filename(name):
    """Remove characters that are unsafe in file names.

    Strips both path separators (``/`` and ``\\``), the characters
    ``: * ? " < > |``, and ASCII control characters. All other characters,
    including spaces, are kept as they are.

    Args:
        name: The proposed file name (or file name fragment).

    Returns:
        ``name`` with the unsafe characters removed; may be an empty string.
    """
    # ``\\\\`` matches a literal backslash. The earlier pattern ``\/`` was only
    # an escaped forward slash, so backslashes slipped through.
    return re.sub(r'[\\/:*?"<>|\x00-\x1f]', "", name)
def main():
    """Command-line entry point: save a YouTube video's transcript to a file.

    Expects one argument, a YouTube video URL or ID. The transcript is
    written to "<title>_<video id>_transcript.txt" in the current directory.
    Exits with status 1 when the argument is missing or the transcript could
    not be retrieved.
    """
    if len(sys.argv) < 2:
        print("Usage: python script.py '<YouTube video URL or ID>'")
        sys.exit(1)

    input_str = sys.argv[1]
    video_id = extract_video_id(input_str)
    video_title = get_video_title(video_id)
    transcript = get_youtube_transcript(video_id)

    # get_youtube_transcript reports failure through its return value. Do not
    # save that message as if it were the transcript and claim success.
    if transcript.startswith("Error retrieving transcript:"):
        print(transcript, file=sys.stderr)
        sys.exit(1)

    # Sanitize the whole stem: the video ID comes from user input and may
    # contain path separators or other characters invalid in file names.
    stem = sanitize_filename(f"{video_title}_{video_id}_transcript")
    filename = f"{stem}.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(transcript)
    print(f"Transcript saved as: {filename}")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment