Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
- use std::collections::HashMap;
- use std::fmt::Pointer;
- use std::fs::File;
- use std::io::Read;
- use std::io::{BufRead, BufReader, Seek};
- use std::ops::Add;
- use std::process::Command;
- use std::process::Stdio;
- #[derive(Debug, Clone, Copy)]
- struct CCSym {
- sym: char,
- style: SymStyle
- }
/// Rendering attributes attached to caption glyphs.
#[derive(Debug, Clone, Copy)]
struct SymStyle {
    /// Raw style/colour code as taken from the caption stream.
    color: u8,
    /// Whether the glyph is underlined.
    underlined: bool,
    /// Set once a "No BG" code has been seen.
    transparent_bg: bool,
}
- #[derive(Debug)]
- struct CCState {
- row: u8,
- col: u8,
- line_buffer: Vec<Vec<Option<CCSym>>>,
- screen_buffer: Vec<Vec<Option<CCSym>>>,
- style: SymStyle,
- last_command: Option<(u8,u8,CC_Command)>
- }
/// Maps a 4-bit code from the special character set to its glyph.
///
/// # Panics
/// Panics when `c` is greater than `0xF`.
fn convert_char_special(c: &u8) -> char {
    // Glyphs indexed by code, 0x0 through 0xF.
    const SPECIAL: [char; 16] = [
        '®', '°', '½', '¿', '™', '¢', '£', '♪', 'à', ' ', 'è', 'â', 'ê', 'î', 'ô', 'û',
    ];
    match SPECIAL.get(usize::from(*c)) {
        Some(&glyph) => glyph,
        None => panic!("Invalid spcial char {}", c),
    }
}
/// Converts a basic caption character code to a `char`.
///
/// A handful of codes are remapped to accented letters and symbols; every
/// other code is converted with `char::from`.
///
/// # Panics
/// Panics when `c` is zero.
fn convert_char(c: &u8) -> char {
    // Codes whose glyph differs from the plain `char::from` conversion.
    const OVERRIDES: [(u8, char); 10] = [
        (0x2a, 'á'),
        (0x5c, 'é'),
        (0x5e, 'í'),
        (0x5f, 'ó'),
        (0x60, 'ú'),
        (0x7b, 'ç'),
        (0x7c, '÷'),
        (0x7d, 'Ñ'),
        (0x7e, 'ñ'),
        (0x7f, '█'),
    ];
    let code = *c;
    if code == 0x00 {
        panic!("convert_char does not handle zero chars");
    }
    OVERRIDES
        .iter()
        .find(|(from, _)| *from == code)
        .map(|&(_, glyph)| glyph)
        .unwrap_or_else(|| char::from(code))
}
- impl CCState {
- fn new() -> Self {
- Self {
- row:0,
- col:0,
- line_buffer: vec![vec![None;0xff];0xff],
- screen_buffer: vec![vec![None;0xff];0xff],
- style: SymStyle {
- color:0,
- underlined: false,
- transparent_bg: false
- },
- last_command: None,
- }
- }
- fn print(&mut self, c: char) {
- let c= CCSym {
- sym: c,
- style: self.style
- };
- self.line_buffer[self.row as usize][self.col as usize]=Some(c);
- }
- fn render_buffer(&self) -> Vec<String> {
- let mut ret = vec![];
- for row in &self.screen_buffer {
- let line : String = row.iter().flatten().map(|v| v.sym).collect();
- if !line.is_empty() {
- ret.push(line);
- }
- }
- return ret;
- }
- fn handle_control(&mut self, caption: u8, field: u8, cmd: &CC_Command) -> Option<Vec<String>> {
- let current_command = Some((caption,field,*cmd));
- if self.last_command==current_command {
- // println!("Skipping redundant command: {:?}",cmd);
- self.last_command=None;
- return None;
- }
- // println!("CMD: {:?} | Prev: {:?}",(caption,field,cmd),current_command);
- self.last_command = current_command;
- match cmd {
- CC_Command::Load => return None,
- CC_Command::Backspace => self.col=self.col.saturating_sub(1),
- CC_Command::ClearLine => todo!(),
- CC_Command::Alarm(_) => todo!(),
- CC_Command::ScrollUp(_) => todo!(),
- CC_Command::Flash => todo!(),
- CC_Command::Start(_) => todo!(),
- CC_Command::Resume => todo!(),
- CC_Command::ClearScreen => self.clear_screen_buffer(),
- CC_Command::CarriageReturn => todo!(),
- CC_Command::ClearBuffer => self.clear_line_buffer(),
- CC_Command::DisplayBuffer => {
- std::mem::swap(&mut self.screen_buffer,&mut self.line_buffer);
- return Some(self.render_buffer());
- },
- }
- None
- }
- fn get_row(&mut self, kind: &PreambleType, row: u8, next_row: bool) -> u8 {
- use PreambleType::*;
- let row= match (kind,row) {
- (Standard,0b00) => 11,
- (Standard,0b01) => 1,
- (Standard,0b10) => 3,
- (Standard,0b11) => 12,
- (Extended,0b00) => 14,
- (Extended,0b01) => 5,
- (Extended,0b10) => 7,
- (Extended,0b11) => 9,
- _ => unreachable!()
- };
- row + (next_row as u8)
- }
- fn handle_style(&mut self, kind: &PreambleType, style: &Style) {
- self.row = self.get_row(kind,style.row,style.next_row);
- self.style.color=style.style;
- self.style.underlined=style.undeline;
- self.col=0;
- }
- fn handle_address(&mut self, kind: &PreambleType, addr: &Address) {
- self.row = self.get_row(kind,addr.row,addr.next_row);
- self.col = addr.cursor*4;
- self.style.underlined=addr.undeline;
- }
- fn clear_screen_buffer(&mut self) {
- self.screen_buffer=vec![vec![None;0xff];0xff];
- }
- fn clear_line_buffer(&mut self) {
- self.line_buffer=vec![vec![None;0xff];0xff];
- }
- fn update(&mut self, event: &CaptionData) -> Option<Vec<String>> {
- match &event {
- CaptionData::Control{..} => (),
- _ => self.last_command = None,
- }
- match event {
- CaptionData::Padding => (),
- CaptionData::XDS(_, _) => todo!(),
- CaptionData::Character(c1, c2) => {
- if *c1!=0 {
- self.print(convert_char(c1));
- self.col+=1;
- };
- if *c2!=0 {
- self.print(convert_char(c2));
- self.col+=1;
- };
- },
- CaptionData::CharacterSpecial { channel, char } => {
- self.print(convert_char_special(char));
- self.col+=1;
- },
- CaptionData::CharacterWesternEurope { channel, charset, char } => todo!(),
- CaptionData::CharacterNorpak { channel, charset } => todo!(),
- CaptionData::Style(kind, style) => self.handle_style(kind, style),
- CaptionData::Address(kind, addr) => self.handle_address(kind, addr),
- CaptionData::BackgroundColor { channel, color, transparent } => todo!(),
- CaptionData::MidrowStyle { channel, style, underlined } => {
- self.style.color= *style;
- self.style.underlined= *underlined;
- },
- CaptionData::NoBG { channel } => {
- self.style.transparent_bg=true;
- },
- CaptionData::BlackText { channel, undelined } => todo!(),
- CaptionData::Control { caption, field, command } => {
- let line = self.handle_control(*caption, *field,command);
- if line.is_some() {
- return line;
- }
- },
- CaptionData::Tab(n) => {
- self.col+=n;
- },
- };
- None
- }
- }
- impl Default for CCState {
- fn default() -> Self {
- Self::new()
- }
- }
/// Bit patterns for every recognised caption byte pair, as
/// `((first byte mask, second byte mask), kind name)`.
///
/// Each mask is eight characters, most significant bit first: `0`/`1` must
/// match the bit exactly, `?` ignores the bit, and any other letter collects
/// that bit into a named field. The table is scanned top to bottom and the
/// first acceptable match wins, so narrow patterns must come before broader
/// patterns that share the same first byte.
const MASKS: [((&str, &str), &str); 16] = [
    (("?0000000", "?0000000"), "Padding"),
    (("?CCCCCCC", "?CCCCCCC"), "Character"),
    (("?000CCCC", "?000TTTT"), "XDS metadata"),
    (
        ("?001C001", "?011CCCC"),
        "Special North American character set",
    ),
    (
        ("?001C01S", "?01CCCCC"),
        "Extended Western European character set",
    ),
    // Preambles: in the second byte, bit 4 clear selects a style and bit 4
    // set selects an address (indent). This holds for the standard and the
    // extended rows alike.
    (("?001C0RR", "?1N0SSSU"), "Standard Style"),
    (("?001C0RR", "?1N1SSSU"), "Standard Address"),
    (("?001C1RR", "?1N0SSSU"), "Extended Style"),
    (("?001C1RR", "?1N1SSSU"), "Extended Address"),
    (("?001C000", "?010CCCT"), "BG Color"),
    (("?001C001", "?010SSSU"), "Midrow Style"),
    (("?001C111", "?0101101"), "No BG"),
    (("?001C111", "?010111U"), "Black Text"),
    (("?001C10F", "?010CCCC"), "Control"),
    (("?001C111", "?01000TT"), "Tab"),
    // Broadest pattern for this first byte; it must stay after "No BG",
    // "Black Text" and "Tab" or it would shadow them.
    (
        ("?001C111", "?010CCCC"),
        "Non-Western Norpak Character Sets",
    ),
];
/// A subtitle packet reported by ffprobe.
#[derive(Debug)]
struct Packet {
    /// Value of the packet's `pos` field.
    pos: u64,
    /// Value of the packet's `pts_time` field.
    time: f64,
}
/// Control commands carried by a "Control" caption code. The 4-bit code of
/// each command is defined by the `From<u8>` conversion.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum CC_Command {
    /// Code `0b0000`.
    Load,
    /// Code `0b0001`.
    Backspace,
    /// Code `0b0100`.
    ClearLine,
    /// Codes `0b0010` (`false`) and `0b0011` (`true`).
    Alarm(bool),
    /// Codes `0b0101`..=`0b0111`, carrying 2, 3 or 4.
    ScrollUp(u8),
    /// Code `0b1000`.
    Flash,
    /// Codes `0b1001` (`true`) and `0b1010` (`false`).
    Start(bool),
    /// Code `0b1011`.
    Resume,
    /// Code `0b1100`.
    ClearScreen,
    /// Code `0b1101`.
    CarriageReturn,
    /// Code `0b1110`.
    ClearBuffer,
    /// Code `0b1111`.
    DisplayBuffer,
}
- impl From<u8> for CC_Command {
- fn from(cmd: u8) -> Self {
- use CC_Command::*;
- match cmd {
- 0b0000 => Load,
- 0b0001 => Backspace,
- 0b0010 => Alarm(false),
- 0b0011 => Alarm(true),
- 0b0100 => ClearLine,
- 0b0101 => ScrollUp(2),
- 0b0110 => ScrollUp(3),
- 0b0111 => ScrollUp(4),
- 0b1000 => Flash,
- 0b1001 => Start(true),
- 0b1010 => Start(false),
- 0b1011 => Resume,
- 0b1100 => ClearScreen,
- 0b1101 => CarriageReturn,
- 0b1110 => ClearBuffer,
- 0b1111 => DisplayBuffer,
- other => panic!("Invalid command: {}", other),
- }
- }
- }
/// Decoded fields of a style preamble.
#[derive(Debug, PartialEq, Eq)]
struct Style {
    /// `C` bit of the first byte.
    channel: bool,
    /// 2-bit `R` field of the first byte.
    row: u8,
    /// `N` bit of the second byte.
    next_row: bool,
    /// 3-bit `S` field of the second byte.
    style: u8,
    /// `U` bit of the second byte.
    undeline: bool,
}
/// Decoded fields of an address preamble.
#[derive(Debug, PartialEq, Eq)]
struct Address {
    /// `C` bit of the first byte.
    channel: bool,
    /// 2-bit `R` field of the first byte.
    row: u8,
    /// `N` bit of the second byte.
    next_row: bool,
    /// 3-bit `S` field of the second byte; the decoder places the cursor at
    /// four times this value.
    cursor: u8,
    /// `U` bit of the second byte.
    undeline: bool,
}
/// Which of the two preamble row tables a preamble refers to.
#[derive(Debug, PartialEq, Eq)]
enum PreambleType {
    /// First byte matched `?001C0RR`.
    Standard,
    /// First byte matched `?001C1RR`.
    Extended,
}
- #[derive(Debug, PartialEq, Eq)]
- enum CaptionData {
- Padding,
- XDS(u8, u8),
- Character(u8, u8),
- CharacterSpecial {
- channel: bool,
- char: u8,
- },
- CharacterWesternEurope {
- channel: bool,
- charset: bool,
- char: u8,
- },
- CharacterNorpak {
- channel: bool,
- charset: u8,
- },
- Style(PreambleType, Style),
- Address(PreambleType, Address),
- BackgroundColor {
- channel: bool,
- color: u8,
- transparent: bool,
- },
- MidrowStyle {
- channel: bool,
- style: u8,
- underlined: bool,
- },
- NoBG {
- channel: u8,
- },
- BlackText {
- channel: u8,
- undelined: bool,
- },
- Control {
- caption: u8,
- field: u8,
- command: CC_Command,
- },
- Tab(u8),
- }
- #[derive(Debug, PartialEq, Eq)]
- struct Caption {
- odd_field: bool,
- data: CaptionData,
- }
- #[derive(Debug, PartialEq, Eq)]
- struct CC {
- user_data_type: u8,
- block_size: u8,
- odd_field_first: bool,
- filler: bool,
- block: Vec<Caption>,
- }
- fn parse_line(line: &str) -> Option<Packet> {
- if line.is_empty() {
- return None;
- }
- let line: Vec<&str> = line.split('|').collect();
- let data_type = line[0];
- let data: HashMap<&str, &str> = line
- .into_iter()
- .skip(1)
- .map(|v| {
- let v: Vec<&str> = v.split('=').collect();
- (v[0], v[1])
- })
- .collect();
- let codec_type = data["codec_type"];
- match (data_type, codec_type) {
- ("packet", "subtitle") => Some(Packet {
- pos: data["pos"].parse().unwrap(),
- time: data["pts_time"].parse().unwrap(),
- }),
- _ => None,
- }
- }
/// Splits a byte into its eight bits, most significant bit first.
fn to_bits(n: u8) -> [bool; 8] {
    let mut bits = [false; 8];
    for (idx, bit) in bits.iter_mut().enumerate() {
        // Walk a single-bit probe from bit 7 down to bit 0.
        *bit = (n & (0x80u8 >> idx)) != 0;
    }
    bits
}
/// Packs a most-significant-first bit sequence into a `u8`. With more than
/// eight bits, the earliest bits are shifted out and lost.
fn from_bits(n: &[bool]) -> u8 {
    n.iter()
        .fold(0u8, |acc, &bit| (acc << 1) | u8::from(bit))
}
/// Matches the bits `n` (most significant first) against an 8-character mask.
///
/// Mask characters: `0` and `1` require that exact bit, `?` accepts any bit,
/// and any other character names a field. Bits under the same field letter
/// are packed, in order, into that field's `u8` value.
///
/// Returns `None` as soon as a fixed bit disagrees, otherwise the map of
/// field letters to values (empty when the mask names no fields).
///
/// # Panics
/// Panics when `mask` is not exactly 8 bytes long.
fn bitmatch(n: [bool; 8], mask: &str) -> Option<HashMap<char, u8>> {
    if mask.len() != 8 {
        panic!("bitmach mask should be 8 characters!");
    }
    let mut fields: HashMap<char, u8> = HashMap::new();
    for (bit, symbol) in n.iter().copied().zip(mask.chars()) {
        match symbol {
            '?' => {}
            '0' | '1' => {
                if bit != (symbol == '1') {
                    return None;
                }
            }
            letter => {
                // Append this bit to the value collected for `letter` so far.
                let value = fields.entry(letter).or_insert(0);
                *value = (*value << 1) | u8::from(bit);
            }
        }
    }
    Some(fields)
}
- fn valid(data: &CaptionData) -> bool {
- use CaptionData::*;
- match data {
- Character(c1, c2) => {
- let c1=*c1;
- let c2=*c2;
- let c1 = c1 == 0 || c1 >= 0x20;
- let c2 = c2 == 0 || c2 >= 0x20;
- c1 && c2
- }
- Padding | Control { .. } | Address { .. } | MidrowStyle { .. } | Style(..) => true,
- CharacterSpecial { .. } => true,
- other => todo!("{:?}", other),
- }
- }
- fn parse_caption_data(field: &[u8]) -> Result<CaptionData, Box<dyn std::error::Error>> {
- let (c1, c2) = (field[0] & 0x7f, field[1] & 0x7f);
- if c1 == 0 && c2 == 0 {
- return Ok(CaptionData::Padding);
- }
- let b1 = to_bits(c1);
- let b2 = to_bits(c2);
- if c1 != 0 && c2 != 0 {
- // println!("CC: {:08b} {:08b}", c1, c2);
- }
- for ((m1, m2), name) in MASKS {
- if let (Some(m1), Some(m2)) = (bitmatch(b1, m1), bitmatch(b2, m2)) {
- // println!("Try: {} ({:?},{:?})",name,m1,m2);
- let data = match name {
- "Character" => CaptionData::Character(m1[&'C'], m2[&'C']),
- "Control" => CaptionData::Control {
- caption: 0,
- field: 0,
- command: CC_Command::from(m2[&'C']),
- },
- "Standard Address" => CaptionData::Address(
- PreambleType::Standard,
- Address {
- channel: m1[&'C'] == 1,
- row: m1[&'R'],
- next_row: m2[&'N'] == 1,
- cursor: m2[&'S'],
- undeline: m2[&'U'] == 1,
- },
- ),
- "Extended Address" => CaptionData::Address(
- PreambleType::Extended,
- Address {
- channel: m1[&'C'] == 1,
- row: m1[&'R'],
- next_row: m2[&'N'] == 1,
- cursor: m2[&'S'],
- undeline: m2[&'U'] == 1,
- },
- ),
- "Standard Style" => CaptionData::Style(
- PreambleType::Standard,
- Style {
- channel: m1[&'C'] == 1,
- row: m1[&'R'],
- next_row: m2[&'N'] == 1,
- style: m2[&'S'],
- undeline: m2[&'U'] == 1,
- },
- ),
- "Extended Style" => CaptionData::Style(
- PreambleType::Extended,
- Style {
- channel: m1[&'C'] == 1,
- row: m1[&'R'],
- next_row: m2[&'N'] == 1,
- style: m2[&'S'],
- undeline: m2[&'U'] == 1,
- },
- ),
- "Midrow Style" => CaptionData::MidrowStyle {
- channel: m1[&'C'] == 1,
- underlined: m2[&'U'] == 1,
- style: m2[&'S'],
- },
- "Special North American character set" => CaptionData::CharacterSpecial {
- channel: m1[&'C'] == 1,
- char: m2[&'C'],
- },
- other => todo!("{}: {:?}", other, (m1, m2))
- };
- if !valid(&data) {
- continue;
- };
- // println!("{:?}", data);
- return Ok(data);
- }
- }
- panic!("Invalid CC: {:?}", (c1, c2));
- }
- fn read_block<R: Read>(rdr: &mut R) -> Result<Caption, Box<dyn std::error::Error>> {
- let mut field: [u8; 3] = [0; 3];
- if rdr.read(&mut field)? != 3 {
- panic!("Read less than 3 bytes while decoding CC block");
- };
- let filler = field[0] >> 1;
- if filler != 0x7f {
- panic!("caption filler should be 0x7f, was 0x{:02x}", filler);
- }
- Ok(Caption {
- odd_field: field[0] & 1 == 1,
- data: parse_caption_data(&field[1..])?,
- })
- }
- fn read_cc<R: Read>(rdr: &mut R) -> Result<CC, Box<dyn std::error::Error>> {
- let user_data_type = rdr.read_u8()?;
- if user_data_type != 1 {
- panic!("Invalid user_data_type: {}", user_data_type);
- }
- let block_size = rdr.read_u8()?;
- let flags = rdr.read_u8()?;
- let odd_field_first = (flags & 0b10000000) != 0;
- let filler = (flags & 0b01000000) != 0;
- let block_count = (flags & 0b00111110) >> 1;
- let extra_field = flags & 1;
- let num_blocks = (block_count * 2) + extra_field;
- let mut block: Vec<Caption> = (0..num_blocks)
- .map(|_| read_block(rdr))
- .collect::<Result<_, _>>()?;
- let block = block
- .drain(..)
- .filter(|v| v.data != CaptionData::Padding)
- .collect();
- Ok(CC {
- user_data_type,
- block_size,
- odd_field_first,
- filler,
- block,
- })
- }
- fn main() -> Result<(), Box<dyn std::error::Error>> {
- const CC_START: [u8; 6] = [0x00, 0x00, 0x01, 0xB2, 0x43, 0x43];
- let mut search_buffer = [0u8; 6];
- let path = r#"E:\batch_demux\chapter_grabber\out\Zim_Vol_01_Disc_01_d858015e8310c49ed580fc1d4fc362e9\t004_a001_0x1e0.m2v"#;
- let mut reader = BufReader::new(File::open(path)?);
- let mut proc = Command::new("ffprobe")
- .args(&[
- "-probesize",
- &format!("{}", 0x7FFFFFFF),
- "-analyzeduration",
- &format!("{}", 0x7FFFFFFF),
- "-v",
- "fatal",
- "-f",
- "lavfi",
- "-i",
- &format!("movie=\\'{}\\'[out+subcc]", path.replace("\\", "/")),
- "-show_packets",
- "-print_format",
- "compact",
- ])
- .stdout(Stdio::piped())
- .spawn()?;
- let ffmpeg = BufReader::new(proc.stdout.take().unwrap());
- let mut state = CCState::new();
- for line in ffmpeg.lines() {
- if let Some(entry) = parse_line(&line?) {
- reader.seek(std::io::SeekFrom::Start(entry.pos + 42))?;
- let bytes_read = reader.read(&mut search_buffer)?;
- let has_header = bytes_read == search_buffer.len() && search_buffer == CC_START;
- if has_header {
- let cc = read_cc(&mut reader)?;
- for b in &cc.block {
- if let Some(line) = state.update(&b.data) {
- println!("{} {:?}",entry.time,line)
- };
- }
- } else {
- panic!("No caption header found!");
- };
- }
- }
- proc.wait()?;
- Ok(())
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement