/* * decoder for Closed Captions * * This decoder relies on MPlayer's OSD to display subtitles. * Be warned that decoding is somewhat preliminary, though it basically works. * * Most notably, only the text information is decoded as of now, discarding * color, background and position info (see source below). * * uses source from the xine closed captions decoder * * Copyright (C) 2002 Matteo Giani * * This file is part of MPlayer. * * MPlayer is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * MPlayer is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with MPlayer; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include #include #include #include "config.h" #include "mp_msg.h" #include "sub_cc.h" #include "subreader.h" #include "libvo/video_out.h" #include "sub.h" #include "libavutil/avutil.h" #define CC_MAX_LINE_LENGTH 64 static char chartbl[128]; static subtitle buf1,buf2; static subtitle *fb,*bb; static unsigned int cursor_pos=0; static int initialized=0; static int wtv_format; #define CC_ROLLON 1 #define CC_ROLLUP 2 static int cc_mode=CC_ROLLON; static int cc_lines=4; ///< number of visible rows in CC roll-up mode, not used in CC roll-on mode static void build_char_table(void) { int i; /* first the normal ASCII codes */ for (i = 0; i < 128; i++) chartbl[i] = (char) i; /* now the special codes */ chartbl[0x2a] = 0xe1; /* Latin Small Letter A with acute */ chartbl[0x5c] = 0xe9; /* Latin Small Letter E with acute */ chartbl[0x5e] = 0xed; /* Latin Small Letter I with acute */ chartbl[0x5f] = 0xf3; /* Latin Small Letter O with acute */ chartbl[0x60] = 0xfa; /* Latin Small Letter U with acute */ chartbl[0x7b] = 0xe7; /* Latin Small Letter C with cedilla */ chartbl[0x7c] = 0xf7; /* Division sign */ chartbl[0x7d] = 0xd1; /* Latin Capital letter N with tilde */ chartbl[0x7e] = 0xf1; /* Latin Small Letter N with tilde */ chartbl[0x7f] = 0xa4; /* Currency sign FIXME: this should be a solid block */ } static void clear_buffer(subtitle *buf) { int i; buf->lines=0; for (i = 0; i < SUB_MAX_TEXT; i++) { free(buf->text[i]); buf->text[i] = NULL; } } /** \brief scroll buffer one line up \param buf buffer to scroll */ static void scroll_buffer(subtitle* buf) { int i; while(buf->lines > cc_lines) { free(buf->text[0]); for(i = 0; i < buf->lines - 1; i++) buf->text[i] = buf->text[i+1]; buf->text[buf->lines-1] = NULL; buf->lines--; } } static int channel; void subcc_init(void) { int i; //printf("subcc_init(): initing...\n"); build_char_table(); for(i=0;ilines) {bb->lines++; cursor_pos=0;} if(bb->text[bb->lines - 1]==NULL) { bb->text[bb->lines - 1] = calloc(1, CC_MAX_LINE_LENGTH); cursor_pos=0; } if(c=='\n') { if(cursor_pos>0 && bb->lines < SUB_MAX_TEXT) { bb->lines++;cursor_pos=0; if(cc_mode==CC_ROLLUP){ //Carriage return - scroll buffer one line up bb->text[bb->lines - 1]=calloc(1, CC_MAX_LINE_LENGTH); scroll_buffer(bb); } } } else { if(cursor_pos==CC_MAX_LINE_LENGTH-1) { fprintf(stderr,"CC: append_char() reached CC_MAX_LINE_LENGTH!\n"); return; } bb->text[bb->lines - 1][cursor_pos++]=c; } //In CC roll-up mode data should be shown immediately if(cc_mode==CC_ROLLUP) display_buffer(bb); } static void swap_buffers(void) { subtitle *foo; foo=fb; fb=bb; bb=foo; } static int selected_channel(void) { return subcc_enabled - 1; } static void cc_decode_EIA608(unsigned short int data) { static unsigned short int lastcode=0x0000; uint8_t c1 = data & 0x7f; uint8_t c2 = (data >> 8) & 0x7f; if (c1 & 0x60) { /* normal character, 0x20 <= c1 <= 0x7f */ if (channel != (selected_channel() & 1)) return; append_char(chartbl[c1]); if(c2 & 0x60) /*c2 might not be a normal char even if c1 is*/ append_char(chartbl[c2]); } else if (c1 & 0x10) // control code / special char { channel = (c1 & 0x08) >> 3; if (channel != (selected_channel() & 1)) return; c1&=~0x08; if(data!=lastcode) { if(c2 & 0x40) { /*PAC, Preamble Address Code */ append_char('\n'); /*FIXME properly interpret PACs*/ } else switch(c1) { case 0x10: break; // ext attribute case 0x11: if((c2 & 0x30)==0x30) { //printf("[debug]:Special char (ignored)\n"); /*cc_decode_special_char()*/; } else if (c2 & 0x20) { //printf("[debug]: midrow_attr (ignored)\n"); /*cc_decode_midrow_attr()*/; } break; case 0x14: switch(c2) { case 0x00: //CC roll-on mode cc_mode=CC_ROLLON; break; case 0x25: //CC roll-up, 2 rows case 0x26: //CC roll-up, 3 rows case 0x27: //CC roll-up, 4 rows cc_lines=c2-0x23; cc_mode=CC_ROLLUP; break; case 0x2C: display_buffer(NULL); //EDM clear_buffer(fb); break; case 0x2d: append_char('\n'); //carriage return break; case 0x2e: clear_buffer(bb); //ENM break; case 0x2f: swap_buffers(); //Swap buffers display_buffer(fb); clear_buffer(bb); break; } break; case 0x17: if( c2>=0x21 && c2<=0x23) //TAB { break; } } } } lastcode=data; } static void subcc_decode(const uint8_t *inputbuffer, unsigned int inputlength) { /* The first number may denote a channel number. I don't have the * EIA-708 standard, so it is hard to say. * From what I could figure out so far, the general format seems to be: * * repeat * * 0xfe starts 2 byte sequence of unknown purpose. It might denote * field #2 in line 21 of the VBI. * Treating it identical of 0xff fixes * http://samples.mplayerhq.hu/MPEG-VOB/ClosedCaptions/Starship_Troopers.vob * * 0xff starts 2 byte EIA-608 sequence, field #1 in line 21 of the VBI. * Followed by a 3-code triplet that starts either with 0xff or * 0xfe. In either case, the following triplet needs to be ignored * for line 21, field 1. * * 0x00 is padding, followed by 2 more 0x00. * * 0x01 always seems to appear at the beginning, always seems to * be followed by 0xf8, 8-bit number. * The lower 7 bits of this 8-bit number seem to denote the * number of code triplets that follow. * The most significant bit denotes whether the Line 21 field 1 * captioning information is at odd or even triplet offsets from this * beginning triplet. 1 denotes odd offsets, 0 denotes even offsets. * * Most captions are encoded with odd offsets, so this is what we * will assume. * * until end of packet */ const uint8_t *current = inputbuffer; unsigned int curbytes = 0; uint8_t data1, data2; uint8_t cc_code; int odd_offset = 1; while (curbytes < inputlength) { cc_code = current[0]; if (inputlength - curbytes < 2) { #ifdef LOG_DEBUG fprintf(stderr, "Not enough data for 2-byte CC encoding\n"); #endif break; } data1 = current[1]; data2 = current[2]; current += 3; curbytes += 3; // 0xfe/0xff are both used on plain EIA-608 CC and // for extended EIA-708 (where 0xfc/0xfd is used for // compatibility layer). // Allow using channel bit 2 to select between which // ones to look in. switch (cc_code) { case 0xfc: case 0xfd: case 0xfe: case 0xff: if ((cc_code & 2) == (selected_channel() & 4) >> 1) break; odd_offset ^= 1; if (odd_offset != (selected_channel() & 2) >> 1) break; /* expect EIA-608 CC1/CC2 encoding */ // FIXME check parity! // Parity check omitted assuming we are reading from a DVD and therefore // we should encounter no "transmission errors". cc_decode_EIA608(data1 | (data2 << 8)); break; case 0xfa: case 0x00: /* This seems to be just padding */ break; case 0x01: odd_offset = data2 >> 7; break; default: //#ifdef LOG_DEBUG fprintf(stderr, "Unknown CC encoding: %x\n", cc_code); //#endif break; } } } static const uint8_t mov_cc_signature_1[] = {0, 0, 0, 0xa, 'c', 'd', 'a', 't'}; static const uint8_t mov_cc_signature_2[] = {0, 0, 0, 0xa, 'c', 'd', 't', '2'}; /** * MOV uses a vastly more verbose representation for EIA 608 CC data than DVDs. * This function handles that case. */ static void mov_subcc_decode(const uint8_t *data, unsigned len) { while (len >= 10) { int channel = -1; if (memcmp(data, mov_cc_signature_1, sizeof(mov_cc_signature_1)) == 0) { channel = 0; } else if (memcmp(data, mov_cc_signature_2, sizeof(mov_cc_signature_2)) == 0) { channel = 1; } else { mp_msg(MSGT_OSD, MSGL_V, "Unknown MOV 608 CC formatting\n"); data++; len--; continue; } if (channel == selected_channel() >> 1) cc_decode_EIA608(data[8] | (data[9] << 8)); data += 10; len -= 10; } } void subcc_process_data(const uint8_t *inputdata, unsigned int len) { int mov_mode = len >= 10 && memcmp(inputdata, mov_cc_signature_1, sizeof(mov_cc_signature_1)) == 0; if(!subcc_enabled) return; if(!initialized) subcc_init(); if (mov_mode) { mov_subcc_decode(inputdata, len); return; } if (len & 1) wtv_format = 0; if (len == 2) { // EIA-608 compatibility part. // Full EIA-708 parts have length >= 4 (multiple of 2). cc_decode_EIA608(inputdata[0] | (inputdata[1] << 8)); wtv_format = 1; } if (wtv_format) return; subcc_decode(inputdata, len); } /** * This processes CC captions in the format as found in ATSC broadcasts. * Like DVD CC it is stored inside the MPEG-frame userdata, but with two * differences: * 1) It starts with "GA" instead of "CC" * 2) It _must_ be reordered in the way the decoder reorders the video frames * The latter makes things difficult and is the reason why there is no support * for this yet beyond this function. */ void subcc_process_eia708(const uint8_t *data, int len) { int cc_count; if (!subcc_enabled) return; if (!initialized) subcc_init(); if (len <= 5) return; if (data[0] != '9' || data[1] != '4' || data[2] != 3) { mp_msg(MSGT_OSD, MSGL_ERR, "Unknown ATSC CC type " "0x%"PRIx8" 0x%"PRIx8" 0x%"PRIx8"\n", data[0], data[1], data[2]); return; } // process_cc_data_flag if (!(data[3] & 0x40)) return; cc_count = data[3] & 0x1f; data += 5; len -= 5; cc_count = FFMIN(cc_count, len / 3); while (cc_count--) { // EAI-608 data if ((data[0] & 0xfe) == 0xfc && (data[0] & 1) == selected_channel() >> 1) cc_decode_EIA608(data[1] | (data[2] << 8)); data += 3; } }