gecko/media/webvtt/cuetext.c
Caitlin Potter a86f92136e Bug 833403 - Integrate libwebvtt into Mozilla. r=ted
- Updated for new build system (2/28/2013)
 - declaration-after-statement no longer breaking MSVC build
 - Source files and scripts now contain appropriate license info
 - media/webvtt/update.sh no longer hiding unexpected/significant
   errors.
2013-03-01 19:25:19 -05:00

791 lines
25 KiB
C

/**
* Copyright (c) 2013 Mozilla Foundation and Contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdlib.h>
#include <string.h>
#include "parser_internal.h"
#include "cuetext_internal.h"
#include "cue_internal.h"
#include "string_internal.h"
/* Forward declaration; the definition appears further down in this file. */
static void webvtt_skipwhite( webvtt_byte **position );
/**
 * Local two-argument minimum. Any definition of min that is already visible
 * is discarded first. Each argument may be evaluated more than once, so do
 * not pass expressions with side effects.
 */
#undef min
#define min(a,b) ( (b) > (a) ? (a) : (b) )
/**
 * ERROR macro used for webvtt_parse_cuetext.
 *
 * Expects `self`, `line` and `col` to be in scope where it is expanded.
 * If self->error is set it is called with (self->userdata, line, col, code);
 * a negative result makes the enclosing function return WEBVTT_PARSE_ERROR.
 */
#undef ERROR
#define ERROR(code) \
  do \
  { \
    if( self->error && self->error( self->userdata, line, col, code ) < 0 ) { \
      return WEBVTT_PARSE_ERROR; \
    } \
  } while(0)
/**
 * Macros for return statuses based on memory operations.
 * This is to avoid many if statements checking for multiple memory operation
 * return statuses in functions.
 *
 * Both macros evaluate their status expression exactly once (so a failing
 * call is never re-executed), are wrapped in do { } while(0) so they behave
 * as a single statement, and must be followed by a semicolon.
 *
 * CHECK_MEMORY_OP(status)
 *   Returns `status` from the enclosing function unless it is WEBVTT_SUCCESS.
 *
 * CHECK_MEMORY_OP_JUMP(status_var, returned_status)
 *   Unless `returned_status` is WEBVTT_SUCCESS, stores it in `status_var` and
 *   jumps to a label named `dealloc`, which the enclosing function must
 *   provide.
 */
#define CHECK_MEMORY_OP(status) \
  do \
  { \
    webvtt_status webvtt_checked_status_ = (status); \
    if( webvtt_checked_status_ != WEBVTT_SUCCESS ) { \
      return webvtt_checked_status_; \
    } \
  } while(0)

#define CHECK_MEMORY_OP_JUMP(status_var, returned_status) \
  do \
  { \
    webvtt_status webvtt_checked_status_ = (returned_status); \
    if( webvtt_checked_status_ != WEBVTT_SUCCESS ) { \
      (status_var) = webvtt_checked_status_; \
      goto dealloc; \
    } \
  } while(0)
/**
 * Advance *position past every leading byte that webvtt_iswhite() accepts.
 * This will only work on null-terminated strings, remember that!
 */
static void
webvtt_skipwhite( webvtt_byte **position )
{
  webvtt_byte *cursor;

  for( cursor = *position; *cursor && webvtt_iswhite( *cursor ); cursor++ ) {
    /* the loop header does all the scanning */
  }
  *position = cursor;
}
/**
 * Allocate a token with webvtt_alloc0(), tag it with token_type and hand it
 * back through *token.
 *
 * Returns WEBVTT_OUT_OF_MEMORY when the allocation fails (in which case
 * *token is left untouched) and WEBVTT_SUCCESS otherwise.
 */
WEBVTT_INTERN webvtt_status
webvtt_create_cuetext_token( webvtt_cuetext_token **token, webvtt_cuetext_token_type token_type )
{
  webvtt_cuetext_token *created;

  created = (webvtt_cuetext_token *)webvtt_alloc0( sizeof( webvtt_cuetext_token ) );
  if( created == 0 ) {
    return WEBVTT_OUT_OF_MEMORY;
  }

  created->token_type = token_type;
  *token = created;
  return WEBVTT_SUCCESS;
}
/**
 * Create a START_TOKEN carrying copies of the tag name, the CSS class list
 * and the annotation.
 *
 * Returns the status of webvtt_create_cuetext_token() if that call fails,
 * WEBVTT_SUCCESS otherwise.
 */
WEBVTT_INTERN webvtt_status
webvtt_create_cuetext_start_token( webvtt_cuetext_token **token, webvtt_string *tag_name,
  webvtt_stringlist *css_classes, webvtt_string *annotation )
{
  webvtt_cuetext_start_token_data start_data;
  webvtt_status status = webvtt_create_cuetext_token( token, START_TOKEN );

  if( WEBVTT_FAILED( status ) ) {
    return status;
  }

  webvtt_copy_string( &(*token)->tag_name, tag_name );

  /* Build the start-tag payload locally, then store it in the token. */
  webvtt_copy_stringlist( &start_data.css_classes, css_classes );
  webvtt_copy_string( &start_data.annotations, annotation );
  (*token)->start_token_data = start_data;

  return WEBVTT_SUCCESS;
}
/**
 * Create an END_TOKEN carrying a copy of tag_name.
 *
 * Returns the status of webvtt_create_cuetext_token() if that call fails,
 * WEBVTT_SUCCESS otherwise.
 */
WEBVTT_INTERN webvtt_status
webvtt_create_cuetext_end_token( webvtt_cuetext_token **token, webvtt_string *tag_name )
{
  webvtt_status status = webvtt_create_cuetext_token( token, END_TOKEN );

  if( WEBVTT_FAILED( status ) ) {
    return status;
  }

  webvtt_copy_string( &(*token)->tag_name, tag_name );
  return WEBVTT_SUCCESS;
}
/**
 * Create a TEXT_TOKEN carrying a copy of text.
 *
 * Returns the status of webvtt_create_cuetext_token() if that call fails,
 * WEBVTT_SUCCESS otherwise.
 */
WEBVTT_INTERN webvtt_status
webvtt_create_cuetext_text_token( webvtt_cuetext_token **token, webvtt_string *text )
{
  webvtt_status status = webvtt_create_cuetext_token( token, TEXT_TOKEN );

  if( WEBVTT_FAILED( status ) ) {
    return status;
  }

  webvtt_copy_string( &(*token)->text, text );
  return WEBVTT_SUCCESS;
}
/**
 * Create a TIME_STAMP_TOKEN holding time_stamp.
 *
 * Returns the status of webvtt_create_cuetext_token() if that call fails,
 * WEBVTT_SUCCESS otherwise.
 */
WEBVTT_INTERN webvtt_status
webvtt_create_cuetext_timestamp_token( webvtt_cuetext_token **token, webvtt_timestamp time_stamp )
{
  webvtt_status status = webvtt_create_cuetext_token( token, TIME_STAMP_TOKEN );

  if( WEBVTT_FAILED( status ) ) {
    return status;
  }

  (*token)->time_stamp = time_stamp;
  return WEBVTT_SUCCESS;
}
/**
 * Release whatever a token holds, free the token and set *token to 0.
 * Does nothing when token or *token is null.
 */
WEBVTT_INTERN void
webvtt_delete_cuetext_token( webvtt_cuetext_token **token )
{
  webvtt_cuetext_token *doomed;
  webvtt_cuetext_start_token_data start_data;

  if( !token || !*token ) {
    return;
  }
  doomed = *token;

  /**
   * Time stamp tokens are not handled here: they own nothing that needs to
   * be released.
   */
  if( doomed->token_type == START_TOKEN ) {
    start_data = doomed->start_token_data;
    webvtt_release_stringlist( &start_data.css_classes );
    webvtt_release_string( &start_data.annotations );
    webvtt_release_string( &doomed->tag_name );
  } else if( doomed->token_type == END_TOKEN ) {
    webvtt_release_string( &doomed->tag_name );
  } else if( doomed->token_type == TEXT_TOKEN ) {
    webvtt_release_string( &doomed->text );
  }

  webvtt_free( doomed );
  *token = 0;
}
/**
 * Definitions for tag names that accept annotations.
 */
#define V_TAG_LENGTH 1
webvtt_byte v_tag[V_TAG_LENGTH] = { UTF8_V };

/**
 * Report whether a start tag with the given name may carry an annotation.
 *
 * Only a name that is exactly v_tag qualifies. The length is compared first
 * so that an empty name, or a longer name that merely starts with the same
 * byte, is rejected instead of matching as a prefix.
 *
 * Returns non-zero when the tag accepts an annotation, zero otherwise.
 */
WEBVTT_INTERN int
tag_accepts_annotation( webvtt_string *tag_name )
{
  if( webvtt_string_length( tag_name ) != V_TAG_LENGTH ) {
    return 0;
  }
  return memcmp( webvtt_string_text( tag_name ), v_tag, V_TAG_LENGTH ) == 0;
}
/**
 * Definitions for tag tokens that are more than one character long.
 */
#define RUBY_TAG_LENGTH 4
#define RUBY_TEXT_TAG_LENGTH 2
webvtt_byte ruby_tag[RUBY_TAG_LENGTH] = { UTF8_R, UTF8_U, UTF8_B, UTF8_Y };
webvtt_byte rt_tag[RUBY_TEXT_TAG_LENGTH] = { UTF8_R, UTF8_T };

/**
 * Translate a tag name into the node kind it stands for.
 *
 * Names are matched exactly: the length must agree as well as the bytes, so
 * an empty name or a name that only shares a prefix with a known tag is
 * rejected. *kind is written only when WEBVTT_SUCCESS is returned.
 *
 * Returns WEBVTT_INVALID_PARAM when tag_name or kind is null,
 * WEBVTT_INVALID_TAG_NAME when the name is not one of the tags handled here,
 * and WEBVTT_SUCCESS otherwise.
 */
WEBVTT_INTERN webvtt_status
webvtt_get_node_kind_from_tag_name( webvtt_string *tag_name, webvtt_node_kind *kind )
{
  const webvtt_byte *name;
  size_t length;

  if( !tag_name || !kind ) {
    return WEBVTT_INVALID_PARAM;
  }

  name = webvtt_string_text( tag_name );
  length = webvtt_string_length( tag_name );
  if( !name ) {
    return WEBVTT_INVALID_TAG_NAME;
  }

  if( length == 1 ) {
    switch( name[0] ) {
      case( UTF8_B ):
        *kind = WEBVTT_BOLD;
        break;
      case( UTF8_I ):
        *kind = WEBVTT_ITALIC;
        break;
      case( UTF8_U ):
        *kind = WEBVTT_UNDERLINE;
        break;
      case( UTF8_C ):
        *kind = WEBVTT_CLASS;
        break;
      case( UTF8_V ):
        *kind = WEBVTT_VOICE;
        break;
      default:
        /* Unknown one-character tag: never report success with *kind unset. */
        return WEBVTT_INVALID_TAG_NAME;
    }
  } else if( length == RUBY_TAG_LENGTH &&
             memcmp( name, ruby_tag, RUBY_TAG_LENGTH ) == 0 ) {
    *kind = WEBVTT_RUBY;
  } else if( length == RUBY_TEXT_TAG_LENGTH &&
             memcmp( name, rt_tag, RUBY_TEXT_TAG_LENGTH ) == 0 ) {
    *kind = WEBVTT_RUBY_TEXT;
  } else {
    return WEBVTT_INVALID_TAG_NAME;
  }

  return WEBVTT_SUCCESS;
}
/**
 * Build the node that corresponds to a token.
 *
 * Text and time stamp tokens become leaf nodes; start tokens become internal
 * nodes whose kind is derived from the tag name. Any other token type yields
 * WEBVTT_INVALID_TOKEN_TYPE.
 *
 * Returns WEBVTT_INVALID_PARAM when token, node or parent is null and
 * WEBVTT_UNSUCCESSFUL when *node is already set; otherwise the status of the
 * node constructor (or of the tag-name lookup if that fails).
 */
WEBVTT_INTERN webvtt_status
webvtt_create_node_from_token( webvtt_cuetext_token *token, webvtt_node **node, webvtt_node *parent )
{
  webvtt_node_kind kind;

  if( !token || !node || !parent ) {
    return WEBVTT_INVALID_PARAM;
  }

  /**
   * We've received a node that is not null.
   * Refuse to overwrite it: the caller has not released it and doing so
   * would leak it.
   */
  if( *node ) {
    return WEBVTT_UNSUCCESSFUL;
  }

  if( token->token_type == TEXT_TOKEN ) {
    return webvtt_create_text_leaf_node( node, parent, &token->text );
  }

  if( token->token_type == START_TOKEN ) {
    CHECK_MEMORY_OP( webvtt_get_node_kind_from_tag_name( &token->tag_name, &kind) );
    return webvtt_create_internal_node( node, parent, kind,
      token->start_token_data.css_classes, &token->start_token_data.annotations );
  }

  if( token->token_type == TIME_STAMP_TOKEN ) {
    return webvtt_create_time_stamp_leaf_node( node, parent, token->time_stamp );
  }

  return WEBVTT_INVALID_TOKEN_TYPE;
}
/**
 * DATA state of the cue text tokenizer.
 *
 * Appends bytes to result until one of the following happens:
 *  - '&' is read: switch to ESCAPE, step past it, return WEBVTT_UNFINISHED;
 *  - '<' is read with nothing collected yet: switch to TAG, step past it,
 *    return WEBVTT_UNFINISHED;
 *  - '<' is read after text was collected, or the null byte is read: return
 *    WEBVTT_SUCCESS without moving past that byte.
 * A failing append returns its status immediately.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer_data_state( webvtt_byte **position,
  webvtt_cuetext_token_state *token_state, webvtt_string *result )
{
  while( *token_state == DATA ) {
    webvtt_byte current = **position;

    if( current == UTF8_NULL_BYTE ) {
      return WEBVTT_SUCCESS;
    }

    if( current == UTF8_AMPERSAND ) {
      *token_state = ESCAPE;
    } else if( current == UTF8_LESS_THAN ) {
      if( webvtt_string_length(result) != 0 ) {
        return WEBVTT_SUCCESS;
      }
      *token_state = TAG;
    } else {
      CHECK_MEMORY_OP( webvtt_string_putc( result, current ) );
    }

    (*position)++;
  }

  return WEBVTT_UNFINISHED;
}
/**
* Definitions for valid escape values.
* The semicolon is implicit in the comparison.
*/
#define AMP_ESCAPE_LENGTH 4
#define LT_ESCAPE_LENGTH 3
#define GT_ESCAPE_LENGTH 3
#define RLM_ESCAPE_LENGTH 4
#define LRM_ESCAPE_LENGTH 4
#define NBSP_ESCAPE_LENGTH 5
#define RLM_REPLACE_LENGTH 3
#define LRM_REPLACE_LENGTH 3
#define NBSP_REPLACE_LENGTH 2
webvtt_byte amp_escape[AMP_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_A, UTF8_M, UTF8_P };
webvtt_byte lt_escape[LT_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_L, UTF8_T };
webvtt_byte gt_escape[GT_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_G, UTF8_T };
webvtt_byte rlm_escape[RLM_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_R, UTF8_L, UTF8_M };
webvtt_byte lrm_escape[LRM_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_L, UTF8_R, UTF8_M };
webvtt_byte nbsp_escape[NBSP_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_N, UTF8_B, UTF8_S, UTF8_P };
webvtt_byte rlm_replace[RLM_REPLACE_LENGTH] = { UTF8_RIGHT_TO_LEFT_1,
UTF8_RIGHT_TO_LEFT_2, UTF8_RIGHT_TO_LEFT_3 };
webvtt_byte lrm_replace[LRM_REPLACE_LENGTH] = { UTF8_LEFT_TO_RIGHT_1,
UTF8_LEFT_TO_RIGHT_2, UTF8_LEFT_TO_RIGHT_3 };
webvtt_byte nbsp_replace[NBSP_REPLACE_LENGTH] = { UTF8_NO_BREAK_SPACE_1,
UTF8_NO_BREAK_SPACE_2 };
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer_escape_state( webvtt_byte **position,
webvtt_cuetext_token_state *token_state, webvtt_string *result )
{
webvtt_string buffer;
webvtt_status status = WEBVTT_SUCCESS;
CHECK_MEMORY_OP_JUMP( status, webvtt_create_string( 1, &buffer ) );
/**
* Append ampersand here because the algorithm is not able to add it to the
* buffer when it reads it in the DATA state tokenizer.
*/
CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, UTF8_AMPERSAND ) );
for( ; *token_state == ESCAPE; (*position)++ ) {
/**
* We have encountered a token termination point.
* Append buffer to result and return success.
*/
if( **position == UTF8_NULL_BYTE || **position == UTF8_LESS_THAN ) {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
goto dealloc;
}
/**
* This means we have enocuntered a malformed escape character sequence.
* This means that we need to add that malformed text to the result and
* recreate the buffer to prepare for a new escape sequence.
*/
else if( **position == UTF8_AMPERSAND ) {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
webvtt_release_string( &buffer );
CHECK_MEMORY_OP_JUMP( status, webvtt_create_string( 1, &buffer ) );
CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, *position[0] ) );
}
/**
* We've encountered the semicolon which is the end of an escape sequence.
* Check if buffer contains a valid escape sequence and if it does append
* the interpretation to result and change the state to DATA.
*/
else if( **position == UTF8_SEMI_COLON ) {
if( memcmp( webvtt_string_text(&buffer), amp_escape, min(webvtt_string_length(&buffer), AMP_ESCAPE_LENGTH ) ) == 0 ) {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, UTF8_AMPERSAND ) );
} else if( memcmp( webvtt_string_text(&buffer), lt_escape, min(webvtt_string_length(&buffer), LT_ESCAPE_LENGTH ) ) == 0 ) {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, UTF8_LESS_THAN ) );
} else if( memcmp( webvtt_string_text(&buffer), gt_escape, min(webvtt_string_length(&buffer), GT_ESCAPE_LENGTH) ) == 0 ) {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, UTF8_GREATER_THAN ) );
} else if( memcmp( webvtt_string_text(&buffer), rlm_escape, min(webvtt_string_length(&buffer), RLM_ESCAPE_LENGTH) ) == 0 ) {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, rlm_replace, RLM_REPLACE_LENGTH ) );
} else if( memcmp( webvtt_string_text(&buffer), lrm_escape, min(webvtt_string_length(&buffer), LRM_ESCAPE_LENGTH) ) == 0 ) {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, lrm_replace, LRM_REPLACE_LENGTH ) );
} else if( memcmp( webvtt_string_text(&buffer), nbsp_escape, min(webvtt_string_length(&buffer), NBSP_ESCAPE_LENGTH) ) == 0 ) {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, nbsp_replace, NBSP_REPLACE_LENGTH ) );
} else {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, **position ) );
}
*token_state = DATA;
}
/**
* Character is alphanumeric. This means we are in the body of the escape
* sequence.
*/
else if( webvtt_isalphanum( **position ) ) {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
}
/**
* If we have not found an alphanumeric character then we have encountered
* a malformed escape sequence. Add buffer to result and continue to parse
* in DATA state.
*/
else {
CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, **position ) );
*token_state = DATA;
}
}
dealloc:
webvtt_release_string( &buffer );
return status;
}
/**
 * TAG state of the cue text tokenizer; entered just after '<' was read.
 *
 * Looks at the current byte and picks the follow-up state:
 *  - tab, line feed, carriage return, form feed or space:
 *    START_TAG_ANNOTATION;
 *  - a digit: appended to result, TIME_STAMP_TAG;
 *  - '.': START_TAG_CLASS;
 *  - '/': END_TAG;
 *  - '>' or the null byte: return WEBVTT_SUCCESS without stepping past it;
 *  - anything else: appended to result, START_TAG.
 * After a state change the byte is stepped over and WEBVTT_UNFINISHED is
 * returned. A failing append returns its status immediately.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer_tag_state( webvtt_byte **position,
  webvtt_cuetext_token_state *token_state, webvtt_string *result )
{
  while( *token_state == TAG ) {
    webvtt_byte current = **position;

    if( current == UTF8_TAB || current == UTF8_LINE_FEED ||
        current == UTF8_CARRIAGE_RETURN || current == UTF8_FORM_FEED ||
        current == UTF8_SPACE ) {
      *token_state = START_TAG_ANNOTATION;
    } else if( webvtt_isdigit( current ) ) {
      CHECK_MEMORY_OP( webvtt_string_putc( result, current ) );
      *token_state = TIME_STAMP_TAG;
    } else if( current == UTF8_FULL_STOP ) {
      *token_state = START_TAG_CLASS;
    } else if( current == UTF8_SOLIDUS ) {
      *token_state = END_TAG;
    } else if( current == UTF8_GREATER_THAN || current == UTF8_NULL_BYTE ) {
      return WEBVTT_SUCCESS;
    } else {
      CHECK_MEMORY_OP( webvtt_string_putc( result, current ) );
      *token_state = START_TAG;
    }

    (*position)++;
  }

  return WEBVTT_UNFINISHED;
}
/**
 * START_TAG state of the cue text tokenizer: collects the tag name.
 *
 *  - tab, form feed, space, line feed or carriage return: switch to
 *    START_TAG_ANNOTATION;
 *  - '.': switch to START_TAG_CLASS;
 *  - '>' or the null byte: the tag is complete (or the input ended inside
 *    it); return WEBVTT_SUCCESS without stepping past that byte;
 *  - anything else is appended to result.
 *
 * The null byte has to end the token here: treating it as part of the name
 * would walk the position past the end of the string.
 *
 * Returns WEBVTT_UNFINISHED after a state change (the byte that caused it is
 * stepped over), or the failing status of the append.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer_start_tag_state( webvtt_byte **position,
  webvtt_cuetext_token_state *token_state, webvtt_string *result )
{
  for( ; *token_state == START_TAG; (*position)++ ) {
    if( **position == UTF8_TAB || **position == UTF8_FORM_FEED ||
        **position == UTF8_SPACE || **position == UTF8_LINE_FEED ||
        **position == UTF8_CARRIAGE_RETURN ) {
      *token_state = START_TAG_ANNOTATION;
    } else {
      switch( **position ) {
        case UTF8_FULL_STOP:
          *token_state = START_TAG_CLASS;
          break;
        case UTF8_GREATER_THAN:
        case UTF8_NULL_BYTE:
          return WEBVTT_SUCCESS;
        default:
          CHECK_MEMORY_OP( webvtt_string_putc( result, **position ) );
          break;
      }
    }
  }

  return WEBVTT_UNFINISHED;
}
/**
 * START_TAG_CLASS state of the cue text tokenizer; entered just after '.'
 * was read inside a start tag.
 *
 * Collects one class name at a time in a private buffer and pushes it onto
 * css_classes when it ends:
 *  - '.': push, then start collecting the next class;
 *  - tab, form feed, space, line feed or carriage return: push, switch to
 *    START_TAG_ANNOTATION, step over that byte and return WEBVTT_UNFINISHED
 *    so that the tokenizer goes on to read the annotation;
 *  - '>' or the null byte: push and return WEBVTT_SUCCESS without stepping
 *    past that byte.
 *
 * The private buffer is released on every path that returns with it still
 * held. A failing string or list operation returns its status.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer_start_tag_class_state( webvtt_byte **position,
  webvtt_cuetext_token_state *token_state, webvtt_stringlist *css_classes )
{
  webvtt_string buffer;
  webvtt_status status;

  /* Nothing to release yet, so a failed create returns directly. */
  status = webvtt_create_string( 1, &buffer );
  if( status != WEBVTT_SUCCESS ) {
    return status;
  }
  status = WEBVTT_UNFINISHED;

  for( ; *token_state == START_TAG_CLASS; (*position)++ ) {
    if( **position == UTF8_TAB || **position == UTF8_FORM_FEED ||
        **position == UTF8_SPACE || **position == UTF8_LINE_FEED ||
        **position == UTF8_CARRIAGE_RETURN ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      /* The loop ends after stepping over this byte; status stays UNFINISHED. */
      *token_state = START_TAG_ANNOTATION;
    } else if( **position == UTF8_GREATER_THAN || **position == UTF8_NULL_BYTE ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      status = WEBVTT_SUCCESS;
      goto dealloc;
    } else if( **position == UTF8_FULL_STOP ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      /* The old buffer is already released, so return directly on failure. */
      status = webvtt_create_string( 1, &buffer );
      if( status != WEBVTT_SUCCESS ) {
        return status;
      }
      status = WEBVTT_UNFINISHED;
    } else {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
  }

dealloc:
  webvtt_release_string( &buffer );
  return status;
}
/**
 * START_TAG_ANNOTATION state of the cue text tokenizer.
 *
 * Appends every byte to annotation until '>' or the null byte is reached,
 * then returns WEBVTT_SUCCESS without stepping past that byte. A failing
 * append returns its status immediately.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer_start_tag_annotation_state( webvtt_byte **position,
  webvtt_cuetext_token_state *token_state, webvtt_string *annotation )
{
  while( *token_state == START_TAG_ANNOTATION ) {
    webvtt_byte current = **position;

    if( current == UTF8_GREATER_THAN || current == UTF8_NULL_BYTE ) {
      return WEBVTT_SUCCESS;
    }

    CHECK_MEMORY_OP( webvtt_string_putc( annotation, current ) );
    (*position)++;
  }

  return WEBVTT_UNFINISHED;
}
/**
 * END_TAG state of the cue text tokenizer; entered just after "</" was read.
 *
 * Appends every byte to result until '>' or the null byte is reached, then
 * returns WEBVTT_SUCCESS without stepping past that byte. A failing append
 * returns its status immediately.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer_end_tag_state( webvtt_byte **position,
  webvtt_cuetext_token_state *token_state, webvtt_string *result )
{
  while( *token_state == END_TAG ) {
    switch( **position ) {
      case UTF8_GREATER_THAN:
      case UTF8_NULL_BYTE:
        return WEBVTT_SUCCESS;
      default:
        CHECK_MEMORY_OP( webvtt_string_putc( result, **position ) );
        break;
    }
    ++( *position );
  }

  return WEBVTT_UNFINISHED;
}
/**
 * TIME_STAMP_TAG state of the cue text tokenizer; entered after a digit was
 * read directly behind '<'.
 *
 * Appends every byte to result until '>' or the null byte is reached, then
 * returns WEBVTT_SUCCESS without stepping past that byte. A failing append
 * returns its status immediately.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer_time_stamp_tag_state( webvtt_byte **position,
  webvtt_cuetext_token_state *token_state, webvtt_string *result )
{
  for( ; *token_state == TIME_STAMP_TAG; ++( *position ) ) {
    webvtt_byte current = **position;
    int at_token_end = ( current == UTF8_NULL_BYTE || current == UTF8_GREATER_THAN );

    if( at_token_end ) {
      return WEBVTT_SUCCESS;
    }
    CHECK_MEMORY_OP( webvtt_string_putc( result, current ) );
  }

  return WEBVTT_UNFINISHED;
}
/**
 * Read one token from the cue text at *position.
 *
 * On return *position has been advanced over the bytes that were consumed
 * (including a closing '>' if the token ended on one). On WEBVTT_SUCCESS a
 * newly created token is stored in *token.
 *
 * Returns WEBVTT_INVALID_PARAM when position, *position or token is null,
 * WEBVTT_OUT_OF_MEMORY (or the failing status) when the working strings or
 * the class list cannot be created, otherwise the status of the state
 * functions / token constructor.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer( webvtt_byte **position, webvtt_cuetext_token **token )
{
  webvtt_cuetext_token_state token_state = DATA;
  webvtt_string result, annotation;
  webvtt_stringlist *css_classes = NULL;
  webvtt_timestamp time_stamp = 0;
  webvtt_status status;

  if( !position || !*position || !token ) {
    return WEBVTT_INVALID_PARAM;
  }

  /**
   * Create the working storage, undoing whatever was already created if a
   * later step fails, so the code below never touches an unusable object.
   */
  if( WEBVTT_FAILED( status = webvtt_create_string( 10, &result ) ) ) {
    return status;
  }
  if( WEBVTT_FAILED( status = webvtt_create_string( 10, &annotation ) ) ) {
    webvtt_release_string( &result );
    return status;
  }
  webvtt_create_stringlist( &css_classes );
  if( !css_classes ) {
    webvtt_release_string( &annotation );
    webvtt_release_string( &result );
    return WEBVTT_OUT_OF_MEMORY;
  }

  status = WEBVTT_UNFINISHED;

  /**
   * Loop while the tokenizer is not finished.
   * Based on the state of the tokenizer enter a function to handle that
   * particular tokenizer state. Those functions will loop until they either
   * change the state of the tokenizer or reach a valid token end point.
   */
  while( status == WEBVTT_UNFINISHED ) {
    switch( token_state ) {
      case DATA :
        status = webvtt_cuetext_tokenizer_data_state( position, &token_state, &result );
        break;
      case ESCAPE:
        status = webvtt_cuetext_tokenizer_escape_state( position, &token_state, &result );
        break;
      case TAG:
        status = webvtt_cuetext_tokenizer_tag_state( position, &token_state, &result );
        break;
      case START_TAG:
        status = webvtt_cuetext_tokenizer_start_tag_state( position, &token_state, &result );
        break;
      case START_TAG_CLASS:
        status = webvtt_cuetext_tokenizer_start_tag_class_state( position, &token_state, css_classes );
        break;
      case START_TAG_ANNOTATION:
        status = webvtt_cuetext_tokenizer_start_tag_annotation_state( position, &token_state, &annotation );
        break;
      case END_TAG:
        status = webvtt_cuetext_tokenizer_end_tag_state( position, &token_state, &result );
        break;
      case TIME_STAMP_TAG:
        status = webvtt_cuetext_tokenizer_time_stamp_tag_state( position, &token_state, &result );
        break;
      default:
        /* Unknown state: stop instead of spinning forever. */
        status = WEBVTT_INVALID_TOKEN_STATE;
        break;
    }

    /* Whitespace in front of (or inside the lead-in of) an annotation is dropped. */
    if( token_state == START_TAG_ANNOTATION ) {
      webvtt_skipwhite( position );
    }
  }

  /* Step over the '>' that closed a tag so the next call starts behind it. */
  if( **position == UTF8_GREATER_THAN ) {
    (*position)++;
  }

  if( status == WEBVTT_SUCCESS ) {
    /**
     * The state that the tokenizer left off on will tell us what kind of token
     * needs to be made.
     */
    if( token_state == DATA || token_state == ESCAPE ) {
      status = webvtt_create_cuetext_text_token( token, &result );
    } else if( token_state == TAG || token_state == START_TAG ||
               token_state == START_TAG_CLASS ||
               token_state == START_TAG_ANNOTATION ) {
      /**
       * If the tag does not accept an annotation then release the current
       * annotation and initialize annotation to a safe empty state.
       */
      if( !tag_accepts_annotation( &result ) ) {
        webvtt_release_string( &annotation );
        webvtt_init_string( &annotation );
      }
      status = webvtt_create_cuetext_start_token( token, &result, css_classes, &annotation );
    } else if( token_state == END_TAG ) {
      status = webvtt_create_cuetext_end_token( token, &result );
    } else if( token_state == TIME_STAMP_TAG ) {
      /**
       * NOTE(review): the result of parse_timestamp() is not checked, so a
       * malformed time stamp appears to yield a token with time_stamp 0 -
       * confirm whether such tokens should be dropped instead.
       */
      parse_timestamp( webvtt_string_text( &result ), &time_stamp );
      status = webvtt_create_cuetext_timestamp_token( token, time_stamp );
    } else {
      status = WEBVTT_INVALID_TOKEN_STATE;
    }
  }

  webvtt_release_stringlist( &css_classes );
  webvtt_release_string( &result );
  webvtt_release_string( &annotation );

  return status;
}
/**
 * Parse the cue text held in payload into a tree of nodes rooted at
 * cue->node_head.
 *
 * Currently line and column are not being kept track of, and the `self` and
 * `finished` parameters are not used.
 *
 * Returns WEBVTT_INVALID_PARAM when cue or payload is null or the payload has
 * no text, the failing status if the head node cannot be created, the failing
 * status of the tokenizer, WEBVTT_OUT_OF_MEMORY if a node cannot be allocated,
 * and WEBVTT_SUCCESS otherwise. Tokens that cannot be turned into a node for
 * any other reason (for example an unknown tag name) are skipped.
 */
WEBVTT_INTERN webvtt_status
webvtt_parse_cuetext( webvtt_parser self, webvtt_cue *cue, webvtt_string *payload, int finished )
{
  const webvtt_byte *cue_text;
  webvtt_status status;
  webvtt_byte *position;
  webvtt_node *node_head;
  webvtt_node *current_node;
  webvtt_node *temp_node;
  webvtt_cuetext_token *token;
  webvtt_node_kind kind;

  (void)self;
  (void)finished;

  if( !cue || !payload ) {
    return WEBVTT_INVALID_PARAM;
  }

  cue_text = webvtt_string_text( payload );

  if( !cue_text ) {
    return WEBVTT_INVALID_PARAM;
  }

  if ( WEBVTT_FAILED(status = webvtt_create_head_node( &cue->node_head ) ) ) {
    return status;
  }

  position = (webvtt_byte *)cue_text;
  node_head = cue->node_head;
  current_node = node_head;
  temp_node = NULL;
  token = NULL;

  /**
   * Routine taken from the W3C specification
   * http://dev.w3.org/html5/webvtt/#webvtt-cue-text-parsing-rules
   */
  while( *position != UTF8_NULL_BYTE ) {

    webvtt_delete_cuetext_token( &token );

    /* Step 7. */
    status = webvtt_cuetext_tokenizer( &position, &token );
    switch( status ) {
      case( WEBVTT_UNFINISHED ):
        /* Nothing usable was produced; move on. */
        break;
      /* Step 8. */
      case( WEBVTT_SUCCESS ):

        /**
         * If we've found an end token which has a valid end token tag name and
         * a tag name that is equal to the current node then set current to the
         * parent of current.
         */
        if( token->token_type == END_TOKEN ) {
          /**
           * We have encountered an end token but we are at the top of the list
           * and thus have not encountered any start tokens yet, throw away the
           * token.
           */
          if( current_node->kind == WEBVTT_HEAD_NODE ) {
            continue;
          }

          /**
           * We have encountered an end token but it is not in a format that is
           * supported, throw away the token.
           */
          if( webvtt_get_node_kind_from_tag_name( &token->tag_name, &kind ) == WEBVTT_INVALID_TAG_NAME ) {
            continue;
          }

          /**
           * We have encountered an end token and it matches the start token of
           * the node that we are currently on. Move back up the list of nodes
           * and continue parsing.
           */
          if( current_node->kind == kind ) {
            current_node = current_node->parent;
          }
        } else {

          /**
           * Attempt to create a valid node from the token.
           * If successful then attach the node to the current nodes list and
           * also set current to the newly created node if it is an internal
           * node type.
           */
          status = webvtt_create_node_from_token( token, &temp_node, current_node );
          if( status == WEBVTT_SUCCESS ) {
            webvtt_attach_internal_node( current_node, temp_node );

            if( WEBVTT_IS_VALID_INTERNAL_NODE( temp_node->kind ) ) {
              current_node = temp_node;
            }

            /* Release the node as attach internal node increases the count. */
            webvtt_release_node( &temp_node );
          } else if( status == WEBVTT_OUT_OF_MEMORY ) {
            /* Running out of memory is reported instead of silently dropped. */
            webvtt_delete_cuetext_token( &token );
            return status;
          }
          /* Any other failure: the token is not representable, skip it. */
        }
        break;
      default:
        /**
         * The tokenizer failed outright. Report it to the caller rather than
         * looping again on input that may not advance.
         */
        webvtt_delete_cuetext_token( &token );
        return status;
    }
    /**
     * NOTE(review): this drops whitespace that follows any token, including
     * whitespace at the start of a following text run - confirm intended.
     */
    webvtt_skipwhite( &position );
  }

  webvtt_delete_cuetext_token( &token );

  return WEBVTT_SUCCESS;
}