/** * Copyright (c) 2013 Mozilla Foundation and Contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "string_internal.h" #include #include static webvtt_string_data empty_string = { { 1 }, /* init refcount */ 0, /* length */ 0, /* capacity */ empty_string.array, /* text */ { '\0' } /* array */ }; WEBVTT_EXPORT void webvtt_init_string( webvtt_string *result ) { if( result ) { result->d = &empty_string; webvtt_ref( &result->d->refs ); } } WEBVTT_EXPORT webvtt_uint webvtt_string_is_empty( const webvtt_string *str ) { return str->d == &empty_string || webvtt_string_length( str ) == 0 ? 1 : 0; } /** * Allocate new string. */ WEBVTT_EXPORT webvtt_status webvtt_create_string( webvtt_uint32 alloc, webvtt_string *result ) { webvtt_string_data *d; if( !result ) { return WEBVTT_INVALID_PARAM; } d = ( webvtt_string_data * )webvtt_alloc( sizeof( webvtt_string_data ) + ( alloc * sizeof( webvtt_byte ) ) ); if( !d ) { return WEBVTT_OUT_OF_MEMORY; } d->refs.value = 1; d->alloc = alloc; d->length = 0; d->text = d->array; d->text[0] = 0; result->d = d; return WEBVTT_SUCCESS; } WEBVTT_EXPORT webvtt_status webvtt_create_string_with_text( webvtt_string *result, const webvtt_byte *init_text, int len ) { webvtt_uint pos = 0; if( !result ) { return WEBVTT_INVALID_PARAM; } if( !init_text ) { webvtt_init_string( result ); return WEBVTT_SUCCESS; } if( len < 0 ) { len = strlen( ( const char * )init_text ); } if( len == 0 ) { webvtt_init_string( result ); return WEBVTT_SUCCESS; } /** * initialize the string by referencing empty_string */ webvtt_init_string( result ); /** * append the appropriate data to the empty string */ return webvtt_string_append( result, init_text, len ); } /** * reference counting */ WEBVTT_EXPORT void webvtt_ref_string( webvtt_string *str ) { if( str ) { webvtt_ref( &str->d->refs ); } } WEBVTT_EXPORT void webvtt_release_string( webvtt_string *str ) { /** * pulls the string data out of the string container, decreases the string */ if( str ) { webvtt_string_data *d = str->d; str->d = 0; if( d && webvtt_deref( &d->refs ) == 0 ) { webvtt_free( d ); } } } /** * "Detach" a shared string, so that it's safely mutable */ WEBVTT_EXPORT webvtt_status webvtt_string_detach( /* in, out */ webvtt_string *str ) { webvtt_string_data *d, *q; if( !str ) { return WEBVTT_INVALID_PARAM; } q = str->d; if( q->refs.value == 1 ) { return WEBVTT_SUCCESS; } d = ( webvtt_string_data * )webvtt_alloc( sizeof( webvtt_string_data ) + ( sizeof( webvtt_byte ) * str->d->alloc ) ); d->refs.value = 1; d->text = d->array; d->alloc = q->alloc; d->length = q->length; memcpy( d->text, q->text, q->length ); str->d = d; if( webvtt_deref( &q->refs ) == 0 ) { webvtt_free( q ); } return WEBVTT_SUCCESS; } WEBVTT_EXPORT void webvtt_copy_string( webvtt_string *left, const webvtt_string *right ) { if( left ) { if( right && right->d ) { left->d = right->d; } else { left->d = &empty_string; } webvtt_ref( &left->d->refs ); } } WEBVTT_EXPORT const webvtt_byte * webvtt_string_text(const webvtt_string *str) { if( !str || !str->d ) { return 0; } return str->d->text; } WEBVTT_EXPORT const webvtt_uint32 webvtt_string_length(const webvtt_string *str) { if( !str || !str->d ) { return 0; } return str->d->length; } WEBVTT_EXPORT const webvtt_uint32 webvtt_string_capacity(const webvtt_string *str) { if( !str || !str->d ) { return 0; } return str->d->alloc; } /** * Reallocate string. * Grow to at least 'need' characters. Power of 2 growth. */ static webvtt_status grow( webvtt_string *str, webvtt_uint need ) { static const webvtt_uint page = 0x1000; webvtt_uint32 n; webvtt_string_data *p, *d; webvtt_uint32 grow; if( !str ) { return WEBVTT_INVALID_PARAM; } if( ( str->d->length + need ) <= str->d->alloc ) { return WEBVTT_SUCCESS; } p = d = str->d; grow = sizeof( *d ) + ( sizeof( webvtt_byte ) * ( d->length + need ) ); if( grow < page ) { n = page; do { n = n / 2; } while( n > grow ); if( n < 1 << 6 ) { n = 1 << 6; } else { n = n * 2; } } else { n = page; do { n = n * 2; } while ( n < grow ); } p = ( webvtt_string_data * )webvtt_alloc( n ); if( !p ) { return WEBVTT_OUT_OF_MEMORY; } p->refs.value = 1; p->alloc = ( n - sizeof( *p ) ) / sizeof( webvtt_byte ); p->length = d->length; p->text = p->array; memcpy( p->text, d->text, sizeof( webvtt_byte ) * p->length ); p->text[ p->length ] = 0; str->d = p; if( webvtt_deref( &d->refs ) == 0 ) { webvtt_free( d ); } return WEBVTT_SUCCESS; } WEBVTT_EXPORT int webvtt_string_getline( webvtt_string *src, const webvtt_byte *buffer, webvtt_uint *pos, webvtt_uint len, int *truncate, webvtt_bool finish, webvtt_bool retain_new_line ) { int ret = 0; webvtt_string *str = src; webvtt_string_data *d = 0; const webvtt_byte *s = buffer + *pos; const webvtt_byte *p = s; const webvtt_byte *n = buffer + len; /** *if this is public now, maybe we should return webvtt_status so we can * differentiate between WEBVTT_OUT_OF_MEMORY and WEBVTT_INVALID_PARAM */ if( !str ) { return -1; } /* This had better be a valid string_data, or else NULL. */ d = str->d; if( !str->d ) { if(WEBVTT_FAILED(webvtt_create_string( 0x100, str ))) { return -1; } d = str->d; } while( p < n && *p != UTF8_CARRIAGE_RETURN && *p != UTF8_LINE_FEED ) { ++p; } /* Retain the new line character. */ if( p < n && retain_new_line ) { p++; } if( p < n || finish ) { ret = 1; /* indicate that we found EOL */ } len = (webvtt_uint)( p - s ); *pos += len; if( d->length + len + 1 >= d->alloc ) { if( truncate && d->alloc >= WEBVTT_MAX_LINE ) { /* truncate. */ (*truncate)++; } else { if( grow( str, len + 1 ) == WEBVTT_OUT_OF_MEMORY ) { ret = -1; } d = str->d; } } /* Copy everything in */ if( len && ret >= 0 && d->length + len < d->alloc ) { memcpy( d->text + d->length, s, len ); d->length += len; d->text[ d->length ] = 0; } return ret; } WEBVTT_EXPORT webvtt_status webvtt_string_putc( webvtt_string *str, webvtt_byte to_append ) { webvtt_status result; if( !str ) { return WEBVTT_INVALID_PARAM; } if( WEBVTT_FAILED( result = webvtt_string_detach( str ) ) ) { return result; } if( !WEBVTT_FAILED( result = grow( str, 1 ) ) ) { str->d->text[ str->d->length++ ] = to_append; str->d->text[ str->d->length ] = 0; } return result; } WEBVTT_EXPORT webvtt_bool webvtt_string_is_equal( webvtt_string *str, webvtt_byte *to_compare, webvtt_uint len ) { if( !str || !to_compare || webvtt_string_length( str ) != len ) { return 0; } return memcmp( webvtt_string_text( str ), to_compare, len ) == 0; } WEBVTT_EXPORT webvtt_status webvtt_string_append( webvtt_string *str, const webvtt_byte *buffer, int len ) { webvtt_status result; if( !str || !buffer ) { return WEBVTT_INVALID_PARAM; } if( !str->d ) { webvtt_init_string( str ); } if( len < 0 ) { len = strlen( ( const char * )buffer ); } if( len == 0 ) { return WEBVTT_SUCCESS; } if( !WEBVTT_FAILED( result = grow( str, str->d->length + len ) ) ) { memcpy( str->d->text + str->d->length, buffer, len ); str->d->length += len; /* null-terminate string */ str->d->text[ str->d->length ] = 0; } return result; } WEBVTT_EXPORT webvtt_status webvtt_string_append_string( webvtt_string *str, const webvtt_string *other ) { if( !str || !other ) { return WEBVTT_INVALID_PARAM; } return webvtt_string_append( str, other->d->text, other->d->length ); } /** * String lists */ WEBVTT_EXPORT webvtt_status webvtt_create_stringlist( webvtt_stringlist **result ) { webvtt_stringlist *list; if( !result ) { return WEBVTT_INVALID_PARAM; } list = ( webvtt_stringlist * )webvtt_alloc0( sizeof( *list ) ); if( !list ) { return WEBVTT_OUT_OF_MEMORY; } list->alloc = 0; list->length = 0; webvtt_ref_stringlist( list ); *result = list; return WEBVTT_SUCCESS; } WEBVTT_EXPORT void webvtt_ref_stringlist( webvtt_stringlist *list ) { if( list ) { webvtt_ref( &list->refs ); } } WEBVTT_EXPORT void webvtt_copy_stringlist( webvtt_stringlist **left, webvtt_stringlist *right ) { if( !left || !right ) { return; } *left = right; webvtt_ref_stringlist( *left ); } WEBVTT_EXPORT void webvtt_release_stringlist( webvtt_stringlist **list ) { webvtt_stringlist *l; webvtt_uint i; if( !list || !*list ) { return; } l = *list; if( webvtt_deref( &l->refs ) == 0 ) { if( l->items ) { for( i = 0; i < l->length; i++ ) { webvtt_release_string( &l->items[ i ] ); } webvtt_free( l->items ); } webvtt_free( l ); } *list = 0; } WEBVTT_EXPORT webvtt_status webvtt_stringlist_push( webvtt_stringlist *list, webvtt_string *str ) { if( !list || !str ) { return WEBVTT_INVALID_PARAM; } if( list->length + 1 >= ( ( list->alloc / 3 ) * 2 ) ) { webvtt_string *arr, *old; list->alloc = list->alloc == 0 ? 8 : list->alloc * 2; arr = ( webvtt_string * )webvtt_alloc0( sizeof( webvtt_string ) * list->alloc ); if( !arr ) { return WEBVTT_OUT_OF_MEMORY; } memcpy( arr, list->items, sizeof( webvtt_string ) * list->length ); old = list->items; list->items = arr; webvtt_free( old ); } list->items[list->length].d = str->d; webvtt_ref_string( list->items + list->length++ ); return WEBVTT_SUCCESS; } WEBVTT_EXPORT webvtt_bool webvtt_next_utf8( const webvtt_byte **begin, const webvtt_byte *end ) { int c; const webvtt_byte *p; if( !begin || !*begin || !**begin || ( end && ( end <= *begin ) ) ) { /* Either begin is null, or end is null, or end <= begin */ return 0; } p = *begin; if( !end ) { end = p + strlen( ( const char * )p ); } c = webvtt_utf8_length( p ); if( c > 0 ) { p += c; } else if( ( *p & 0xC0 ) == 0x80 ) { const webvtt_byte *pc = p + 1; while( pc < end && ( ( *pc & 0xC0 ) == 0x80 ) ) { ++pc; } if( pc <= end ) { p = pc; } } if( *begin != p && p <= end ) { *begin = p; return 1; } return 0; } WEBVTT_EXPORT webvtt_bool webvtt_skip_utf8( const webvtt_byte **begin, const webvtt_byte *end, int n_chars ) { const webvtt_byte *first; if( !begin || !*begin ) { return 0; } if( n_chars < 0 ) { return 0; } first = *begin; if( !end ) { end = first + strlen( ( const char * )first ); } if( end > first ) { /* forwards */ while( n_chars && end > *begin ) { if( webvtt_next_utf8( begin, end ) ) { --n_chars; } } } return n_chars == 0; } WEBVTT_EXPORT webvtt_uint16 webvtt_utf8_to_utf16( const webvtt_byte *utf8, const webvtt_byte *end, webvtt_uint16 *high_surrogate ) { int need = 0, min = 0; webvtt_uint32 uc = 0; /* We're missing our pointers */ if( !utf8 ) { return 0; } if( !end ) { end = utf8 + strlen( ( const char * )utf8 ); } if( utf8 >= end ) { return 0; } /* If we are returning a surrogate pair, initialize it to 0 */ if( high_surrogate ) { *high_surrogate = 0; } /* We're not at the start of a character */ if( ( *utf8 & 0xC0 ) == 0x80 ) { return 0; } if( *utf8 < 0x80 ) { return ( webvtt_uint32 )( *utf8 ); } while( utf8 < end ) { webvtt_byte ch = *utf8; utf8++; if( need ) { if( ( ch & 0xC0 ) == 0x80 ) { uc = ( uc << 6 ) | ( ch & 0x3F ); if (!--need) { int nc; if ( !( nc = UTF_IS_NONCHAR( uc ) ) && uc > 0xFFFF && uc < 0x110000) { /* Surrogate pair */ if( high_surrogate ) { *high_surrogate = UTF_HIGH_SURROGATE( uc ); } return UTF_LOW_SURROGATE( uc ); } else if ( ( uc < min ) || ( uc >= 0xD800 && uc <= 0xDFFF ) || nc || uc >= 0x110000) { /* Non-character, overlong sequence, or utf16 surrogate */ return 0xFFFD; } else { /* Non-surrogate */ return uc; } } } } else { if ( ( ch & 0xE0 ) == 0xC0 ) { uc = ch & 0x1f; need = 1; min = 0x80; } else if ( ( ch & 0xF0 ) == 0xE0 ) { uc = ch & 0x0f; need = 2; min = 0x800; } else if ( ( ch & 0xF8 ) == 0xF0 ) { uc = ch & 0x07; need = 3; min = 0x10000; } else { /* TODO This should deal with 5-7 byte sequences */ /* return the replacement character in other cases */ return 0xFFFD; } } } return 0; } WEBVTT_EXPORT int webvtt_utf8_chcount( const webvtt_byte *utf8, const webvtt_byte *end ) { int n = 0; const webvtt_byte *p; if( !utf8 || !*utf8 || ( end != 0 && end < utf8 ) ) { return 0; } if( !end ) { end = utf8 + strlen( ( const char * )utf8 ); } for( p = utf8; p < end; ++n ) { int c = webvtt_utf8_length( p ); if( c < 1 ) { break; } p += c; } return n; } WEBVTT_EXPORT int webvtt_utf8_length( const webvtt_byte *utf8 ) { webvtt_byte ch; if( !utf8 ) { return 0; } ch = *utf8; if( ch < 0x80 ) { return 1; } else if( ( ch & 0xE0 ) == 0xC0 ) { return 2; } else if( ( ch & 0xF0 ) == 0xE0 ) { return 3; } else if( ( ch & 0xF8 ) == 0xF0 ) { return 4; } else if( ( ch & 0xFE ) == 0xFC ) { return 5; } return -1; }