linux-packaging-mono/eglib/src/gmarkup.c

/*
 * gmakrup.c: Minimal XML markup reader.
 *
 * Unlike the GLib one, this can not be restarted with more text
 * as the Mono use does not require it.
 *
 * Actually, with further thought, I think that this could be made
 * to restart very easily.  The pos == end condition would mean
 * "return to caller" and only at end parse this would be a fatal
 * error.
 *
 * Not that it matters to Mono, but it is very simple to change, there
 * is a tricky situation: there are a few places where we check p+n
 * in the source, and that would have to change to be progressive, instead
 * of depending on the string to be complete at that point, so we would
 * have to introduce extra states to cope with that.
 *
 * Author:
 *   Miguel de Icaza (miguel@novell.com)
 *
 * (C) 2006 Novell, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <stdio.h>
#include <ctype.h>
#include <glib.h>

#define set_error(msg, ...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, msg, __VA_ARGS__); } while (0);

typedef enum {
	START,
	START_ELEMENT,
	TEXT,
	FLUSH_TEXT,
	CLOSING_ELEMENT,
	COMMENT,
	SKIP_XML_DECLARATION
} ParseState;

struct _GMarkupParseContext {
	GMarkupParser  parser;
	gpointer       user_data;
	GDestroyNotify user_data_dnotify;
	ParseState     state;

	/* Stores the name of the current element, so we can issue the end_element */
	GSList         *level;

	GString        *text;
};

GMarkupParseContext *
g_markup_parse_context_new (const GMarkupParser *parser,
			    GMarkupParseFlags flags,
			    gpointer user_data,
			    GDestroyNotify user_data_dnotify)
{
	GMarkupParseContext *context = g_new0 (GMarkupParseContext, 1);

	context->parser = *parser;
	context->user_data = user_data;
	context->user_data_dnotify = user_data_dnotify;

	return context;
}

void
g_markup_parse_context_free (GMarkupParseContext *context)
{
	GSList *l;

	g_return_if_fail (context != NULL);

	if (context->user_data_dnotify != NULL)
		(context->user_data_dnotify) (context->user_data);

	if (context->text != NULL)
		g_string_free (context->text, TRUE);
	for (l = context->level; l; l = l->next)
		g_free (l->data);
	g_slist_free (context->level);
	g_free (context);
}

static gboolean
my_isspace (char c)
{
	if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v')
		return TRUE;
	return FALSE;
}

static gboolean
my_isalnum (char c)
{
	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
		return TRUE;
	if (c >= '0' && c <= '9')
		return TRUE;

	return FALSE;
}

static gboolean
my_isalpha (char c)
{
	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
		return TRUE;
	return FALSE;
}

static const char *
skip_space (const char *p, const char *end)
{
	for (; p < end && my_isspace (*p); p++)
		;
	return p;
}

static const char *
parse_value (const char *p, const char *end, char **value, GError **error)
{
	const char *start;
	int l;

	if (*p != '"'){
		set_error ("%s", "Expected the attribute value to start with a quote");
		return end;
	}
	start = ++p;
	for (; p < end && *p != '"'; p++)
		;
	if (p == end)
		return end;
	l = (int)(p - start);
	p++;
	*value = g_malloc (l + 1);
	if (*value == NULL)
		return end;
	strncpy (*value, start, l);
	(*value) [l] = 0;
	return p;
}

static const char *
parse_name (const char *p, const char *end, char **value)
{
	const char *start = p;
	int l;

	for (; p < end && my_isalnum (*p); p++)
		;
	if (p == end)
		return end;

	l = (int)(p - start);
	*value = g_malloc (l + 1);
	if (*value == NULL)
		return end;
	strncpy (*value, start, l);
	(*value) [l] = 0;
	return p;
}

static const char *
parse_attributes (const char *p, const char *end, char ***names, char ***values, GError **error, int *full_stop, int state)
{
	int nnames = 0;

	while (TRUE){
		p = skip_space (p, end);
		if (p == end)
			return end;

		if (*p == '>'){
			*full_stop = 0;
			return p;
		}
		if (state == SKIP_XML_DECLARATION && *p == '?' && ((p+1) < end) && *(p+1) == '>'){
			*full_stop = 0;
			return p+1;
		}

		if (*p == '/' && ((p+1) < end && *(p+1) == '>')){
			*full_stop = 1;
			return p+1;
		} else {
			char *name, *value;

			p = parse_name (p, end, &name);
			if (p == end)
				return p;

			p = skip_space (p, end);
			if (p == end){
				g_free (name);
				return p;
			}
			if (*p != '='){
				set_error ("Expected an = after the attribute name `%s'", name);
				g_free (name);
				return end;
			}
			p++;
			p = skip_space (p, end);
			if (p == end){
				g_free (name);
				return end;
			}

			p = parse_value (p, end, &value, error);
			if (p == end){
				g_free (name);
				return p;
			}

			++nnames;
			*names = g_realloc (*names, sizeof (char **) * (nnames+1));
			*values = g_realloc (*values, sizeof (char **) * (nnames+1));
			(*names) [nnames-1] = name;
			(*values) [nnames-1] = value;
			(*names) [nnames] = NULL;
			(*values) [nnames] = NULL;
		}
	}
}

static void
destroy_parse_state (GMarkupParseContext *context)
{
	GSList *p;

	for (p = context->level; p != NULL; p = p->next)
		g_free (p->data);

	g_slist_free (context->level);
	if (context->text != NULL)
		g_string_free (context->text, TRUE);
	context->text = NULL;
	context->level = NULL;
}

gboolean
g_markup_parse_context_parse (GMarkupParseContext *context,
			      const gchar *text, gssize text_len,
			      GError **error)
{
	const char *p,  *end;

	g_return_val_if_fail (context != NULL, FALSE);
	g_return_val_if_fail (text != NULL, FALSE);
	g_return_val_if_fail (text_len >= 0, FALSE);

	end = text + text_len;

	for (p = text; p < end; p++){
		char c = *p;

		switch (context->state){
		case START:
			if (c == ' ' || c == '\t' || c == '\f' || c == '\n' || (c & 0x80))
				continue;
			if (c == '<'){
				if (p+1 < end && p [1] == '?'){
					context->state = SKIP_XML_DECLARATION;
					p++;
				} else
					context->state = START_ELEMENT;
				continue;
			}
			set_error ("%s", "Expected < to start the document");
			goto fail;

		case SKIP_XML_DECLARATION:
		case START_ELEMENT: {
			const char *element_start = p, *element_end;
			char *ename = NULL;
			int full_stop = 0, l;
			gchar **names = NULL, **values = NULL;

			for (; p < end && my_isspace (*p); p++)
				;
			if (p == end){
				set_error ("%s", "Unfinished element");
				goto fail;
			}

			if (*p == '!' && (p+2 < end) && (p [1] == '-') && (p [2] == '-')){
				context->state = COMMENT;
				p += 2;
				break;
			}

			if (!my_isalpha (*p)){
				set_error ("%s", "Expected an element name");
				goto fail;
			}

			for (++p; p < end && (my_isalnum (*p) || (*p == '.')); p++)
				;
			if (p == end){
				set_error ("%s", "Expected an element");
				goto fail;
			}
			element_end = p;

			for (; p < end && my_isspace (*p); p++)
				;
			if (p == end){
				set_error ("%s", "Unfinished element");
				goto fail;
			}
			p = parse_attributes (p, end, &names, &values, error, &full_stop, context->state);
			if (p == end){
				if (names != NULL) {
					g_strfreev (names);
					g_strfreev (values);
				}
				/* Only set the error if parse_attributes did not */
				if (error != NULL && *error == NULL)
					set_error ("%s", "Unfinished sequence");
				goto fail;
			}
			l = (int)(element_end - element_start);
			ename = g_malloc (l + 1);
			if (ename == NULL)
				goto fail;
			strncpy (ename, element_start, l);
			ename [l] = 0;

			if (context->state == START_ELEMENT)
				if (context->parser.start_element != NULL)
					context->parser.start_element (context, ename,
								       (const gchar **) names,
								       (const gchar **) values,
								       context->user_data, error);

			if (names != NULL){
				g_strfreev (names);
				g_strfreev (values);
			}

			if (error != NULL && *error != NULL){
				g_free (ename);
				goto fail;
			}

			if (full_stop){
				if (context->parser.end_element != NULL &&  context->state == START_ELEMENT){
					context->parser.end_element (context, ename, context->user_data, error);
					if (error != NULL && *error != NULL){
						free (ename);
						goto fail;
					}
				}
				g_free (ename);
			} else {
				context->level = g_slist_prepend (context->level, ename);
			}

			context->state = TEXT;
			break;
		} /* case START_ELEMENT */

		case TEXT: {
			if (c == '<'){
				context->state = FLUSH_TEXT;
				break;
			}
			if (context->parser.text != NULL){
				if (context->text == NULL)
					context->text = g_string_new ("");
				g_string_append_c (context->text, c);
			}
			break;
		}

		case COMMENT:
			if (*p != '-')
				break;
			if (p+2 < end && (p [1] == '-') && (p [2] == '>')){
				context->state = TEXT;
				p += 2;
				break;
			}
			break;

		case FLUSH_TEXT:
			if (context->parser.text != NULL && context->text != NULL){
				context->parser.text (context, context->text->str, context->text->len,
						      context->user_data, error);
				if (error != NULL && *error != NULL)
					goto fail;
			}

			if (c == '/')
				context->state = CLOSING_ELEMENT;
			else {
				p--;
				context->state = START_ELEMENT;
			}
			break;

		case CLOSING_ELEMENT: {
			GSList *current = context->level;
			char *text;

			if (context->level == NULL){
				set_error ("%s", "Too many closing tags, not enough open tags");
				goto fail;
			}

			text = current->data;
			if (context->parser.end_element != NULL){
				context->parser.end_element (context, text, context->user_data, error);
				if (error != NULL && *error != NULL){
					g_free (text);
					goto fail;
				}
			}
			g_free (text);

			while (p < end && *p != '>')
				p++;

			context->level = context->level->next;
			g_slist_free_1 (current);
			context->state = TEXT;
			break;
		} /* case CLOSING_ELEMENT */

		} /* switch */
	}


	return TRUE;
 fail:
	if (context->parser.error && error != NULL && *error)
		context->parser.error (context, *error, context->user_data);

	destroy_parse_state (context);
	return FALSE;
}

gboolean
g_markup_parse_context_end_parse (GMarkupParseContext *context, GError **error)
{
	g_return_val_if_fail (context != NULL, FALSE);

	/*
	 * In our case, we always signal errors during parse, not at the end
	 * see the notes at the top of this file for details on how this
	 * could be moved here
	 */
	return TRUE;
}