// HtmlAgilityPack V1.0 - Simon Mourier
using System;
using System.IO;
using System.Text;
namespace HtmlAgilityPack
{
/// <summary>
/// Represents a document with mixed code and text. ASP, ASPX and JSP files are good examples of such documents.
/// </summary>
public class MixedCodeDocument
{
#region Fields
// Character code most recently read from _text by Parse(); IncrementPosition() compares it to 10 (line feed).
private int _c;
// Fragment list exposed through the CodeFragments property.
internal MixedCodeDocumentFragmentList _codefragments;
// Fragment whose position and length Parse() is currently filling in.
private MixedCodeDocumentFragment _currentfragment;
// Fragment list exposed through the Fragments property; the Code property iterates it.
internal MixedCodeDocumentFragmentList _fragments;
// Parse cursor into _text; advanced by IncrementPosition() before the state switch in Parse() runs.
private int _index;
// Line counter; incremented by IncrementPosition() after a line feed.
private int _line;
// Column counter; set back to 1 by IncrementPosition() after a line feed, otherwise incremented.
private int _lineposition;
// Current parser state (Text or Code).
private ParseState _state;
// Encoding taken from StreamReader.CurrentEncoding in Load(TextReader); left unchanged when the reader is not a StreamReader.
private Encoding _streamencoding;
// Complete text read by Load(TextReader); Parse() scans it.
internal string _text;
// Fragment list exposed through the TextFragments property.
internal MixedCodeDocumentFragmentList _textfragments;
/// <summary>
/// Gets or sets the token representing code end.
/// </summary>
public string TokenCodeEnd = "%>";
/// <summary>
/// Gets or sets the token representing code start.
/// </summary>
public string TokenCodeStart = "<%";
// NOTE(review): TokenDirective is not referenced anywhere in this class - presumably it is consumed by the fragment types; confirm.
/// <summary>
/// Gets or sets the token representing code directive.
/// </summary>
public string TokenDirective = "@";
/// <summary>
/// Gets or sets the token representing response write directive.
/// </summary>
public string TokenResponseWrite = "Response.Write ";
// Format string used by the Code property to emit a numbered placeholder for each text fragment.
private string TokenTextBlock = "TextBlock({0})";
#endregion
#region Constructors
/// <summary>
/// Initializes a new mixed code document. The code-only, text-only and combined
/// fragment lists are allocated here, each one constructed with this document.
/// </summary>
public MixedCodeDocument()
{
    this._codefragments = new MixedCodeDocumentFragmentList(this);
    this._textfragments = new MixedCodeDocumentFragmentList(this);
    this._fragments = new MixedCodeDocumentFragmentList(this);
}
#endregion
#region Properties
/// <summary>
/// Gets the code represented by the mixed code document seen as a template.
/// </summary>
/// <remarks>
/// Every text fragment contributes one line made of <see cref="TokenResponseWrite"/> followed by a
/// numbered <c>TextBlock(n)</c> placeholder, where n starts at 0 and counts text fragments only.
/// Every code fragment contributes its code on a line of its own. Each line ends with "\n".
/// </remarks>
public string Code
{
    get
    {
        // A StringBuilder avoids re-copying the whole accumulated output for every fragment.
        StringBuilder code = new StringBuilder();
        int textBlockIndex = 0;
        foreach (MixedCodeDocumentFragment frag in _fragments)
        {
            switch (frag._type)
            {
                case MixedCodeDocumentFragmentType.Text:
                    code.Append(TokenResponseWrite);
                    code.Append(string.Format(TokenTextBlock, textBlockIndex));
                    code.Append("\n");
                    textBlockIndex++;
                    break;
                case MixedCodeDocumentFragmentType.Code:
                    code.Append(((MixedCodeDocumentCodeFragment) frag).Code);
                    code.Append("\n");
                    break;
            }
        }
        return code.ToString();
    }
}
/// <summary>
/// Gets the list of code fragments in the document.
/// </summary>
public MixedCodeDocumentFragmentList CodeFragments
{
    get
    {
        return this._codefragments;
    }
}
/// <summary>
/// Gets the list of all fragments in the document.
/// </summary>
public MixedCodeDocumentFragmentList Fragments
{
    get
    {
        return this._fragments;
    }
}
/// <summary>
/// Gets the encoding of the stream used to read the document.
/// </summary>
public Encoding StreamEncoding
{
    get
    {
        return this._streamencoding;
    }
}
/// <summary>
/// Gets the list of text fragments in the document.
/// </summary>
public MixedCodeDocumentFragmentList TextFragments
{
    get
    {
        return this._textfragments;
    }
}
#endregion
#region Public Methods
/// <summary>
/// Creates a code fragment instance.
/// </summary>
/// <returns>The newly created code fragment instance.</returns>
public MixedCodeDocumentCodeFragment CreateCodeFragment()
{
    MixedCodeDocumentFragment created = CreateFragment(MixedCodeDocumentFragmentType.Code);
    return (MixedCodeDocumentCodeFragment) created;
}
/// <summary>
/// Creates a text fragment instance.
/// </summary>
/// <returns>The newly created text fragment instance.</returns>
public MixedCodeDocumentTextFragment CreateTextFragment()
{
    MixedCodeDocumentFragment created = CreateFragment(MixedCodeDocumentFragmentType.Text);
    return (MixedCodeDocumentTextFragment) created;
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream. It is wrapped in a StreamReader that is passed on to the TextReader overload.</param>
public void Load(Stream stream)
{
    TextReader source = new StreamReader(stream);
    Load(source);
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
{
    TextReader source = new StreamReader(stream, detectEncodingFromByteOrderMarks);
    Load(source);
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Load(Stream stream, Encoding encoding)
{
    TextReader source = new StreamReader(stream, encoding);
    Load(source);
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
{
    TextReader source = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks);
    Load(source);
}
/// <summary>
/// Loads a mixed code document from a stream.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
/// <param name="buffersize">The minimum buffer size.</param>
public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
{
    TextReader source = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize);
    Load(source);
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
public void Load(string path)
{
    TextReader source = new StreamReader(path);
    Load(source);
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(string path, bool detectEncodingFromByteOrderMarks)
{
    TextReader source = new StreamReader(path, detectEncodingFromByteOrderMarks);
    Load(source);
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Load(string path, Encoding encoding)
{
    TextReader source = new StreamReader(path, encoding);
    Load(source);
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
{
    TextReader source = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks);
    Load(source);
}
/// <summary>
/// Loads a mixed code document from a file.
/// </summary>
/// <param name="path">The complete file path to be read.</param>
/// <param name="encoding">The character encoding to use.</param>
/// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
/// <param name="buffersize">The minimum buffer size.</param>
public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
{
    TextReader source = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize);
    Load(source);
}
/// <summary>
/// Loads the mixed code document from the specified TextReader.
/// </summary>
/// <param name="reader">
/// The TextReader used to feed the document text. It is read to the end and then closed,
/// including when reading fails.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public void Load(TextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }
    // Drop every fragment of a previously loaded document. The combined list has to be
    // cleared together with the per-type lists, otherwise fragments of the previous
    // document would still show up in Fragments and Code after a reload.
    _codefragments.Clear();
    _textfragments.Clear();
    _fragments.Clear();
    // all pseudo constructors get down to this one
    try
    {
        _text = reader.ReadToEnd();
        // Capture the encoding only after reading: a StreamReader performs its byte order
        // mark detection on the first read, so CurrentEncoding may differ before that.
        StreamReader sr = reader as StreamReader;
        if (sr != null)
        {
            _streamencoding = sr.CurrentEncoding;
        }
    }
    finally
    {
        // Release the reader even when ReadToEnd throws.
        reader.Close();
    }
    Parse();
}
/// <summary>
/// Loads a mixed code document from a string.
/// </summary>
/// <param name="html">The text to load.</param>
public void LoadHtml(string html)
{
    TextReader source = new StringReader(html);
    Load(source);
}
/// <summary>
/// Saves the mixed document to the specified stream, using the encoding returned by GetOutEncoding().
/// </summary>
/// <param name="outStream">The stream to which you want to save. This method does not close it.</param>
public void Save(Stream outStream)
{
    Encoding outputEncoding = GetOutEncoding();
    StreamWriter sw = new StreamWriter(outStream, outputEncoding);
    Save(sw);
}
/// <summary>
/// Saves the mixed document to the specified stream with an explicit encoding.
/// </summary>
/// <param name="outStream">The stream to which you want to save. This method does not close it.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Save(Stream outStream, Encoding encoding)
{
    StreamWriter target = new StreamWriter(outStream, encoding);
    Save(target);
}
/// <summary>
/// Saves the mixed document to the specified file, using the encoding returned by GetOutEncoding().
/// </summary>
/// <param name="filename">The location of the file where you want to save the document. The writer is opened with append disabled.</param>
public void Save(string filename)
{
    // The writer owns the file handle it opens, so dispose it here: this flushes any
    // buffered output and releases the file even if saving throws.
    using (StreamWriter sw = new StreamWriter(filename, false, GetOutEncoding()))
    {
        Save(sw);
    }
}
/// <summary>
/// Saves the mixed document to the specified file with an explicit encoding.
/// </summary>
/// <param name="filename">The location of the file where you want to save the document. The writer is opened with append disabled.</param>
/// <param name="encoding">The character encoding to use.</param>
public void Save(string filename, Encoding encoding)
{
    // The writer owns the file handle it opens, so dispose it here: this flushes any
    // buffered output and releases the file even if saving throws.
    using (StreamWriter sw = new StreamWriter(filename, false, encoding))
    {
        Save(sw);
    }
}
/// <summary>
/// Saves the mixed document to the specified StreamWriter by forwarding to the TextWriter overload.
/// </summary>
/// <param name="writer">The StreamWriter to which you want to save.</param>
public void Save(StreamWriter writer)
{
    TextWriter target = writer;
    Save(target);
}
/// <summary>
/// Saves the mixed document to the specified TextWriter.
/// </summary>
/// <param name="writer">The TextWriter to which you want to save.</param>
public void Save(TextWriter writer)
{
// NOTE(review): the only thing done here is flushing the writer; nothing in this method
// writes the document text or its fragments. Confirm whether the output is meant to be
// produced elsewhere or whether this is an unfinished implementation.
writer.Flush();
}
#endregion
#region Internal Methods
/// <summary>
/// Creates a fragment of the requested type, constructed with this document.
/// </summary>
/// <param name="type">The kind of fragment to create (Text or Code).</param>
/// <returns>A new text or code fragment instance.</returns>
/// <exception cref="NotSupportedException"><paramref name="type"/> is neither Text nor Code.</exception>
internal MixedCodeDocumentFragment CreateFragment(MixedCodeDocumentFragmentType type)
{
    if (type == MixedCodeDocumentFragmentType.Text)
    {
        return new MixedCodeDocumentTextFragment(this);
    }
    if (type == MixedCodeDocumentFragmentType.Code)
    {
        return new MixedCodeDocumentCodeFragment(this);
    }
    throw new NotSupportedException();
}
/// <summary>
/// Picks the encoding used by the Save overloads that take no explicit encoding.
/// </summary>
/// <returns>The encoding recorded while loading when there is one; otherwise <see cref="Encoding.Default"/>.</returns>
internal Encoding GetOutEncoding()
{
    return _streamencoding != null ? _streamencoding : Encoding.Default;
}
#endregion
#region Private Methods
/// <summary>
/// Moves the parse cursor forward by one character and updates the line/column counters:
/// when the character just read is a line feed (code 10) the column restarts at 1 and the
/// line counter grows by one; otherwise only the column grows by one.
/// </summary>
private void IncrementPosition()
{
    _index = _index + 1;
    bool atLineFeed = (_c == 10);
    if (!atLineFeed)
    {
        _lineposition = _lineposition + 1;
        return;
    }
    _lineposition = 1;
    _line = _line + 1;
}
/// <summary>
/// Scans <c>_text</c> character by character and splits it into alternating text and code
/// fragments, switching state on <see cref="TokenCodeStart"/> and <see cref="TokenCodeEnd"/>.
/// </summary>
/// <remarks>
/// Fragments are obtained from CreateFragment; this method only assigns their position and length.
/// NOTE(review): the index and length arithmetic is kept exactly as it was. Whether the
/// computed lengths are the intended ones depends on how the fragment classes slice
/// <c>_text</c>, which is not visible from this file - verify before changing it.
/// </remarks>
private void Parse()
{
    _state = ParseState.Text;
    _index = 0;
    // Restart position tracking too, so that loading a second document on the same instance
    // yields the same line/column values as loading it on a fresh instance (where both
    // counters start at their default of 0).
    _line = 0;
    _lineposition = 0;
    _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Text);
    while (_index < _text.Length)
    {
        _c = _text[_index];
        // After this call _index is one past the character held in _c.
        IncrementPosition();
        switch (_state)
        {
            case ParseState.Text:
                // Keeps the Substring call below from running past the end of the text.
                if (_index + TokenCodeStart.Length < _text.Length)
                {
                    // _index - 1 is the position of the character just read.
                    if (_text.Substring(_index - 1, TokenCodeStart.Length) == TokenCodeStart)
                    {
                        _state = ParseState.Code;
                        // Close the text fragment just before the start token.
                        _currentfragment.Length = _index - 1 - _currentfragment.Index;
                        _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Code);
                        SetPosition();
                        continue;
                    }
                }
                break;
            case ParseState.Code:
                // Keeps the Substring call below from running past the end of the text.
                if (_index + TokenCodeEnd.Length < _text.Length)
                {
                    if (_text.Substring(_index - 1, TokenCodeEnd.Length) == TokenCodeEnd)
                    {
                        _state = ParseState.Text;
                        _currentfragment.Length = _index + TokenCodeEnd.Length - _currentfragment.Index;
                        // Advance the cursor and the column by the length of the end token.
                        _index += TokenCodeEnd.Length;
                        _lineposition += TokenCodeEnd.Length;
                        _currentfragment = CreateFragment(MixedCodeDocumentFragmentType.Text);
                        SetPosition();
                        continue;
                    }
                }
                break;
        }
    }
    // Whatever fragment is still open when the text ends extends to the final cursor position.
    _currentfragment.Length = _index - _currentfragment.Index;
}
/// <summary>
/// Stamps the current fragment with the current line and column counters, sets its index
/// to one before the parse cursor and resets its length to 0.
/// </summary>
private void SetPosition()
{
    MixedCodeDocumentFragment fragment = _currentfragment;
    fragment.Line = _line;
    fragment._lineposition = _lineposition;
    fragment.Index = _index - 1;
    fragment.Length = 0;
}
#endregion
#region Nested type: ParseState
/// <summary>
/// States of the scanner in Parse().
/// </summary>
private enum ParseState
{
    /// <summary>The scanner is outside of a code block.</summary>
    Text = 0,
    /// <summary>The scanner is inside a code block.</summary>
    Code = 1
}
#endregion
}
}