You've already forked linux-packaging-mono
							
							
		
			
				
	
	
		
			1962 lines
		
	
	
		
			70 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			1962 lines
		
	
	
		
			70 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
| // HtmlAgilityPack V1.0 - Simon Mourier <simon underscore mourier at hotmail dot com>
 | |
| using System;
 | |
| using System.Collections;
 | |
| using System.Collections.Generic;
 | |
| using System.IO;
 | |
| using System.Text;
 | |
| using System.Text.RegularExpressions;
 | |
| using System.Xml;
 | |
| using System.Xml.XPath;
 | |
| 
 | |
| namespace HtmlAgilityPack
 | |
| {
 | |
|     /// <summary>
 | |
|     /// Represents a complete HTML document.
 | |
|     /// </summary>
 | |
|     public class HtmlDocument : IXPathNavigable
 | |
|     {
 | |
|         #region Fields
 | |
| 
 | |
|         private int _c;
 | |
|         private Crc32 _crc32;
 | |
|         private HtmlAttribute _currentattribute;
 | |
|         private HtmlNode _currentnode;
 | |
|         private Encoding _declaredencoding;
 | |
|         private HtmlNode _documentnode;
 | |
|         private bool _fullcomment;
 | |
|         private int _index;
 | |
|         internal Hashtable _lastnodes = new Hashtable();
 | |
|         private HtmlNode _lastparentnode;
 | |
|         private int _line;
 | |
|         private int _lineposition, _maxlineposition;
 | |
|         internal Hashtable _nodesid;
 | |
|         private ParseState _oldstate;
 | |
|         private bool _onlyDetectEncoding;
 | |
|         internal Hashtable _openednodes;
 | |
|         private List<HtmlParseError> _parseerrors = new List<HtmlParseError>();
 | |
|         private string _remainder;
 | |
|         private int _remainderOffset;
 | |
|         private ParseState _state;
 | |
|         private Encoding _streamencoding;
 | |
|         internal string _text;
 | |
| 
 | |
|         // public props
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Adds Debugging attributes to node. Default is false.
 | |
|         /// </summary>
 | |
|         public bool OptionAddDebuggingAttributes;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if closing for non closed nodes must be done at the end or directly in the document.
 | |
|         /// Setting this to true can actually change how browsers render the page. Default is false.
 | |
|         /// </summary>
 | |
|         public bool OptionAutoCloseOnEnd; // close errors at the end
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if non closed nodes will be checked at the end of parsing. Default is true.
 | |
|         /// </summary>
 | |
|         public bool OptionCheckSyntax = true;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if a checksum must be computed for the document while parsing. Default is false.
 | |
|         /// </summary>
 | |
|         public bool OptionComputeChecksum;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines the default stream encoding to use. Default is System.Text.Encoding.Default.
 | |
|         /// </summary>
 | |
|         public Encoding OptionDefaultStreamEncoding = Encoding.Default;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if source text must be extracted while parsing errors.
 | |
|         /// If the document has a lot of errors, or cascading errors, parsing performance can be dramatically affected if set to true.
 | |
|         /// Default is false.
 | |
|         /// </summary>
 | |
|         public bool OptionExtractErrorSourceText;
 | |
| 
 | |
|         // turning this on can dramatically slow performance if a lot of errors are detected
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines the maximum length of the source text extracted for parse errors. Default is 100.
 | |
|         /// </summary>
 | |
|         public int OptionExtractErrorSourceTextMaxLength = 100;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if LI, TR, TH, TD tags must be partially fixed when nesting errors are detected. Default is false.
 | |
|         /// </summary>
 | |
|         public bool OptionFixNestedTags; // fix li, tr, th, td tags
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if output must conform to XML, instead of HTML.
 | |
|         /// </summary>
 | |
|         public bool OptionOutputAsXml;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if attribute value output must be optimized (not bound with double quotes if it is possible). Default is false.
 | |
|         /// </summary>
 | |
|         public bool OptionOutputOptimizeAttributeValues;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if names must be output with their original case. Useful for asp.net tags and attributes.
 | |
|         /// </summary>
 | |
|         public bool OptionOutputOriginalCase;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if name must be output in uppercase. Default is false.
 | |
|         /// </summary>
 | |
|         public bool OptionOutputUpperCase;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if declared encoding must be read from the document.
 | |
|         /// Declared encoding is determined using the meta http-equiv="content-type" content="text/html;charset=XXXXX" html node.
 | |
|         /// Default is true.
 | |
|         /// </summary>
 | |
|         public bool OptionReadEncoding = true;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines the name of a node that will throw the StopperNodeException when found as an end node. Default is null.
 | |
|         /// </summary>
 | |
|         public string OptionStopperNodeName;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if the 'id' attribute must be specifically used. Default is true.
 | |
|         /// </summary>
 | |
|         public bool OptionUseIdAttribute = true;
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Defines if empty nodes must be written as closed during output. Default is false.
 | |
|         /// </summary>
 | |
|         public bool OptionWriteEmptyNodes;
 | |
| 
 | |
|         #endregion
 | |
| 
 | |
|         #region Static Members
 | |
| 
 | |
|         internal static readonly string HtmlExceptionRefNotChild = "Reference node must be a child of this node";
 | |
| 
 | |
|         internal static readonly string HtmlExceptionUseIdAttributeFalse =
 | |
|             "You need to set UseIdAttribute property to true to enable this feature";
 | |
| 
 | |
|         #endregion
 | |
| 
 | |
|         #region Constructors
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Creates an instance of an HTML document.
 | |
|         /// </summary>
 | |
|         public HtmlDocument()
 | |
|         {
 | |
|             _documentnode = CreateNode(HtmlNodeType.Document, 0);
 | |
|         }
 | |
| 
 | |
|         #endregion
 | |
| 
 | |
|         #region Properties
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Gets the document CRC32 checksum if OptionComputeChecksum was set to true before parsing, 0 otherwise.
 | |
|         /// </summary>
 | |
|         public int CheckSum
 | |
|         {
 | |
|             get
 | |
|             {
 | |
|                 if (_crc32 == null)
 | |
|                 {
 | |
|                     return 0;
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     return (int) _crc32.CheckSum;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Gets the document's declared encoding.
 | |
|         /// Declared encoding is determined using the meta http-equiv="content-type" content="text/html;charset=XXXXX" html node.
 | |
|         /// </summary>
 | |
|         public Encoding DeclaredEncoding
 | |
|         {
 | |
|             get { return _declaredencoding; }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Gets the root node of the document.
 | |
|         /// </summary>
 | |
|         public HtmlNode DocumentNode
 | |
|         {
 | |
|             get { return _documentnode; }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Gets the document's output encoding.
 | |
|         /// </summary>
 | |
|         public Encoding Encoding
 | |
|         {
 | |
|             get { return GetOutEncoding(); }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Gets a list of parse errors found in the document.
 | |
|         /// </summary>
 | |
|         public IEnumerable<HtmlParseError> ParseErrors
 | |
|         {
 | |
|             get { return _parseerrors; }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Gets the remaining text.
 | |
|         /// Will always be null if OptionStopperNodeName is null.
 | |
|         /// </summary>
 | |
|         public string Remainder
 | |
|         {
 | |
|             get { return _remainder; }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Gets the offset of Remainder in the original Html text.
 | |
|         /// If OptionStopperNodeName is null, this will return the length of the original Html text.
 | |
|         /// </summary>
 | |
|         public int RemainderOffset
 | |
|         {
 | |
|             get { return _remainderOffset; }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Gets the document's stream encoding.
 | |
|         /// </summary>
 | |
|         public Encoding StreamEncoding
 | |
|         {
 | |
|             get { return _streamencoding; }
 | |
|         }
 | |
| 
 | |
|         #endregion
 | |
| 
 | |
|         #region IXPathNavigable Members
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Creates a new XPathNavigator object for navigating this HTML document.
 | |
|         /// </summary>
 | |
|         /// <returns>An XPathNavigator object. The XPathNavigator is positioned on the root of the document.</returns>
 | |
|         public XPathNavigator CreateNavigator()
 | |
|         {
 | |
|             return new HtmlNodeNavigator(this, _documentnode);
 | |
|         }
 | |
| 
 | |
|         #endregion
 | |
| 
 | |
|         #region Public Methods
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Gets a valid XML name.
 | |
|         /// </summary>
 | |
|         /// <param name="name">Any text.</param>
 | |
|         /// <returns>A string that is a valid XML name.</returns>
 | |
|         public static string GetXmlName(string name)
 | |
|         {
 | |
|             string xmlname = string.Empty;
 | |
|             bool nameisok = true;
 | |
|             for (int i = 0; i < name.Length; i++)
 | |
|             {
 | |
|                 // names are lcase
 | |
|                 // note: we are very limited here, too much?
 | |
|                 if (((name[i] >= 'a') && (name[i] <= 'z')) ||
 | |
|                     ((name[i] >= '0') && (name[i] <= '9')) ||
 | |
|                     //					(name[i]==':') || (name[i]=='_') || (name[i]=='-') || (name[i]=='.')) // these are bads in fact
 | |
|                     (name[i] == '_') || (name[i] == '-') || (name[i] == '.'))
 | |
|                 {
 | |
|                     xmlname += name[i];
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     nameisok = false;
 | |
|                     byte[] bytes = Encoding.UTF8.GetBytes(new char[] {name[i]});
 | |
|                     for (int j = 0; j < bytes.Length; j++)
 | |
|                     {
 | |
|                         xmlname += bytes[j].ToString("x2");
 | |
|                     }
 | |
|                     xmlname += "_";
 | |
|                 }
 | |
|             }
 | |
|             if (nameisok)
 | |
|             {
 | |
|                 return xmlname;
 | |
|             }
 | |
|             return "_" + xmlname;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Applies HTML encoding to a specified string.
 | |
|         /// </summary>
 | |
|         /// <param name="html">The input string to encode. May not be null.</param>
 | |
|         /// <returns>The encoded string.</returns>
 | |
|         public static string HtmlEncode(string html)
 | |
|         {
 | |
|             if (html == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("html");
 | |
|             }
 | |
|             // replace & by & but only once!
 | |
|             Regex rx = new Regex("&(?!(amp;)|(lt;)|(gt;)|(quot;))", RegexOptions.IgnoreCase);
 | |
|             return rx.Replace(html, "&").Replace("<", "<").Replace(">", ">").Replace("\"", """);
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Determines if the specified character is considered as a whitespace character.
 | |
|         /// </summary>
 | |
|         /// <param name="c">The character to check.</param>
 | |
|         /// <returns>true if if the specified character is considered as a whitespace character.</returns>
 | |
|         public static bool IsWhiteSpace(int c)
 | |
|         {
 | |
|             if ((c == 10) || (c == 13) || (c == 32) || (c == 9))
 | |
|             {
 | |
|                 return true;
 | |
|             }
 | |
|             return false;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Creates an HTML attribute with the specified name.
 | |
|         /// </summary>
 | |
|         /// <param name="name">The name of the attribute. May not be null.</param>
 | |
|         /// <returns>The new HTML attribute.</returns>
 | |
|         public HtmlAttribute CreateAttribute(string name)
 | |
|         {
 | |
|             if (name == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("name");
 | |
|             }
 | |
|             HtmlAttribute att = CreateAttribute();
 | |
|             att.Name = name;
 | |
|             return att;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Creates an HTML attribute with the specified name.
 | |
|         /// </summary>
 | |
|         /// <param name="name">The name of the attribute. May not be null.</param>
 | |
|         /// <param name="value">The value of the attribute.</param>
 | |
|         /// <returns>The new HTML attribute.</returns>
 | |
|         public HtmlAttribute CreateAttribute(string name, string value)
 | |
|         {
 | |
|             if (name == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("name");
 | |
|             }
 | |
|             HtmlAttribute att = CreateAttribute(name);
 | |
|             att.Value = value;
 | |
|             return att;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Creates an HTML comment node.
 | |
|         /// </summary>
 | |
|         /// <returns>The new HTML comment node.</returns>
 | |
|         public HtmlCommentNode CreateComment()
 | |
|         {
 | |
|             return (HtmlCommentNode) CreateNode(HtmlNodeType.Comment);
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Creates an HTML comment node with the specified comment text.
 | |
|         /// </summary>
 | |
|         /// <param name="comment">The comment text. May not be null.</param>
 | |
|         /// <returns>The new HTML comment node.</returns>
 | |
|         public HtmlCommentNode CreateComment(string comment)
 | |
|         {
 | |
|             if (comment == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("comment");
 | |
|             }
 | |
|             HtmlCommentNode c = CreateComment();
 | |
|             c.Comment = comment;
 | |
|             return c;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Creates an HTML element node with the specified name.
 | |
|         /// </summary>
 | |
|         /// <param name="name">The qualified name of the element. May not be null.</param>
 | |
|         /// <returns>The new HTML node.</returns>
 | |
|         public HtmlNode CreateElement(string name)
 | |
|         {
 | |
|             if (name == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("name");
 | |
|             }
 | |
|             HtmlNode node = CreateNode(HtmlNodeType.Element);
 | |
|             node.Name = name;
 | |
|             return node;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Creates an HTML text node.
 | |
|         /// </summary>
 | |
|         /// <returns>The new HTML text node.</returns>
 | |
|         public HtmlTextNode CreateTextNode()
 | |
|         {
 | |
|             return (HtmlTextNode) CreateNode(HtmlNodeType.Text);
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Creates an HTML text node with the specified text.
 | |
|         /// </summary>
 | |
|         /// <param name="text">The text of the node. May not be null.</param>
 | |
|         /// <returns>The new HTML text node.</returns>
 | |
|         public HtmlTextNode CreateTextNode(string text)
 | |
|         {
 | |
|             if (text == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("text");
 | |
|             }
 | |
|             HtmlTextNode t = CreateTextNode();
 | |
|             t.Text = text;
 | |
|             return t;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Detects the encoding of an HTML stream.
 | |
|         /// </summary>
 | |
|         /// <param name="stream">The input stream. May not be null.</param>
 | |
|         /// <returns>The detected encoding.</returns>
 | |
|         public Encoding DetectEncoding(Stream stream)
 | |
|         {
 | |
|             if (stream == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("stream");
 | |
|             }
 | |
|             return DetectEncoding(new StreamReader(stream));
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Detects the encoding of an HTML file.
 | |
|         /// </summary>
 | |
|         /// <param name="path">Path for the file containing the HTML document to detect. May not be null.</param>
 | |
|         /// <returns>The detected encoding.</returns>
 | |
|         public Encoding DetectEncoding(string path)
 | |
|         {
 | |
|             if (path == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("path");
 | |
|             }
 | |
|             StreamReader sr = new StreamReader(path, OptionDefaultStreamEncoding);
 | |
|             Encoding encoding = DetectEncoding(sr);
 | |
|             sr.Close();
 | |
|             return encoding;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Detects the encoding of an HTML text provided on a TextReader.
 | |
|         /// </summary>
 | |
|         /// <param name="reader">The TextReader used to feed the HTML. May not be null.</param>
 | |
|         /// <returns>The detected encoding.</returns>
 | |
|         public Encoding DetectEncoding(TextReader reader)
 | |
|         {
 | |
|             if (reader == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("reader");
 | |
|             }
 | |
|             _onlyDetectEncoding = true;
 | |
|             if (OptionCheckSyntax)
 | |
|             {
 | |
|                 _openednodes = new Hashtable();
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 _openednodes = null;
 | |
|             }
 | |
| 
 | |
|             if (OptionUseIdAttribute)
 | |
|             {
 | |
|                 _nodesid = new Hashtable();
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 _nodesid = null;
 | |
|             }
 | |
| 
 | |
|             StreamReader sr = reader as StreamReader;
 | |
|             if (sr != null)
 | |
|             {
 | |
|                 _streamencoding = sr.CurrentEncoding;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 _streamencoding = null;
 | |
|             }
 | |
|             _declaredencoding = null;
 | |
| 
 | |
|             _text = reader.ReadToEnd();
 | |
|             _documentnode = CreateNode(HtmlNodeType.Document, 0);
 | |
| 
 | |
|             // this is almost a hack, but it allows us not to muck with the original parsing code
 | |
|             try
 | |
|             {
 | |
|                 Parse();
 | |
|             }
 | |
|             catch (EncodingFoundException ex)
 | |
|             {
 | |
|                 return ex.Encoding;
 | |
|             }
 | |
|             return null;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Detects the encoding of an HTML document from a file first, and then loads the file.
 | |
|         /// </summary>
 | |
|         /// <param name="path">The complete file path to be read.</param>
 | |
|         public void DetectEncodingAndLoad(string path)
 | |
|         {
 | |
|             DetectEncodingAndLoad(path, true);
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Detects the encoding of an HTML document from a file first, and then loads the file.
 | |
|         /// </summary>
 | |
|         /// <param name="path">The complete file path to be read. May not be null.</param>
 | |
|         /// <param name="detectEncoding">true to detect encoding, false otherwise.</param>
 | |
|         public void DetectEncodingAndLoad(string path, bool detectEncoding)
 | |
|         {
 | |
|             if (path == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("path");
 | |
|             }
 | |
|             Encoding enc;
 | |
|             if (detectEncoding)
 | |
|             {
 | |
|                 enc = DetectEncoding(path);
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 enc = null;
 | |
|             }
 | |
| 
 | |
|             if (enc == null)
 | |
|             {
 | |
|                 Load(path);
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 Load(path, enc);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Detects the encoding of an HTML text.
 | |
|         /// </summary>
 | |
|         /// <param name="html">The input html text. May not be null.</param>
 | |
|         /// <returns>The detected encoding.</returns>
 | |
|         public Encoding DetectEncodingHtml(string html)
 | |
|         {
 | |
|             if (html == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("html");
 | |
|             }
 | |
|             StringReader sr = new StringReader(html);
 | |
|             Encoding encoding = DetectEncoding(sr);
 | |
|             sr.Close();
 | |
|             return encoding;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Gets the HTML node with the specified 'id' attribute value.
 | |
|         /// </summary>
 | |
|         /// <param name="id">The attribute id to match. May not be null.</param>
 | |
|         /// <returns>The HTML node with the matching id or null if not found.</returns>
 | |
|         public HtmlNode GetElementbyId(string id)
 | |
|         {
 | |
|             if (id == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("id");
 | |
|             }
 | |
|             if (_nodesid == null)
 | |
|             {
 | |
|                 throw new Exception(HtmlExceptionUseIdAttributeFalse);
 | |
|             }
 | |
| 
 | |
|             return _nodesid[id.ToLower()] as HtmlNode;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads an HTML document from a stream.
 | |
|         /// </summary>
 | |
|         /// <param name="stream">The input stream.</param>
 | |
|         public void Load(Stream stream)
 | |
|         {
 | |
|             Load(new StreamReader(stream, OptionDefaultStreamEncoding));
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads an HTML document from a stream.
 | |
|         /// </summary>
 | |
|         /// <param name="stream">The input stream.</param>
 | |
|         /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
 | |
|         public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
 | |
|         {
 | |
|             Load(new StreamReader(stream, detectEncodingFromByteOrderMarks));
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads an HTML document from a stream.
 | |
|         /// </summary>
 | |
|         /// <param name="stream">The input stream.</param>
 | |
|         /// <param name="encoding">The character encoding to use.</param>
 | |
|         public void Load(Stream stream, Encoding encoding)
 | |
|         {
 | |
|             Load(new StreamReader(stream, encoding));
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads an HTML document from a stream.
 | |
|         /// </summary>
 | |
|         /// <param name="stream">The input stream.</param>
 | |
|         /// <param name="encoding">The character encoding to use.</param>
 | |
|         /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
 | |
|         public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
 | |
|         {
 | |
|             Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks));
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads an HTML document from a stream.
 | |
|         /// </summary>
 | |
|         /// <param name="stream">The input stream.</param>
 | |
|         /// <param name="encoding">The character encoding to use.</param>
 | |
|         /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
 | |
|         /// <param name="buffersize">The minimum buffer size.</param>
 | |
|         public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
 | |
|         {
 | |
|             Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize));
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads an HTML document from a file.
 | |
|         /// </summary>
 | |
|         /// <param name="path">The complete file path to be read. May not be null.</param>
 | |
|         public void Load(string path)
 | |
|         {
 | |
|             if (path == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("path");
 | |
|             }
 | |
|             StreamReader sr = new StreamReader(path, OptionDefaultStreamEncoding);
 | |
|             Load(sr);
 | |
|             sr.Close();
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads an HTML document from a file.
 | |
|         /// </summary>
 | |
|         /// <param name="path">The complete file path to be read. May not be null.</param>
 | |
|         /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
 | |
|         public void Load(string path, bool detectEncodingFromByteOrderMarks)
 | |
|         {
 | |
|             if (path == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("path");
 | |
|             }
 | |
|             StreamReader sr = new StreamReader(path, detectEncodingFromByteOrderMarks);
 | |
|             Load(sr);
 | |
|             sr.Close();
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads an HTML document from a file.
 | |
|         /// </summary>
 | |
|         /// <param name="path">The complete file path to be read. May not be null.</param>
 | |
|         /// <param name="encoding">The character encoding to use. May not be null.</param>
 | |
|         public void Load(string path, Encoding encoding)
 | |
|         {
 | |
|             if (path == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("path");
 | |
|             }
 | |
|             if (encoding == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("encoding");
 | |
|             }
 | |
|             StreamReader sr = new StreamReader(path, encoding);
 | |
|             Load(sr);
 | |
|             sr.Close();
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads an HTML document from a file.
 | |
|         /// </summary>
 | |
|         /// <param name="path">The complete file path to be read. May not be null.</param>
 | |
|         /// <param name="encoding">The character encoding to use. May not be null.</param>
 | |
|         /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
 | |
|         public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
 | |
|         {
 | |
|             if (path == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("path");
 | |
|             }
 | |
|             if (encoding == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("encoding");
 | |
|             }
 | |
|             StreamReader sr = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks);
 | |
|             Load(sr);
 | |
|             sr.Close();
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads an HTML document from a file.
 | |
|         /// </summary>
 | |
|         /// <param name="path">The complete file path to be read. May not be null.</param>
 | |
|         /// <param name="encoding">The character encoding to use. May not be null.</param>
 | |
|         /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
 | |
|         /// <param name="buffersize">The minimum buffer size.</param>
 | |
|         public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
 | |
|         {
 | |
|             if (path == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("path");
 | |
|             }
 | |
|             if (encoding == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("encoding");
 | |
|             }
 | |
|             StreamReader sr = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize);
 | |
|             Load(sr);
 | |
|             sr.Close();
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads the HTML document from the specified TextReader.
 | |
|         /// </summary>
 | |
|         /// <param name="reader">The TextReader used to feed the HTML data into the document. May not be null.</param>
 | |
|         public void Load(TextReader reader)
 | |
|         {
 | |
|             // every other Load overload funnels into this one
 | |
|             if (reader == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("reader");
 | |
|             }
 | |
| 
 | |
|             _onlyDetectEncoding = false;
 | |
| 
 | |
|             // the bookkeeping tables exist only while the matching option is switched on
 | |
|             _openednodes = OptionCheckSyntax ? new Hashtable() : null;
 | |
|             _nodesid = OptionUseIdAttribute ? new Hashtable() : null;
 | |
| 
 | |
|             StreamReader streamReader = reader as StreamReader;
 | |
|             if (streamReader == null)
 | |
|             {
 | |
|                 _streamencoding = null;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 try
 | |
|                 {
 | |
|                     // peeking triggers the byte order mark read if one is needed
 | |
|                     streamReader.Peek();
 | |
|                 }
 | |
|                     // ReSharper disable EmptyGeneralCatchClause
 | |
|                 catch (Exception)
 | |
|                     // ReSharper restore EmptyGeneralCatchClause
 | |
|                 {
 | |
|                     // ignored on purpose
 | |
|                 }
 | |
|                 _streamencoding = streamReader.CurrentEncoding;
 | |
|             }
 | |
|             _declaredencoding = null;
 | |
| 
 | |
|             _text = reader.ReadToEnd();
 | |
|             _documentnode = CreateNode(HtmlNodeType.Document, 0);
 | |
|             Parse();
 | |
| 
 | |
|             if (!OptionCheckSyntax)
 | |
|             {
 | |
|                 return;
 | |
|             }
 | |
| 
 | |
|             // report every node still registered as opened once parsing is over
 | |
|             foreach (HtmlNode opened in _openednodes.Values)
 | |
|             {
 | |
|                 if (opened._starttag) // the others were already reported
 | |
|                 {
 | |
|                     string source = string.Empty;
 | |
|                     if (OptionExtractErrorSourceText)
 | |
|                     {
 | |
|                         source = opened.OuterHtml;
 | |
|                         if (source.Length > OptionExtractErrorSourceTextMaxLength)
 | |
|                         {
 | |
|                             source = source.Substring(0, OptionExtractErrorSourceTextMaxLength);
 | |
|                         }
 | |
|                     }
 | |
|                     AddError(
 | |
|                         HtmlParseErrorCode.TagNotClosed,
 | |
|                         opened._line, opened._lineposition,
 | |
|                         opened._streamposition, source,
 | |
|                         "End tag </" + opened.Name + "> was not found");
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             // the table is not needed past this point
 | |
|             _openednodes.Clear();
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Loads the HTML document from the specified string.
 | |
|         /// </summary>
 | |
|         /// <param name="html">String containing the HTML document to load. May not be null.</param>
 | |
|         public void LoadHtml(string html)
 | |
|         {
 | |
|             if (html == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("html");
 | |
|             }
 | |
|             // 'using' disposes the reader on every exit path, including a parse failure
 | |
|             using (StringReader sr = new StringReader(html))
 | |
|             {
 | |
|                 Load(sr);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Saves the HTML document to the specified stream.
 | |
|         /// </summary>
 | |
|         /// <param name="outStream">The stream to which you want to save.</param>
 | |
|         public void Save(Stream outStream)
 | |
|         {
 | |
|             // same guard as the (Stream, Encoding) overload
 | |
|             if (outStream == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("outStream");
 | |
|             }
 | |
|             StreamWriter sw = new StreamWriter(outStream, GetOutEncoding());
 | |
|             Save(sw);
 | |
|             // StreamWriter buffers its output: without a flush the end of the document may
 | |
|             // never reach outStream. The writer is deliberately not disposed, because
 | |
|             // disposing it would also close the caller's stream.
 | |
|             sw.Flush();
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Saves the HTML document to the specified stream.
 | |
|         /// </summary>
 | |
|         /// <param name="outStream">The stream to which you want to save. May not be null.</param>
 | |
|         /// <param name="encoding">The character encoding to use. May not be null.</param>
 | |
|         public void Save(Stream outStream, Encoding encoding)
 | |
|         {
 | |
|             if (outStream == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("outStream");
 | |
|             }
 | |
|             if (encoding == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("encoding");
 | |
|             }
 | |
|             StreamWriter sw = new StreamWriter(outStream, encoding);
 | |
|             Save(sw);
 | |
|             // StreamWriter buffers its output: without a flush the end of the document may
 | |
|             // never reach outStream. The writer is deliberately not disposed, because
 | |
|             // disposing it would also close the caller's stream.
 | |
|             sw.Flush();
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Saves the mixed document to the specified file.
 | |
|         /// </summary>
 | |
|         /// <param name="filename">The location of the file where you want to save the document.</param>
 | |
|         public void Save(string filename)
 | |
|         {
 | |
|             // same guard as the (string, Encoding) overload
 | |
|             if (filename == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("filename");
 | |
|             }
 | |
|             // 'using' flushes and closes the file even when writing the document throws
 | |
|             using (StreamWriter sw = new StreamWriter(filename, false, GetOutEncoding()))
 | |
|             {
 | |
|                 Save(sw);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Saves the mixed document to the specified file.
 | |
|         /// </summary>
 | |
|         /// <param name="filename">The location of the file where you want to save the document. May not be null.</param>
 | |
|         /// <param name="encoding">The character encoding to use. May not be null.</param>
 | |
|         public void Save(string filename, Encoding encoding)
 | |
|         {
 | |
|             if (filename == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("filename");
 | |
|             }
 | |
|             if (encoding == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("encoding");
 | |
|             }
 | |
|             // 'using' flushes and closes the file even when writing the document throws
 | |
|             using (StreamWriter sw = new StreamWriter(filename, false, encoding))
 | |
|             {
 | |
|                 Save(sw);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Saves the HTML document to the specified StreamWriter.
 | |
|         /// </summary>
 | |
|         /// <param name="writer">The StreamWriter to which you want to save.</param>
 | |
|         public void Save(StreamWriter writer)
 | |
|         {
 | |
|             // typing the local as TextWriter selects the TextWriter overload
 | |
|             TextWriter target = writer;
 | |
|             Save(target);
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Saves the HTML document to the specified TextWriter.
 | |
|         /// </summary>
 | |
|         /// <param name="writer">The TextWriter to which you want to save. May not be null.</param>
 | |
|         public void Save(TextWriter writer)
 | |
|         {
 | |
|             if (writer == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("writer");
 | |
|             }
 | |
|             DocumentNode.WriteTo(writer);
 | |
|             // flush, as Save(XmlWriter) does, so buffered writers hand their content on
 | |
|             // even when the caller never closes them
 | |
|             writer.Flush();
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Saves the HTML document to the specified XmlWriter.
 | |
|         /// </summary>
 | |
|         /// <param name="writer">The XmlWriter to which you want to save.</param>
 | |
|         public void Save(XmlWriter writer)
 | |
|         {
 | |
|             // same guard as Save(TextWriter): reject a null writer before writing starts
 | |
|             if (writer == null)
 | |
|             {
 | |
|                 throw new ArgumentNullException("writer");
 | |
|             }
 | |
|             DocumentNode.WriteTo(writer);
 | |
|             writer.Flush();
 | |
|         }
 | |
| 
 | |
|         #endregion
 | |
| 
 | |
|         #region Internal Methods
 | |
| 
 | |
|         internal HtmlAttribute CreateAttribute()
 | |
|         {
 | |
|             // the new attribute is constructed with this document as its argument
 | |
|             HtmlAttribute attribute = new HtmlAttribute(this);
 | |
|             return attribute;
 | |
|         }
 | |
| 
 | |
|         internal HtmlNode CreateNode(HtmlNodeType type)
 | |
|         {
 | |
|             // delegate to the two-argument overload with an index of -1
 | |
|             const int index = -1;
 | |
|             return CreateNode(type, index);
 | |
|         }
 | |
| 
 | |
|         internal HtmlNode CreateNode(HtmlNodeType type, int index)
 | |
|         {
 | |
|             // comments and text have dedicated node classes; everything else is a plain HtmlNode
 | |
|             if (type == HtmlNodeType.Comment)
 | |
|             {
 | |
|                 return new HtmlCommentNode(this, index);
 | |
|             }
 | |
|             if (type == HtmlNodeType.Text)
 | |
|             {
 | |
|                 return new HtmlTextNode(this, index);
 | |
|             }
 | |
|             return new HtmlNode(type, this, index);
 | |
|         }
 | |
| 
 | |
|         internal Encoding GetOutEncoding()
 | |
|         {
 | |
|             // order of preference: declared encoding, then stream encoding,
 | |
|             // then OptionDefaultStreamEncoding
 | |
|             if (_declaredencoding != null)
 | |
|             {
 | |
|                 return _declaredencoding;
 | |
|             }
 | |
|             if (_streamencoding != null)
 | |
|             {
 | |
|                 return _streamencoding;
 | |
|             }
 | |
|             return OptionDefaultStreamEncoding;
 | |
|         }
 | |
| 
 | |
|         internal HtmlNode GetXmlDeclaration()
 | |
|         {
 | |
|             // only direct children of the document node are inspected
 | |
|             if (_documentnode.HasChildNodes)
 | |
|             {
 | |
|                 foreach (HtmlNode child in _documentnode._childnodes)
 | |
|                 {
 | |
|                     if (child.Name == "?xml") // it's ok, names are case sensitive
 | |
|                     {
 | |
|                         return child;
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|             return null;
 | |
|         }
 | |
| 
 | |
|         internal void SetIdForNode(HtmlNode node, string id)
 | |
|         {
 | |
|             // nothing to maintain when id tracking is off, the table is absent, or there is no id
 | |
|             if (!OptionUseIdAttribute || (_nodesid == null) || (id == null))
 | |
|             {
 | |
|                 return;
 | |
|             }
 | |
| 
 | |
|             // ids are stored lower-cased
 | |
|             string key = id.ToLower();
 | |
|             if (node != null)
 | |
|             {
 | |
|                 _nodesid[key] = node;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 // a null node removes the entry for this id
 | |
|                 _nodesid.Remove(key);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         internal void UpdateLastParentNode()
 | |
|         {
 | |
|             // the first step reads _lastparentnode without a null check
 | |
|             if (_lastparentnode.Closed)
 | |
|             {
 | |
|                 _lastparentnode = _lastparentnode.ParentNode;
 | |
|             }
 | |
| 
 | |
|             // keep climbing while the ancestor reached is itself closed
 | |
|             while ((_lastparentnode != null) && (_lastparentnode.Closed))
 | |
|             {
 | |
|                 _lastparentnode = _lastparentnode.ParentNode;
 | |
|             }
 | |
| 
 | |
|             // ran out of ancestors: fall back to the document node
 | |
|             if (_lastparentnode == null)
 | |
|             {
 | |
|                 _lastparentnode = _documentnode;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         #endregion
 | |
| 
 | |
|         #region Private Methods
 | |
| 
 | |
|         private HtmlParseError AddError(
 | |
|             HtmlParseErrorCode code,
 | |
|             int line,
 | |
|             int linePosition,
 | |
|             int streamPosition,
 | |
|             string sourceText,
 | |
|             string reason)
 | |
|         {
 | |
|             // build the error record, append it to the document's error list, and return it
 | |
|             HtmlParseError parseError =
 | |
|                 new HtmlParseError(code, line, linePosition, streamPosition, sourceText, reason);
 | |
|             _parseerrors.Add(parseError);
 | |
|             return parseError;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Closes the node held in _currentnode. It is paired with the most recent node of the
 | |
|         /// same name recorded in _lastnodes; when no such node exists, the unmatched case is
 | |
|         /// handled according to the element kind. Parse errors are recorded through AddError.
 | |
|         /// </summary>
 | |
|         private void CloseCurrentNode()
 | |
|         {
 | |
|             if (_currentnode.Closed) // text or document are by def closed
 | |
|                 return;
 | |
| 
 | |
|             // set when an error must also suppress the parent update at the end
 | |
|             bool error = false;
 | |
| 
 | |
|             // find last node of this kind
 | |
|             HtmlNode prev = (HtmlNode) _lastnodes[_currentnode.Name];
 | |
|             if (prev == null)
 | |
|             {
 | |
|                 // no node with this name is recorded in _lastnodes
 | |
|                 if (HtmlNode.IsClosedElement(_currentnode.Name))
 | |
|                 {
 | |
|                     // </br> will be seen as <br>
 | |
|                     _currentnode.CloseNode(_currentnode);
 | |
| 
 | |
|                     // add to parent node
 | |
|                     if (_lastparentnode != null)
 | |
|                     {
 | |
|                         HtmlNode foundNode = null;
 | |
|                         // siblings met while walking backwards; popping restores document order
 | |
|                         Stack futureChild = new Stack();
 | |
|                         for (HtmlNode node = _lastparentnode.LastChild; node != null; node = node.PreviousSibling)
 | |
|                         {
 | |
|                             // look for a childless sibling carrying the same name
 | |
|                             if ((node.Name == _currentnode.Name) && (!node.HasChildNodes))
 | |
|                             {
 | |
|                                 foundNode = node;
 | |
|                                 break;
 | |
|                             }
 | |
|                             futureChild.Push(node);
 | |
|                         }
 | |
|                         if (foundNode != null)
 | |
|                         {
 | |
|                             // move every sibling that followed foundNode underneath it
 | |
|                             HtmlNode node = null;
 | |
|                             while (futureChild.Count != 0)
 | |
|                             {
 | |
|                                 node = (HtmlNode) futureChild.Pop();
 | |
|                                 _lastparentnode.RemoveChild(node);
 | |
|                                 foundNode.AppendChild(node);
 | |
|                             }
 | |
|                         }
 | |
|                         else
 | |
|                         {
 | |
|                             // no match: the current node itself becomes a child of the last parent
 | |
|                             _lastparentnode.AppendChild(_currentnode);
 | |
|                         }
 | |
|                     }
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     // node has no parent
 | |
|                     // node is not a closed node
 | |
| 
 | |
|                     if (HtmlNode.CanOverlapElement(_currentnode.Name))
 | |
|                     {
 | |
|                         // this is a hack: add it as a text node
 | |
|                         // the text node spans the same outer range as the current node
 | |
|                         HtmlNode closenode = CreateNode(HtmlNodeType.Text, _currentnode._outerstartindex);
 | |
|                         closenode._outerlength = _currentnode._outerlength;
 | |
|                         ((HtmlTextNode) closenode).Text = ((HtmlTextNode) closenode).Text.ToLower();
 | |
|                         if (_lastparentnode != null)
 | |
|                         {
 | |
|                             _lastparentnode.AppendChild(closenode);
 | |
|                         }
 | |
|                     }
 | |
|                     else
 | |
|                     {
 | |
|                         if (HtmlNode.IsEmptyElement(_currentnode.Name))
 | |
|                         {
 | |
|                             // recorded as an error, but the error flag is left unset
 | |
|                             AddError(
 | |
|                                 HtmlParseErrorCode.EndTagNotRequired,
 | |
|                                 _currentnode._line, _currentnode._lineposition,
 | |
|                                 _currentnode._streamposition, _currentnode.OuterHtml,
 | |
|                                 "End tag </" + _currentnode.Name + "> is not required");
 | |
|                         }
 | |
|                         else
 | |
|                         {
 | |
|                             // node cannot overlap, node is not empty
 | |
|                             AddError(
 | |
|                                 HtmlParseErrorCode.TagNotOpened,
 | |
|                                 _currentnode._line, _currentnode._lineposition,
 | |
|                                 _currentnode._streamposition, _currentnode.OuterHtml,
 | |
|                                 "Start tag <" + _currentnode.Name + "> was not found");
 | |
|                             error = true;
 | |
|                         }
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 if (OptionFixNestedTags)
 | |
|                 {
 | |
|                     // a resetter node found for prev makes this end tag invalid
 | |
|                     if (FindResetterNodes(prev, GetResetters(_currentnode.Name)))
 | |
|                     {
 | |
|                         AddError(
 | |
|                             HtmlParseErrorCode.EndTagInvalidHere,
 | |
|                             _currentnode._line, _currentnode._lineposition,
 | |
|                             _currentnode._streamposition, _currentnode.OuterHtml,
 | |
|                             "End tag </" + _currentnode.Name + "> invalid here");
 | |
|                         error = true;
 | |
|                     }
 | |
|                 }
 | |
| 
 | |
|                 if (!error)
 | |
|                 {
 | |
|                     // replace prev in _lastnodes with the earlier same-named node, then close prev
 | |
|                     _lastnodes[_currentnode.Name] = prev._prevwithsamename;
 | |
|                     prev.CloseNode(_currentnode);
 | |
|                 }
 | |
|             }
 | |
| 
 | |
| 
 | |
|             // we close this node, get grandparent
 | |
|             // skipped when the current node is a closed element that is not a start tag
 | |
|             if (!error)
 | |
|             {
 | |
|                 if ((_lastparentnode != null) &&
 | |
|                     ((!HtmlNode.IsClosedElement(_currentnode.Name)) ||
 | |
|                      (_currentnode._starttag)))
 | |
|                 {
 | |
|                     UpdateLastParentNode();
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         private string CurrentAttributeName()
 | |
|         {
 | |
|             // slice the current attribute's name out of the document text
 | |
|             int start = _currentattribute._namestartindex;
 | |
|             int length = _currentattribute._namelength;
 | |
|             return _text.Substring(start, length);
 | |
|         }
 | |
| 
 | |
|         private string CurrentAttributeValue()
 | |
|         {
 | |
|             // slice the current attribute's value out of the document text
 | |
|             int start = _currentattribute._valuestartindex;
 | |
|             int length = _currentattribute._valuelength;
 | |
|             return _text.Substring(start, length);
 | |
|         }
 | |
| 
 | |
|         private string CurrentNodeInner()
 | |
|         {
 | |
|             // slice the current node's inner range out of the document text
 | |
|             int start = _currentnode._innerstartindex;
 | |
|             int length = _currentnode._innerlength;
 | |
|             return _text.Substring(start, length);
 | |
|         }
 | |
| 
 | |
|         private string CurrentNodeName()
 | |
|         {
 | |
|             // slice the current node's name out of the document text
 | |
|             int start = _currentnode._namestartindex;
 | |
|             int length = _currentnode._namelength;
 | |
|             return _text.Substring(start, length);
 | |
|         }
 | |
| 
 | |
|         private string CurrentNodeOuter()
 | |
|         {
 | |
|             // slice the current node's outer range out of the document text
 | |
|             int start = _currentnode._outerstartindex;
 | |
|             int length = _currentnode._outerlength;
 | |
|             return _text.Substring(start, length);
 | |
|         }
 | |
| 
 | |
| 
 | |
|         private void DecrementPosition()
 | |
|         {
 | |
|             _index--;
 | |
|             if (_lineposition != 1)
 | |
|             {
 | |
|                 // still on the same line
 | |
|                 _lineposition--;
 | |
|                 return;
 | |
|             }
 | |
| 
 | |
|             // at column 1: go back one line, using the column saved by IncrementPosition
 | |
|             _lineposition = _maxlineposition;
 | |
|             _line--;
 | |
|         }
 | |
| 
 | |
|         private HtmlNode FindResetterNode(HtmlNode node, string name)
 | |
|         {
 | |
|             HtmlNode candidate = (HtmlNode) _lastnodes[name];
 | |
| 
 | |
|             // the candidate counts only if it exists, is still open,
 | |
|             // and does not sit before 'node' in the stream
 | |
|             if ((candidate == null) ||
 | |
|                 candidate.Closed ||
 | |
|                 (candidate._streamposition < node._streamposition))
 | |
|             {
 | |
|                 return null;
 | |
|             }
 | |
|             return candidate;
 | |
|         }
 | |
| 
 | |
|         private bool FindResetterNodes(HtmlNode node, string[] names)
 | |
|         {
 | |
|             if (names == null)
 | |
|             {
 | |
|                 return false;
 | |
|             }
 | |
| 
 | |
|             // true as soon as any of the names yields a resetter node
 | |
|             foreach (string candidate in names)
 | |
|             {
 | |
|                 if (FindResetterNode(node, candidate) != null)
 | |
|                 {
 | |
|                     return true;
 | |
|                 }
 | |
|             }
 | |
|             return false;
 | |
|         }
 | |
| 
 | |
|         private void FixNestedTag(string name, string[] resetters)
 | |
|         {
 | |
|             if (resetters == null)
 | |
|             {
 | |
|                 return;
 | |
|             }
 | |
| 
 | |
|             // an earlier node of the same name that is still open must be closed,
 | |
|             // unless a resetter node is found for it
 | |
|             HtmlNode previous = (HtmlNode) _lastnodes[name];
 | |
|             if ((previous == null) || previous.Closed)
 | |
|             {
 | |
|                 return;
 | |
|             }
 | |
|             if (FindResetterNodes(previous, resetters))
 | |
|             {
 | |
|                 // a resetter was found: leave the earlier node alone
 | |
|                 return;
 | |
|             }
 | |
| 
 | |
|             // close the earlier node with a fake closing node that is its own end node
 | |
|             HtmlNode closer = new HtmlNode(previous.NodeType, this, -1);
 | |
|             closer._endnode = closer;
 | |
|             previous.CloseNode(closer);
 | |
|         }
 | |
| 
 | |
|         private void FixNestedTags()
 | |
|         {
 | |
|             // only start tags matter here; closing tags are left alone
 | |
|             if (_currentnode._starttag)
 | |
|             {
 | |
|                 string tagName = CurrentNodeName();
 | |
|                 string[] resetters = GetResetters(tagName);
 | |
|                 FixNestedTag(tagName, resetters);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         private string[] GetResetters(string name)
 | |
|         {
 | |
|             // resetter names for the given tag name, or null when it has none
 | |
|             if (name == "li")
 | |
|             {
 | |
|                 return new string[] {"ul"};
 | |
|             }
 | |
|             if (name == "tr")
 | |
|             {
 | |
|                 return new string[] {"table"};
 | |
|             }
 | |
|             if ((name == "th") || (name == "td"))
 | |
|             {
 | |
|                 return new string[] {"tr", "table"};
 | |
|             }
 | |
|             return null;
 | |
|         }
 | |
| 
 | |
|         private void IncrementPosition()
 | |
|         {
 | |
|             // REVIEW: should DecrementPosition get some checksum code as well?
 | |
|             if (_crc32 != null)
 | |
|             {
 | |
|                 _crc32.AddToCRC32(_c);
 | |
|             }
 | |
| 
 | |
|             _index++;
 | |
|             // save the column so DecrementPosition can step back across a line break
 | |
|             _maxlineposition = _lineposition;
 | |
| 
 | |
|             bool lineFeed = (_c == 10);
 | |
|             if (!lineFeed)
 | |
|             {
 | |
|                 _lineposition++;
 | |
|                 return;
 | |
|             }
 | |
| 
 | |
|             // a line feed starts a new line at column 1
 | |
|             _lineposition = 1;
 | |
|             _line++;
 | |
|         }
 | |
| 
 | |
|         /// <summary>
 | |
|         /// Checks whether the current character (_c) is a '&lt;' and, if so, starts a
 | |
|         /// server-side code section, a comment node or an element node.
 | |
|         /// Parse has already advanced _index past _c, so _index - 1 is the position of the
 | |
|         /// '&lt;' and _text[_index] is the character after it.
 | |
|         /// </summary>
 | |
|         /// <returns>true when the character was handled here (Parse then moves on to the
 | |
|         /// next character); false when _c is not '&lt;'.</returns>
 | |
|         private bool NewCheck()
 | |
|         {
 | |
|             if (_c != '<')
 | |
|             {
 | |
|                 return false;
 | |
|             }
 | |
|             if (_index < _text.Length)
 | |
|             {
 | |
|                 // "<%" opens server-side code
 | |
|                 if (_text[_index] == '%')
 | |
|                 {
 | |
|                     // depending on the state, start an attribute value, attribute name or node name at the '<'
 | |
|                     switch (_state)
 | |
|                     {
 | |
|                         case ParseState.AttributeAfterEquals:
 | |
|                             PushAttributeValueStart(_index - 1);
 | |
|                             break;
 | |
| 
 | |
|                         case ParseState.BetweenAttributes:
 | |
|                             PushAttributeNameStart(_index - 1);
 | |
|                             break;
 | |
| 
 | |
|                         case ParseState.WhichTag:
 | |
|                             PushNodeNameStart(true, _index - 1);
 | |
|                             _state = ParseState.Tag;
 | |
|                             break;
 | |
|                     }
 | |
|                     // save the state in _oldstate before switching to ServerSideCode
 | |
|                     _oldstate = _state;
 | |
|                     _state = ParseState.ServerSideCode;
 | |
|                     return true;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             // end the node in progress just before the '<'
 | |
|             if (!PushNodeEnd(_index - 1, true))
 | |
|             {
 | |
|                 // stop parsing
 | |
|                 _index = _text.Length;
 | |
|                 return true;
 | |
|             }
 | |
|             _state = ParseState.WhichTag;
 | |
|             // true only when at least one character follows the '<'
 | |
|             if ((_index - 1) <= (_text.Length - 2))
 | |
|             {
 | |
|                 // "<!" is treated as the start of a comment node
 | |
|                 if (_text[_index] == '!')
 | |
|                 {
 | |
|                     PushNodeStart(HtmlNodeType.Comment, _index - 1);
 | |
|                     PushNodeNameStart(true, _index);
 | |
|                     PushNodeNameEnd(_index + 1);
 | |
|                     _state = ParseState.Comment;
 | |
|                     // _fullcomment keeps its previous value when fewer than two characters follow the '!'
 | |
|                     if (_index < (_text.Length - 2))
 | |
|                     {
 | |
|                         // "<!--" marks a full comment
 | |
|                         if ((_text[_index + 1] == '-') &&
 | |
|                             (_text[_index + 2] == '-'))
 | |
|                         {
 | |
|                             _fullcomment = true;
 | |
|                         }
 | |
|                         else
 | |
|                         {
 | |
|                             _fullcomment = false;
 | |
|                         }
 | |
|                     }
 | |
|                     return true;
 | |
|                 }
 | |
|             }
 | |
|             // anything else after '<' starts an element node
 | |
|             PushNodeStart(HtmlNodeType.Element, _index - 1);
 | |
|             return true;
 | |
|         }
 | |
| 
 | |
|         private void Parse()
 | |
|         {
 | |
|             int lastquote = 0;
 | |
|             if (OptionComputeChecksum)
 | |
|             {
 | |
|                 _crc32 = new Crc32();
 | |
|             }
 | |
| 
 | |
|             _lastnodes = new Hashtable();
 | |
|             _c = 0;
 | |
|             _fullcomment = false;
 | |
|             _parseerrors = new List<HtmlParseError>();
 | |
|             _line = 1;
 | |
|             _lineposition = 1;
 | |
|             _maxlineposition = 1;
 | |
| 
 | |
|             _state = ParseState.Text;
 | |
|             _oldstate = _state;
 | |
|             _documentnode._innerlength = _text.Length;
 | |
|             _documentnode._outerlength = _text.Length;
 | |
|             _remainderOffset = _text.Length;
 | |
| 
 | |
|             _lastparentnode = _documentnode;
 | |
|             _currentnode = CreateNode(HtmlNodeType.Text, 0);
 | |
|             _currentattribute = null;
 | |
| 
 | |
|             _index = 0;
 | |
|             PushNodeStart(HtmlNodeType.Text, 0);
 | |
|             while (_index < _text.Length)
 | |
|             {
 | |
|                 _c = _text[_index];
 | |
|                 IncrementPosition();
 | |
| 
 | |
|                 switch (_state)
 | |
|                 {
 | |
|                     case ParseState.Text:
 | |
|                         if (NewCheck())
 | |
|                             continue;
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.WhichTag:
 | |
|                         if (NewCheck())
 | |
|                             continue;
 | |
|                         if (_c == '/')
 | |
|                         {
 | |
|                             PushNodeNameStart(false, _index);
 | |
|                         }
 | |
|                         else
 | |
|                         {
 | |
|                             PushNodeNameStart(true, _index - 1);
 | |
|                             DecrementPosition();
 | |
|                         }
 | |
|                         _state = ParseState.Tag;
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.Tag:
 | |
|                         if (NewCheck())
 | |
|                             continue;
 | |
|                         if (IsWhiteSpace(_c))
 | |
|                         {
 | |
|                             PushNodeNameEnd(_index - 1);
 | |
|                             if (_state != ParseState.Tag)
 | |
|                                 continue;
 | |
|                             _state = ParseState.BetweenAttributes;
 | |
|                             continue;
 | |
|                         }
 | |
|                         if (_c == '/')
 | |
|                         {
 | |
|                             PushNodeNameEnd(_index - 1);
 | |
|                             if (_state != ParseState.Tag)
 | |
|                                 continue;
 | |
|                             _state = ParseState.EmptyTag;
 | |
|                             continue;
 | |
|                         }
 | |
|                         if (_c == '>')
 | |
|                         {
 | |
|                             PushNodeNameEnd(_index - 1);
 | |
|                             if (_state != ParseState.Tag)
 | |
|                                 continue;
 | |
|                             if (!PushNodeEnd(_index, false))
 | |
|                             {
 | |
|                                 // stop parsing
 | |
|                                 _index = _text.Length;
 | |
|                                 break;
 | |
|                             }
 | |
|                             if (_state != ParseState.Tag)
 | |
|                                 continue;
 | |
|                             _state = ParseState.Text;
 | |
|                             PushNodeStart(HtmlNodeType.Text, _index);
 | |
|                         }
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.BetweenAttributes:
 | |
|                         if (NewCheck())
 | |
|                             continue;
 | |
| 
 | |
|                         if (IsWhiteSpace(_c))
 | |
|                             continue;
 | |
| 
 | |
|                         if ((_c == '/') || (_c == '?'))
 | |
|                         {
 | |
|                             _state = ParseState.EmptyTag;
 | |
|                             continue;
 | |
|                         }
 | |
| 
 | |
|                         if (_c == '>')
 | |
|                         {
 | |
|                             if (!PushNodeEnd(_index, false))
 | |
|                             {
 | |
|                                 // stop parsing
 | |
|                                 _index = _text.Length;
 | |
|                                 break;
 | |
|                             }
 | |
| 
 | |
|                             if (_state != ParseState.BetweenAttributes)
 | |
|                                 continue;
 | |
|                             _state = ParseState.Text;
 | |
|                             PushNodeStart(HtmlNodeType.Text, _index);
 | |
|                             continue;
 | |
|                         }
 | |
| 
 | |
|                         PushAttributeNameStart(_index - 1);
 | |
|                         _state = ParseState.AttributeName;
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.EmptyTag:
 | |
|                         if (NewCheck())
 | |
|                             continue;
 | |
| 
 | |
|                         if (_c == '>')
 | |
|                         {
 | |
|                             if (!PushNodeEnd(_index, true))
 | |
|                             {
 | |
|                                 // stop parsing
 | |
|                                 _index = _text.Length;
 | |
|                                 break;
 | |
|                             }
 | |
| 
 | |
|                             if (_state != ParseState.EmptyTag)
 | |
|                                 continue;
 | |
|                             _state = ParseState.Text;
 | |
|                             PushNodeStart(HtmlNodeType.Text, _index);
 | |
|                             continue;
 | |
|                         }
 | |
|                         _state = ParseState.BetweenAttributes;
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.AttributeName:
 | |
|                         if (NewCheck())
 | |
|                             continue;
 | |
| 
 | |
|                         if (IsWhiteSpace(_c))
 | |
|                         {
 | |
|                             PushAttributeNameEnd(_index - 1);
 | |
|                             _state = ParseState.AttributeBeforeEquals;
 | |
|                             continue;
 | |
|                         }
 | |
|                         if (_c == '=')
 | |
|                         {
 | |
|                             PushAttributeNameEnd(_index - 1);
 | |
|                             _state = ParseState.AttributeAfterEquals;
 | |
|                             continue;
 | |
|                         }
 | |
|                         if (_c == '>')
 | |
|                         {
 | |
|                             PushAttributeNameEnd(_index - 1);
 | |
|                             if (!PushNodeEnd(_index, false))
 | |
|                             {
 | |
|                                 // stop parsing
 | |
|                                 _index = _text.Length;
 | |
|                                 break;
 | |
|                             }
 | |
|                             if (_state != ParseState.AttributeName)
 | |
|                                 continue;
 | |
|                             _state = ParseState.Text;
 | |
|                             PushNodeStart(HtmlNodeType.Text, _index);
 | |
|                             continue;
 | |
|                         }
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.AttributeBeforeEquals:
 | |
|                         if (NewCheck())
 | |
|                             continue;
 | |
| 
 | |
|                         if (IsWhiteSpace(_c))
 | |
|                             continue;
 | |
|                         if (_c == '>')
 | |
|                         {
 | |
|                             if (!PushNodeEnd(_index, false))
 | |
|                             {
 | |
|                                 // stop parsing
 | |
|                                 _index = _text.Length;
 | |
|                                 break;
 | |
|                             }
 | |
|                             if (_state != ParseState.AttributeBeforeEquals)
 | |
|                                 continue;
 | |
|                             _state = ParseState.Text;
 | |
|                             PushNodeStart(HtmlNodeType.Text, _index);
 | |
|                             continue;
 | |
|                         }
 | |
|                         if (_c == '=')
 | |
|                         {
 | |
|                             _state = ParseState.AttributeAfterEquals;
 | |
|                             continue;
 | |
|                         }
 | |
|                         // no equals, no whitespace, it's a new attribute starting
 | |
|                         _state = ParseState.BetweenAttributes;
 | |
|                         DecrementPosition();
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.AttributeAfterEquals:
 | |
|                         if (NewCheck())
 | |
|                             continue;
 | |
| 
 | |
|                         if (IsWhiteSpace(_c))
 | |
|                             continue;
 | |
| 
 | |
|                         if ((_c == '\'') || (_c == '"'))
 | |
|                         {
 | |
|                             _state = ParseState.QuotedAttributeValue;
 | |
|                             PushAttributeValueStart(_index, _c);
 | |
|                             lastquote = _c;
 | |
|                             continue;
 | |
|                         }
 | |
|                         if (_c == '>')
 | |
|                         {
 | |
|                             if (!PushNodeEnd(_index, false))
 | |
|                             {
 | |
|                                 // stop parsing
 | |
|                                 _index = _text.Length;
 | |
|                                 break;
 | |
|                             }
 | |
|                             if (_state != ParseState.AttributeAfterEquals)
 | |
|                                 continue;
 | |
|                             _state = ParseState.Text;
 | |
|                             PushNodeStart(HtmlNodeType.Text, _index);
 | |
|                             continue;
 | |
|                         }
 | |
|                         PushAttributeValueStart(_index - 1);
 | |
|                         _state = ParseState.AttributeValue;
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.AttributeValue:
 | |
|                         if (NewCheck())
 | |
|                             continue;
 | |
| 
 | |
|                         if (IsWhiteSpace(_c))
 | |
|                         {
 | |
|                             PushAttributeValueEnd(_index - 1);
 | |
|                             _state = ParseState.BetweenAttributes;
 | |
|                             continue;
 | |
|                         }
 | |
| 
 | |
|                         if (_c == '>')
 | |
|                         {
 | |
|                             PushAttributeValueEnd(_index - 1);
 | |
|                             if (!PushNodeEnd(_index, false))
 | |
|                             {
 | |
|                                 // stop parsing
 | |
|                                 _index = _text.Length;
 | |
|                                 break;
 | |
|                             }
 | |
|                             if (_state != ParseState.AttributeValue)
 | |
|                                 continue;
 | |
|                             _state = ParseState.Text;
 | |
|                             PushNodeStart(HtmlNodeType.Text, _index);
 | |
|                             continue;
 | |
|                         }
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.QuotedAttributeValue:
 | |
|                         if (_c == lastquote)
 | |
|                         {
 | |
|                             PushAttributeValueEnd(_index - 1);
 | |
|                             _state = ParseState.BetweenAttributes;
 | |
|                             continue;
 | |
|                         }
 | |
|                         if (_c == '<')
 | |
|                         {
 | |
|                             if (_index < _text.Length)
 | |
|                             {
 | |
|                                 if (_text[_index] == '%')
 | |
|                                 {
 | |
|                                     _oldstate = _state;
 | |
|                                     _state = ParseState.ServerSideCode;
 | |
|                                     continue;
 | |
|                                 }
 | |
|                             }
 | |
|                         }
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.Comment:
 | |
|                         if (_c == '>')
 | |
|                         {
 | |
|                             if (_fullcomment)
 | |
|                             {
 | |
|                                 if ((_text[_index - 2] != '-') ||
 | |
|                                     (_text[_index - 3] != '-'))
 | |
|                                 {
 | |
|                                     continue;
 | |
|                                 }
 | |
|                             }
 | |
|                             if (!PushNodeEnd(_index, false))
 | |
|                             {
 | |
|                                 // stop parsing
 | |
|                                 _index = _text.Length;
 | |
|                                 break;
 | |
|                             }
 | |
|                             _state = ParseState.Text;
 | |
|                             PushNodeStart(HtmlNodeType.Text, _index);
 | |
|                             continue;
 | |
|                         }
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.ServerSideCode:
 | |
|                         if (_c == '%')
 | |
|                         {
 | |
|                             if (_index < _text.Length)
 | |
|                             {
 | |
|                                 if (_text[_index] == '>')
 | |
|                                 {
 | |
|                                     switch (_oldstate)
 | |
|                                     {
 | |
|                                         case ParseState.AttributeAfterEquals:
 | |
|                                             _state = ParseState.AttributeValue;
 | |
|                                             break;
 | |
| 
 | |
|                                         case ParseState.BetweenAttributes:
 | |
|                                             PushAttributeNameEnd(_index + 1);
 | |
|                                             _state = ParseState.BetweenAttributes;
 | |
|                                             break;
 | |
| 
 | |
|                                         default:
 | |
|                                             _state = _oldstate;
 | |
|                                             break;
 | |
|                                     }
 | |
|                                     IncrementPosition();
 | |
|                                 }
 | |
|                             }
 | |
|                         }
 | |
|                         break;
 | |
| 
 | |
|                     case ParseState.PcData:
 | |
|                         // look for </tag + 1 char
 | |
| 
 | |
|                         // check buffer end
 | |
|                         if ((_currentnode._namelength + 3) <= (_text.Length - (_index - 1)))
 | |
|                         {
 | |
|                             if (string.Compare(_text.Substring(_index - 1, _currentnode._namelength + 2),
 | |
|                                                "</" + _currentnode.Name, true) == 0)
 | |
|                             {
 | |
|                                 int c = _text[_index - 1 + 2 + _currentnode.Name.Length];
 | |
|                                 if ((c == '>') || (IsWhiteSpace(c)))
 | |
|                                 {
 | |
|                                     // add the script as a text node
 | |
|                                     HtmlNode script = CreateNode(HtmlNodeType.Text,
 | |
|                                                                  _currentnode._outerstartindex +
 | |
|                                                                  _currentnode._outerlength);
 | |
|                                     script._outerlength = _index - 1 - script._outerstartindex;
 | |
|                                     _currentnode.AppendChild(script);
 | |
| 
 | |
| 
 | |
|                                     PushNodeStart(HtmlNodeType.Element, _index - 1);
 | |
|                                     PushNodeNameStart(false, _index - 1 + 2);
 | |
|                                     _state = ParseState.Tag;
 | |
|                                     IncrementPosition();
 | |
|                                 }
 | |
|                             }
 | |
|                         }
 | |
|                         break;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             // finish the current work
 | |
|             if (_currentnode._namestartindex > 0)
 | |
|             {
 | |
|                 PushNodeNameEnd(_index);
 | |
|             }
 | |
|             PushNodeEnd(_index, false);
 | |
| 
 | |
|             // we don't need this anymore
 | |
|             _lastnodes.Clear();
 | |
|         }
 | |
| 
 | |
|         private void PushAttributeNameEnd(int index)
 | |
|         {
 | |
|             _currentattribute._namelength = index - _currentattribute._namestartindex;
 | |
|             _currentnode.Attributes.Append(_currentattribute);
 | |
|         }
 | |
| 
 | |
|         private void PushAttributeNameStart(int index)
 | |
|         {
 | |
|             _currentattribute = CreateAttribute();
 | |
|             _currentattribute._namestartindex = index;
 | |
|             _currentattribute.Line = _line;
 | |
|             _currentattribute._lineposition = _lineposition;
 | |
|             _currentattribute._streamposition = index;
 | |
|         }
 | |
| 
 | |
|         private void PushAttributeValueEnd(int index)
 | |
|         {
 | |
|             _currentattribute._valuelength = index - _currentattribute._valuestartindex;
 | |
|         }
 | |
| 
 | |
|         private void PushAttributeValueStart(int index)
 | |
|         {
 | |
|             PushAttributeValueStart(index, 0);
 | |
|         }
 | |
| 
 | |
|         private void PushAttributeValueStart(int index, int quote)
 | |
|         {
 | |
|             _currentattribute._valuestartindex = index;
 | |
|             if (quote == '\'')
 | |
|                 _currentattribute.QuoteType = AttributeValueQuote.SingleQuote;
 | |
|         }
 | |
| 
 | |
|         private bool PushNodeEnd(int index, bool close)
 | |
|         {
 | |
|             _currentnode._outerlength = index - _currentnode._outerstartindex;
 | |
| 
 | |
|             if ((_currentnode._nodetype == HtmlNodeType.Text) ||
 | |
|                 (_currentnode._nodetype == HtmlNodeType.Comment))
 | |
|             {
 | |
|                 // forget about void nodes
 | |
|                 if (_currentnode._outerlength > 0)
 | |
|                 {
 | |
|                     _currentnode._innerlength = _currentnode._outerlength;
 | |
|                     _currentnode._innerstartindex = _currentnode._outerstartindex;
 | |
|                     if (_lastparentnode != null)
 | |
|                     {
 | |
|                         _lastparentnode.AppendChild(_currentnode);
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 if ((_currentnode._starttag) && (_lastparentnode != _currentnode))
 | |
|                 {
 | |
|                     // add to parent node
 | |
|                     if (_lastparentnode != null)
 | |
|                     {
 | |
|                         _lastparentnode.AppendChild(_currentnode);
 | |
|                     }
 | |
| 
 | |
|                     ReadDocumentEncoding(_currentnode);
 | |
| 
 | |
|                     // remember last node of this kind
 | |
|                     HtmlNode prev = (HtmlNode) _lastnodes[_currentnode.Name];
 | |
|                     _currentnode._prevwithsamename = prev;
 | |
|                     _lastnodes[_currentnode.Name] = _currentnode;
 | |
| 
 | |
|                     // change parent?
 | |
|                     if ((_currentnode.NodeType == HtmlNodeType.Document) ||
 | |
|                         (_currentnode.NodeType == HtmlNodeType.Element))
 | |
|                     {
 | |
|                         _lastparentnode = _currentnode;
 | |
|                     }
 | |
| 
 | |
|                     if (HtmlNode.IsCDataElement(CurrentNodeName()))
 | |
|                     {
 | |
|                         _state = ParseState.PcData;
 | |
|                         return true;
 | |
|                     }
 | |
| 
 | |
|                     if ((HtmlNode.IsClosedElement(_currentnode.Name)) ||
 | |
|                         (HtmlNode.IsEmptyElement(_currentnode.Name)))
 | |
|                     {
 | |
|                         close = true;
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             if ((close) || (!_currentnode._starttag))
 | |
|             {
 | |
|                 if ((OptionStopperNodeName != null) && (_remainder == null) &&
 | |
|                     (string.Compare(_currentnode.Name, OptionStopperNodeName, true) == 0))
 | |
|                 {
 | |
|                     _remainderOffset = index;
 | |
|                     _remainder = _text.Substring(_remainderOffset);
 | |
|                     CloseCurrentNode();
 | |
|                     return false; // stop parsing
 | |
|                 }
 | |
|                 CloseCurrentNode();
 | |
|             }
 | |
|             return true;
 | |
|         }
 | |
| 
 | |
|         private void PushNodeNameEnd(int index)
 | |
|         {
 | |
|             _currentnode._namelength = index - _currentnode._namestartindex;
 | |
|             if (OptionFixNestedTags)
 | |
|             {
 | |
|                 FixNestedTags();
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         private void PushNodeNameStart(bool starttag, int index)
 | |
|         {
 | |
|             _currentnode._starttag = starttag;
 | |
|             _currentnode._namestartindex = index;
 | |
|         }
 | |
| 
 | |
|         private void PushNodeStart(HtmlNodeType type, int index)
 | |
|         {
 | |
|             _currentnode = CreateNode(type, index);
 | |
|             _currentnode._line = _line;
 | |
|             _currentnode._lineposition = _lineposition;
 | |
|             if (type == HtmlNodeType.Element)
 | |
|             {
 | |
|                 _currentnode._lineposition--;
 | |
|             }
 | |
|             _currentnode._streamposition = index;
 | |
|         }
 | |
| 
 | |
|         private void ReadDocumentEncoding(HtmlNode node)
 | |
|         {
 | |
|             if (!OptionReadEncoding)
 | |
|                 return;
 | |
|             // format is 
 | |
|             // <meta http-equiv="content-type" content="text/html;charset=iso-8859-1" />
 | |
| 
 | |
|             // when we append a child, we are in node end, so attributes are already populated
 | |
|             if (node._namelength == 4) // quick check, avoids string alloc
 | |
|             {
 | |
|                 if (node.Name == "meta") // all nodes names are lowercase
 | |
|                 {
 | |
|                     HtmlAttribute att = node.Attributes["http-equiv"];
 | |
|                     if (att != null)
 | |
|                     {
 | |
|                         if (string.Compare(att.Value, "content-type", true) == 0)
 | |
|                         {
 | |
|                             HtmlAttribute content = node.Attributes["content"];
 | |
|                             if (content != null)
 | |
|                             {
 | |
|                                 string charset = NameValuePairList.GetNameValuePairsValue(content.Value, "charset");
 | |
|                                 if (charset != null && (charset = charset.Trim()).Length > 0)
 | |
|                                 {
 | |
|                                     _declaredencoding = Encoding.GetEncoding(charset.Trim());
 | |
|                                     if (_onlyDetectEncoding)
 | |
|                                     {
 | |
|                                         throw new EncodingFoundException(_declaredencoding);
 | |
|                                     }
 | |
| 
 | |
|                                     if (_streamencoding != null)
 | |
|                                     {
 | |
|                                         if (_declaredencoding.WindowsCodePage != _streamencoding.WindowsCodePage)
 | |
|                                         {
 | |
|                                             AddError(
 | |
|                                                 HtmlParseErrorCode.CharsetMismatch,
 | |
|                                                 _line, _lineposition,
 | |
|                                                 _index, node.OuterHtml,
 | |
|                                                 "Encoding mismatch between StreamEncoding: " +
 | |
|                                                 _streamencoding.WebName + " and DeclaredEncoding: " +
 | |
|                                                 _declaredencoding.WebName);
 | |
|                                         }
 | |
|                                     }
 | |
|                                 }
 | |
|                             }
 | |
|                         }
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         #endregion
 | |
| 
 | |
|         #region Nested type: ParseState
 | |
| 
 | |
|         private enum ParseState
 | |
|         {
 | |
|             Text,
 | |
|             WhichTag,
 | |
|             Tag,
 | |
|             BetweenAttributes,
 | |
|             EmptyTag,
 | |
|             AttributeName,
 | |
|             AttributeBeforeEquals,
 | |
|             AttributeAfterEquals,
 | |
|             AttributeValue,
 | |
|             Comment,
 | |
|             QuotedAttributeValue,
 | |
|             ServerSideCode,
 | |
|             PcData
 | |
|         }
 | |
| 
 | |
|         #endregion
 | |
|     }
 | |
| } |