Imported Upstream version 4.6.0.125

Former-commit-id: a2155e9bd80020e49e72e86c44da02a8ac0e57a4
2016-08-03 10:59:49 +00:00
parent a569aebcfd
commit e79aa3c0ed
17047 changed files with 3137615 additions and 392334 deletions
--- a/mcs/class/referencesource/System.Web.Services/System/Web/Services/Discovery/LinkGrep.cs
+++ b/mcs/class/referencesource/System.Web.Services/System/Web/Services/Discovery/LinkGrep.cs
@ -0,0 +1,205 @@
+//------------------------------------------------------------------------------
+// <copyright file="LinkGrep.cs" company="Microsoft">
+//     Copyright (c) Microsoft Corporation.  All rights reserved.
+// </copyright>                                                                
+//------------------------------------------------------------------------------
+
+namespace System.Web.Services.Discovery {
+
+    using System;
+    using System.IO;
+    using System.Text.RegularExpressions;
+    using System.Web.Services.Protocols;
+    using System.Globalization;
+    
+    internal class LinkGrep {
+        // Every member of this class is static; the private constructor keeps
+        // code outside the class from creating instances.
+        private LinkGrep() {
+        }
+
+        // Reads every remaining character from 'input' and returns the result as a
+        // single string. Returns an empty string when the reader is already at its end.
+        // The reader is not closed or disposed here.
+        private static string ReadEntireStream(TextReader input) {
+            // TextReader.ReadToEnd replaces the former hand-written loop that read into
+            // a char[] and doubled the array each time it filled up; the result is the same.
+            return input.ReadToEnd();
+        }
+
+        // Scans the leading part of an HTML document read from 'stream' for a <link> tag
+        // that carries all three of: a "type" attribute accepted by
+        // ContentType.MatchesBase(value, ContentType.TextXml), rel="alternate", and an
+        // "href" attribute. Returns that href value, or null when no such tag is found
+        // before the <body> tag, before the end of the text, or before the first point
+        // at which none of the patterns of this class matches.
+        //
+        // The whole stream is read through a StreamReader. The reader is not disposed
+        // here; disposing it would also close the stream passed in by the caller.
+        internal static string SearchForLink(Stream stream) {
+            string text = ReadEntireStream(new StreamReader(stream));
+            int textpos = 0;
+            Match match;
+
+            // A doctype directive is skipped only when it is the very first thing in the text.
+            if ((match = doctypeDirectiveRegex.Match(text, textpos)).Success) {
+                textpos += match.Length;
+            }
+
+            for (;;) {
+                // Set as soon as any pattern matches during this pass of the loop.
+                bool oneMatch = false;
+
+                // 1: scan for text up to the next tag.
+                // First case: whitespace going all the way to the next tag (or the end).
+                if ((match = whitespaceRegex.Match(text, textpos)).Success) {
+                    oneMatch = true;
+                }
+                // Second case: there may be some non-whitespace; scan it.
+                else if ((match = textRegex.Match(text, textpos)).Success) {
+                    oneMatch = true;
+                }
+
+                // A failed Match has Length 0, so the position is unchanged when nothing matched.
+                textpos += match.Length;
+                if (textpos == text.Length) {
+                    break;
+                }
+
+                // 2: handle constructs that start with '<'.
+                if ((match = tagRegex.Match(text, textpos)).Success) {
+                    oneMatch = true;
+                    string tag = match.Groups["tagname"].Value;
+
+                    if (String.Compare(tag, "link", StringComparison.OrdinalIgnoreCase) == 0) {
+                        string href = GetDiscoveryHref(match);
+                        if (href != null) {
+                            // Got a link to a disco file!
+                            return href;
+                        }
+                    }
+                    else if (String.Compare(tag, "body", StringComparison.OrdinalIgnoreCase) == 0) {
+                        // If body begins, get out, since link tags should only be defined in the head.
+                        // Compared case-insensitively, like "link" above, so that <BODY> also ends the scan.
+                        break;
+                    }
+                }
+                // Not a start tag: check to see if it's an end tag.
+                else if ((match = endtagRegex.Match(text, textpos)).Success) {
+                    oneMatch = true;
+                }
+                // Not an end tag either: check to see if it's a comment.
+                else if ((match = commentRegex.Match(text, textpos)).Success) {
+                    oneMatch = true;
+                }
+
+                textpos += match.Length;
+                if (textpos == text.Length) {
+                    break;
+                }
+
+                // If we couldn't get one single match, it means that it's probably not HTML, so bail.
+                if (!oneMatch) {
+                    break;
+                }
+            }
+
+            return null;
+        }
+
+        // Inspects the attributes captured by a successful tagRegex match of a <link> tag.
+        // Attributes are examined in the order they were captured; as soon as a matching
+        // "type", a rel="alternate" and an "href" have all been seen, the href seen so far
+        // is returned. Returns null when the tag never satisfies all three conditions.
+        private static string GetDiscoveryHref(Match linkMatch) {
+            CaptureCollection attrnames = linkMatch.Groups["attrname"].Captures;
+            CaptureCollection attrvalues = linkMatch.Groups["attrval"].Captures;
+
+            bool rightType = false;
+            bool rightRel = false;
+            string href = null;
+
+            int count = attrnames.Count;
+            for (int i = 0; i < count; i++) {
+                string attrName = attrnames[i].ToString();
+                string attrValue = attrvalues[i].ToString();
+
+                if (String.Compare(attrName, "type", StringComparison.OrdinalIgnoreCase) == 0 &&
+                    ContentType.MatchesBase(attrValue, ContentType.TextXml)) {
+                    rightType = true;
+                }
+                else if (String.Compare(attrName, "rel", StringComparison.OrdinalIgnoreCase) == 0 &&
+                    String.Compare(attrValue, "alternate", StringComparison.OrdinalIgnoreCase) == 0) {
+                    rightRel = true;
+                }
+                else if (String.Compare(attrName, "href", StringComparison.OrdinalIgnoreCase) == 0) {
+                    href = attrValue;
+                }
+
+                if (rightType && rightRel && href != null) {
+                    return href;
+                }
+            }
+
+            return null;
+        }
+        
+        // Matches one start tag (or empty-element tag) beginning exactly at the position
+        // passed to Match (\G anchor): '<', an optional "prefix:" part, the tag name, any
+        // number of name=value attributes, an optional '/', and the closing '>'.
+        // Every attribute must have a value (double-quoted, single-quoted or unquoted);
+        // a tag containing a value-less attribute does not match. Attribute names and
+        // values are exposed, in order, through the captures of the "attrname" and
+        // "attrval" groups.
+        private readonly static Regex tagRegex = new Regex
+        (
+            "\\G<" +                                    // leading <
+            "(?<prefix>[\\w:.-]+(?=:)|):?" +            // optional prefix:
+            "(?<tagname>[\\w.-]+)" +                    // tagname
+    
+            "(?:\\s+" +                                 // zero or more attributes
+                "(?<attrprefix>[\\w:.-]+(?=:)|):?" +    //     optional attrprefix:
+                "(?<attrname>[\\w.-]+)" +               //     attrname
+                "\\s*=\\s*" +                           //     required equals
+                "(?:" +                                 //     quoted value
+                    "\"(?<attrval>[^\"]*)\"" +          //          double quoted attrval
+                    "|'(?<attrval>[^\']*)'" +           //          single quoted attrval
+                    "|(?<attrval>[a-zA-Z0-9\\-._:]+)" + //          attrval with no quotes (SGML-approved chars)
+                ")" +                                   //     end quoted value
+            ")*" +                                      // end attribute
+            
+            "\\s*(?<empty>/)?>"                         // optional trailing /, and trailing >
+        );
+            
+        // Matches a <!doctype ...> directive beginning exactly at the position passed to
+        // Match (\G anchor), case-insensitively. After the keyword, the body is any
+        // sequence of runs of whitespace/word characters and double-quoted literals,
+        // followed by '>'.
+        //
+        // Each quoted literal ends at the NEXT double quote ([^"]*), not at the last quote
+        // on the line: a greedy ".*" would let the directive swallow later tags that share
+        // its line, such as a following <link href="...">. A literal may therefore also
+        // span line breaks. The alternation is an atomic group (?>...) so that an
+        // unterminated directive fails quickly instead of backtracking through every way
+        // of splitting a run of word characters between iterations.
+        private readonly static Regex doctypeDirectiveRegex = new Regex
+        (
+            @"\G<!doctype\b(?>[\s\w]+|""[^""]*"")*>", 
+            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace
+        );
+
+        // Matches one end tag beginning exactly at the position passed to Match (\G anchor):
+        // "</", an optional "prefix:" part, the tag name, optional whitespace, and '>'.
+        private readonly static Regex endtagRegex = new Regex
+        (
+            "\\G</" +                                   // leading </
+            "(?<prefix>[\\w:-]+(?=:)|):?" +             // optional prefix:
+            "(?<tagname>[\\w-]+)\\s*>"                 // tagname
+        );
+        
+        // Matches one comment beginning exactly at the position passed to Match (\G anchor):
+        // "<!--", then as few '-'-terminated chunks as possible, then "->". The last chunk
+        // supplies the first '-' of the closing "-->". Each chunk is an atomic group, so a
+        // chunk is never re-split by backtracking.
+        private readonly static Regex commentRegex = new Regex
+        (
+            "\\G<!--" +                                 // leading <!--
+            "(?>[^-]*-)+?" +                            // one or more chunks of text ending with -, minimal
+            "->"                                        // trailing ->
+    
+        );
+        
+        // Matches a run of whitespace beginning exactly at the position passed to Match
+        // (\G anchor), but only when the run is immediately followed by '<' or by the end
+        // of the string. The lookahead does not consume the '<'.
+        private readonly static Regex whitespaceRegex = new Regex
+        (
+            "\\G\\s+" +                                 // at least one char of whitespace
+            "(?=<|\\Z)"                                 // ending with either '<' or the end of the string
+        );
+        
+        // Matches the longest run of characters other than '<' beginning exactly at the
+        // position passed to Match (\G anchor); used to skip text between tags.
+        private readonly static Regex textRegex = new Regex
+        (
+            "\\G[^<]+"                                  // at least one char of non-'<', maximal
+        );
+    }
+}