Why does an attempt to access the InnerText property cause a StackOverflowException when using HtmlAgilityPack?

145 Views Asked by At

I'm building an HTML preprocessor using HtmlAgilityPack that builds JSON representations of HTML files. To do this, I'm using a simple Node class to hold the necessary properties from the HtmlNode objects provided by HtmlAgilityPack.

Unfortunately, I am consistently getting a StackOverflowException that crashes my program. It occurs in the last if statement of the GenerateNode() method. As you can see from my code, I have tried unsuccessfully to extract the text from an HtmlTextNode and write it to the InnerText property of my custom Node class. Each of the commented-out lines throws a StackOverflowException when it is enabled.

For the sake of completeness, I've included all the code in my WebHelpPreprocessor class, but it's only the first code block that concerns this question.

First Block

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HtmlAgilityPack;
//
using Newtonsoft.Json;

namespace WebHelpPreprocessor
{
    /// <summary>
    /// A static class to help process html
    /// </summary>
    public static class Preprocessor
    {
        /// <summary>
        /// Creates a Node from an HtmlNode, copying its name, its "guid" attribute value
        /// and, when present, its "class" and "id" attribute values.
        /// </summary>
        /// <param name="htmlNode">
        /// The source node. Its "guid" attribute is dereferenced without a null check,
        /// so it must already carry one.
        /// </param>
        /// <returns>the newly created Node</returns>
        private static Node GenerateNode(HtmlNode htmlNode)
        {
            string tagName = htmlNode.Name;
            Node result = new Node(tagName, htmlNode.Attributes["guid"].Value);

            // Look each optional attribute up once and copy it only if it exists.
            var classAttribute = htmlNode.Attributes["class"];
            if (classAttribute != null)
            {
                result.Classes = classAttribute.Value;
            }

            var idAttribute = htmlNode.Attributes["id"];
            if (idAttribute != null)
            {
                result.Id = idAttribute.Value;
            }

            if (tagName == "#text")
            {
                // Attempts to read the text content; each one is reported to
                // overflow the stack when enabled:
                //node.InnerText = htmlNode.InnerHtml;
                //Console.WriteLine(htmlNode.InnerText.Trim());
                //Console.WriteLine(JsonConvert.SerializeObject(htmlNode));
                Console.WriteLine(tagName);
            }

            return result;
        }

Here is the rest of the code

        /// <summary>
        /// Builds a DocumentTree from the root node using breadth-first search.
        /// </summary>
        /// <remarks>
        /// The root and every descendant HtmlNode get a freshly generated "guid"
        /// attribute added to them. That value is used to find the matching Node in
        /// the tree again when the HtmlNode is later taken off the queue.
        /// </remarks>
        /// <param name="root">The root node to use for the model</param>
        /// <returns>a completed document tree</returns>
        /// <exception cref="ArgumentNullException">root is null</exception>
        public static DocumentTree BuildModelBFS(HtmlNode root)
        {
            if (root == null)
            {
                throw new ArgumentNullException("root");
            }

            root.Attributes.Add("guid", System.Guid.NewGuid().ToString());
            DocumentTree documentModel = new DocumentTree(new Node(root.Name, root.Attributes["guid"].Value));
            Queue<HtmlNode> q = new Queue<HtmlNode>();

            q.Enqueue(root);
            while (q.Count > 0)
            {
                HtmlNode current = q.Dequeue();

                // Guard before touching the node; previously this check came after
                // current.Attributes had already been dereferenced.
                if (current == null)
                {
                    continue;
                }

                Node currentNode = documentModel.find(current.Attributes["guid"].Value);
                if (current.HasChildNodes)
                {
                    foreach (HtmlNode child in current.ChildNodes)
                    {
                        // NOTE(review): this also stamps "guid" onto #text nodes. Modifying a
                        // text node's attributes may be related to the stack overflow seen when
                        // its InnerText/InnerHtml is read afterwards -- TODO confirm against
                        // the HtmlAgilityPack source before relying on this.
                        child.Attributes.Add("guid", System.Guid.NewGuid().ToString());
                        documentModel.AddToTree(GenerateNode(child), currentNode);
                        q.Enqueue(child);
                    }
                }

                //------------------Debugging
                // Log every non-text node that is visited; "0" stands in for a missing id.
                var idAttribute = current.Attributes["id"];
                string id = idAttribute != null ? idAttribute.Value : "0";
                if (current.Name != "#text")
                {
                    Console.WriteLine("Current node: " + current.Name + " ID: " + id + " Hash Code: " + current.GetHashCode());
                }
                //--------------------End of debugging
            }

            return documentModel;
        }

        /// <summary>
        /// Intended to build a DocumentTree from the root node using depth-first search.
        /// Not implemented yet: the body ignores its argument and always returns null.
        /// </summary>
        /// <param name="root">The root node to use for the model (currently unused)</param>
        /// <returns>always null at present</returns>
        public static DocumentTree BuildModelDFS(HtmlNode root)
        {

            return null;
        }
    }
}
0

There are 0 best solutions below