HtmlAgilityPack not getting all the tables

52 Views Asked by At

using this code and I am trying to get all of the HTML Tables. One of the table is commented out <!--<table>, I want to include that in the output. When I do this logic I am only seeing one table. Is there a way to get all table?

enter image description here

static void Main(string[] args)
{
    string SEPARATOR = ",";
    //WebClient webClient = new WebClient();



    //string page = webClient.DownloadString("https://www.pro-football-reference.com/boxscores/202209110det.htm");

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

    string HTML = "<html><head>\r\n</head>\r\n<body contenteditable=\"false\">\r\n\r\n<h2>HTML Table</h2>\r\n\r\n<!--<table>\r\n  <tbody><tr>\r\n    <th>Company</th>\r\n    <th>Contact</th>\r\n    <th>Country</th>\r\n  </tr>\r\n  <tr>\r\n    <td>Alfreds Futterkiste</td>\r\n    <td>Maria Anders</td>\r\n    <td>Germany</td>\r\n  </tr>\r\n</tbody></table>-->\r\n\r\n<table>\r\n  <tbody><tr>\r\n    <th>Company</th>\r\n    <th>Contact</th>\r\n    <th>Country</th>\r\n  </tr>\r\n  <tr>\r\n    <td>Alfreds Futterkiste</td>\r\n    <td>Maria Anders</td>\r\n    <td>Germany</td>\r\n  </tr>\r\n</tbody></table>\r\n</body></html>";
    doc.LoadHtml(HTML);

    var tables = doc.DocumentNode.Descendants("table");
    int tablesCount = tables.Count();
    int x = 0;
    foreach ( var table in tables)
    {
        var rows = table.Descendants("tr")
            .Select(tr => tr.Descendants("td").Select(td => td.InnerText).ToList())
            .ToList();

        foreach (var row in rows)
            Console.WriteLine(String.Join(",", row));

        string tableName = string.Format("{0}_{1}.csv",table.Name, x);

        using (StreamWriter writer = new StreamWriter(tableName))
        {
            rows.ForEach(line =>
            {
                var lineArray = line.Select(c =>
                    c.Contains(SEPARATOR) ? c.Replace(SEPARATOR.ToString(), "\\" + SEPARATOR) : c).ToArray();
                writer.WriteLine(string.Join(SEPARATOR, lineArray));
            });
        }
        x ++;

    }
}
0

There are 0 best solutions below