using this code and I am trying to get all of the HTML Tables. One of the table is commented out <!--<table>, I want to include that in the output. When I do this logic I am only seeing one table. Is there a way to get all table?
static void Main(string[] args)
{
string SEPARATOR = ",";
//WebClient webClient = new WebClient();
//string page = webClient.DownloadString("https://www.pro-football-reference.com/boxscores/202209110det.htm");
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
string HTML = "<html><head>\r\n</head>\r\n<body contenteditable=\"false\">\r\n\r\n<h2>HTML Table</h2>\r\n\r\n<!--<table>\r\n <tbody><tr>\r\n <th>Company</th>\r\n <th>Contact</th>\r\n <th>Country</th>\r\n </tr>\r\n <tr>\r\n <td>Alfreds Futterkiste</td>\r\n <td>Maria Anders</td>\r\n <td>Germany</td>\r\n </tr>\r\n</tbody></table>-->\r\n\r\n<table>\r\n <tbody><tr>\r\n <th>Company</th>\r\n <th>Contact</th>\r\n <th>Country</th>\r\n </tr>\r\n <tr>\r\n <td>Alfreds Futterkiste</td>\r\n <td>Maria Anders</td>\r\n <td>Germany</td>\r\n </tr>\r\n</tbody></table>\r\n</body></html>";
doc.LoadHtml(HTML);
var tables = doc.DocumentNode.Descendants("table");
int tablesCount = tables.Count();
int x = 0;
foreach ( var table in tables)
{
var rows = table.Descendants("tr")
.Select(tr => tr.Descendants("td").Select(td => td.InnerText).ToList())
.ToList();
foreach (var row in rows)
Console.WriteLine(String.Join(",", row));
string tableName = string.Format("{0}_{1}.csv",table.Name, x);
using (StreamWriter writer = new StreamWriter(tableName))
{
rows.ForEach(line =>
{
var lineArray = line.Select(c =>
c.Contains(SEPARATOR) ? c.Replace(SEPARATOR.ToString(), "\\" + SEPARATOR) : c).ToArray();
writer.WriteLine(string.Join(SEPARATOR, lineArray));
});
}
x ++;
}
}
