I recently read an article on web scraping that sounded interesting, and I wanted to use its libraries inside a Swing form. Here is the tutorial I was following: https://www.scrapingbee.com/java-webscraping-book/ I didn't change much; I basically just let the user type the URL into a text box. The exception I keep getting is a java.lang.NoClassDefFoundError, which, from what I have read, occurs when the class in question could be located at compile time but not at runtime. I have included all of the jar files from the HtmlUnit library, and I can import the class in question, WebClient, without any problem. However, although the output says BUILD SUCCESSFUL at the bottom, the application never actually opens. Below are the code and the error messages I am receiving. Just to test, I copied the code, pasted it into NetBeans, and brought in all of the necessary jar files, and the exact same error about the WebClient class comes up. At any rate, here is the code I used, and as always, I very much appreciate anyone taking the time to help me.
package webscrape;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import java.awt.Color;
import java.util.List;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.StyleSheet;
/**
 * Swing form that downloads a Hacker News style listing page with HtmlUnit,
 * extracts every story row and shows each story as one line of JSON.
 *
 * @author glass
 */
public class JsoupTutorial extends javax.swing.JFrame {

    private static final Logger LOG = Logger.getLogger(JsoupTutorial.class.getName());

    /** Reused for every row instead of building a new mapper per story. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** URL most recently submitted through the text box. */
    private String baseUrl;

    /**
     * Creates new form JsoupTutorial.
     */
    public JsoupTutorial() {
        // The components only exist after initComponents(); configuring
        // responseArea before this call dereferences a null field.
        initComponents();
        this.responseArea.setBackground(Color.BLACK);
        this.responseArea.setEditable(false);
        this.responseArea.setContentType("text/html");
    }

    /**
     * This method is called from within the constructor to initialize the form.
     * WARNING: Do NOT modify this code. The content of this method is always
     * regenerated by the Form Editor.
     */
    @SuppressWarnings("unchecked")
    // <editor-fold defaultstate="collapsed" desc="Generated Code">
    private void initComponents() {
        title = new javax.swing.JLabel();
        urlTxtBox = new javax.swing.JTextField();
        enterTxt = new javax.swing.JLabel();
        enterBtn = new javax.swing.JButton();
        jScrollPane1 = new javax.swing.JScrollPane();
        responseArea = new javax.swing.JEditorPane();
        setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
        title.setText("Jsoup Testing");
        enterTxt.setText("Enter a website:");
        enterBtn.setText("Enter");
        enterBtn.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                enterBtnActionPerformed(evt);
            }
        });
        jScrollPane1.setViewportView(responseArea);
        javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
        getContentPane().setLayout(layout);
        layout.setHorizontalGroup(
            layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
            .addGroup(layout.createSequentialGroup()
                .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
                    .addGroup(layout.createSequentialGroup()
                        .addGap(342, 342, 342)
                        .addComponent(title))
                    .addGroup(layout.createSequentialGroup()
                        .addGap(24, 24, 24)
                        .addComponent(enterTxt)
                        .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
                        .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING, false)
                            .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 298, Short.MAX_VALUE)
                            .addComponent(urlTxtBox))
                        .addGap(50, 50, 50)
                        .addComponent(enterBtn)))
                .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))
        );
        layout.setVerticalGroup(
            layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
            .addGroup(layout.createSequentialGroup()
                .addContainerGap()
                .addComponent(title)
                .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
                .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
                    .addComponent(urlTxtBox, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
                    .addComponent(enterTxt)
                    .addComponent(enterBtn))
                .addGap(18, 18, 18)
                .addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 192, javax.swing.GroupLayout.PREFERRED_SIZE)
                .addContainerGap(71, Short.MAX_VALUE))
        );
        pack();
    }// </editor-fold>

    /**
     * Fetches the page named in the URL box, converts every
     * {@code <tr class="athing">} row into a {@link HackerNewsItem} and displays
     * the items as JSON, one per line. Failures are logged and shown in the
     * response area instead of being lost on the console.
     *
     * @param evt the button click that triggered the request (unused)
     */
    private void enterBtnActionPerformed(java.awt.event.ActionEvent evt) {
        baseUrl = this.urlTxtBox.getText().trim();
        if (baseUrl.isEmpty()) {
            showText("Please enter a URL first.");
            return;
        }
        // NOTE(review): the download runs on the event dispatch thread, so the
        // window is unresponsive until it completes; consider a SwingWorker.
        //
        // A fresh client is created per request. Running try-with-resources on a
        // shared field would close it after the first click and leave a closed
        // instance behind for every later click.
        try (WebClient client = new WebClient()) {
            // Disable CSS and JavaScript -- sites heavy in Ajax calls will not work.
            client.getOptions().setCssEnabled(false);
            client.getOptions().setJavaScriptEnabled(false);

            HtmlPage page = client.getPage(baseUrl);
            List<HtmlElement> itemList = page.getByXPath("//tr[@class='athing']");
            if (itemList.isEmpty()) {
                System.out.println("No item found!");
                showText("No item found!");
                return;
            }

            StringBuilder json = new StringBuilder();
            for (HtmlElement htmlItem : itemList) {
                String jsonString = MAPPER.writeValueAsString(toItem(htmlItem));
                System.out.println(jsonString);
                json.append(jsonString).append('\n');
            }
            showText(json.toString());
        } catch (IOException | FailingHttpStatusCodeException ex) {
            LOG.log(Level.SEVERE, null, ex);
            showText("Request failed: " + ex);
        }
    }

    /**
     * Builds a {@link HackerNewsItem} from one story row. Pieces that are
     * missing from the row become an empty string or {@code 0} rather than
     * aborting the whole page.
     *
     * @param row a {@code <tr class="athing">} element
     * @return the item described by the row and its following sibling rows
     */
    private static HackerNewsItem toItem(HtmlElement row) {
        int position = parseNumber(textAt(row, "./td/span"));
        int id = parseNumber(row.getAttribute("id"));
        String thetitle = textAt(row, "./td[not(@valign='top')][@class='title']");
        HtmlAnchor link = (HtmlAnchor) row.getFirstByXPath("./td[not(@valign='top')][@class='title']/a");
        String url = link == null ? "" : link.getHrefAttribute();
        String author = textAt(row, "./following-sibling::tr/td[@class='subtext']/a[@class='hnuser']");
        int score = parseNumber(textAt(row, "./following-sibling::tr/td[@class='subtext']/span[@class='score']"));
        return new HackerNewsItem(thetitle, url, author, score, position, id);
    }

    /**
     * Returns the text of the first element matched by {@code xpath} relative to
     * {@code context}, or an empty string when nothing matches.
     */
    private static String textAt(HtmlElement context, String xpath) {
        HtmlElement match = (HtmlElement) context.getFirstByXPath(xpath);
        return match == null ? "" : match.asText();
    }

    /**
     * Parses the digits contained in {@code text} ("12." gives 12, "1 point"
     * gives 1). Returns {@code 0} when there are no digits or the value does not
     * fit in an {@code int}.
     */
    private static int parseNumber(String text) {
        String digits = text.replaceAll("\\D", "");
        if (digits.isEmpty()) {
            return 0;
        }
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException ex) {
            return 0;
        }
    }

    /**
     * Shows {@code text} in the response area as preformatted HTML. The text is
     * escaped so that markup characters display literally, and it is coloured
     * white so it stays readable on the black background set in the constructor.
     */
    private void showText(String text) {
        String escaped = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
        this.responseArea.setText("<html><head><title>Json Response Below:</title></head>"
                + "<body style=\"color: white\"><pre>" + escaped + "</pre></body></html>");
    }

    /**
     * @param args the command line arguments
     */
    public static void main(String args[]) {
        /* Set the Nimbus look and feel */
        //<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) ">
        /* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel.
         * For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html
         */
        try {
            for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) {
                if ("Nimbus".equals(info.getName())) {
                    javax.swing.UIManager.setLookAndFeel(info.getClassName());
                    break;
                }
            }
        } catch (ClassNotFoundException | InstantiationException | IllegalAccessException
                | javax.swing.UnsupportedLookAndFeelException ex) {
            LOG.log(Level.SEVERE, null, ex);
        }
        //</editor-fold>
        /* Create and display the form */
        java.awt.EventQueue.invokeLater(new Runnable() {
            @Override
            public void run() {
                new JsoupTutorial().setVisible(true);
            }
        });
    }
    // Variables declaration - do not modify
    private javax.swing.JButton enterBtn;
    private javax.swing.JLabel enterTxt;
    private javax.swing.JScrollPane jScrollPane1;
    private javax.swing.JEditorPane responseArea;
    private javax.swing.JLabel title;
    private javax.swing.JTextField urlTxtBox;
    // End of variables declaration
}
And here is the other class:
package webscrape;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
 * Immutable description of one story scraped from a listing page.
 * <p>
 * The getters are what expose the values as bean properties; with only private
 * fields and no accessors, a default Jackson {@code ObjectMapper} finds nothing
 * to serialize.
 *
 * @author glass
 */
public class HackerNewsItem {
    private final String title;
    private final String url;
    private final String author;
    private final int score;
    private final int position;
    private final int id;

    /**
     * @param title    headline of the story
     * @param url      link target of the headline
     * @param author   user name of the submitter
     * @param score    number of points the story has
     * @param position rank of the story on the page
     * @param id       numeric id of the story row
     */
    public HackerNewsItem(String title, String url, String author, int score, int position, int id)
    {
        this.title = title;
        this.url = url;
        this.author = author;
        this.score = score;
        this.position = position;
        this.id = id;
    }

    /** @return headline of the story */
    public String getTitle() {
        return title;
    }

    /** @return link target of the headline */
    public String getUrl() {
        return url;
    }

    /** @return user name of the submitter */
    public String getAuthor() {
        return author;
    }

    /** @return number of points the story has */
    public int getScore() {
        return score;
    }

    /** @return rank of the story on the page */
    public int getPosition() {
        return position;
    }

    /** @return numeric id of the story row */
    public int getId() {
        return id;
    }
}
This is the error I am getting back when I run the project in NetBeans:
Exception in thread "AWT-EventQueue-0" java.lang.NoClassDefFoundError: org/apache/xerces/xni/XNIException
at com.gargoylesoftware.htmlunit.DefaultPageCreator.<clinit>(DefaultPageCreator.java:91)
at com.gargoylesoftware.htmlunit.WebClient.<init>(WebClient.java:187)
at com.gargoylesoftware.htmlunit.WebClient.<init>(WebClient.java:269)
at com.gargoylesoftware.htmlunit.WebClient.<init>(WebClient.java:259)
at com.gargoylesoftware.htmlunit.WebClient.<init>(WebClient.java:251)
at webscrape.JsoupTutorial.<init>(JsoupTutorial.java:31)
at webscrape.JsoupTutorial$2.run(JsoupTutorial.java:190)
at java.desktop/java.awt.event.InvocationEvent.dispatch(InvocationEvent.java:316)
at java.desktop/java.awt.EventQueue.dispatchEventImpl(EventQueue.java:770)
at java.desktop/java.awt.EventQueue$4.run(EventQueue.java:721)
at java.desktop/java.awt.EventQueue$4.run(EventQueue.java:715)
at java.base/java.security.AccessController.doPrivileged(AccessController.java:391)
at java.base/java.security.ProtectionDomain$JavaSecurityAccessImpl.doIntersectionPrivilege(ProtectionDomain.java:85)
at java.desktop/java.awt.EventQueue.dispatchEvent(EventQueue.java:740)
at java.desktop/java.awt.EventDispatchThread.pumpOneEventForFilters(EventDispatchThread.java:203)
at java.desktop/java.awt.EventDispatchThread.pumpEventsForFilter(EventDispatchThread.java:124)
at java.desktop/java.awt.EventDispatchThread.pumpEventsForHierarchy(EventDispatchThread.java:113)
at java.desktop/java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:109)
at java.desktop/java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:101)
at java.desktop/java.awt.EventDispatchThread.run(EventDispatchThread.java:90)
Caused by: java.lang.ClassNotFoundException: org.apache.xerces.xni.XNIException
at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:636)
at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:182)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:519)
... 20 more
BUILD SUCCESSFUL (total time: 1 second)
It acts like the WebClient class can't be located at runtime. The output says BUILD SUCCESSFUL, but the JFrame never appears on the screen, and although NetBeans usually indicates when something is running, it doesn't in this case. It's as if the project compiles and then never starts.