I am new to Hadoop and followed a WordCount MapReduce example that I found online, running it on Ubuntu. I managed to complete every step except the last one: actually running the job. I uploaded the input file to the cluster, so my input is at /user/inputdata/test.txt, and I intend to use the /user/output directory for the job's output. With that in place, I ran the following command:
hadoop jar /home/wasim/Downloads/jar_files/MapReduceDemo.jar MapReduceDemo.WordCounter /user/inputdata/test.txt /user/output
However, I receive the following error:
usage: WordCount <input_file> <output_directory>
I know this error comes from the check in my Java code that verifies the job received the correct number of arguments. I've tried running the command in several other ways but keep getting the same error, and I can't figure out what I am doing wrong.
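For reference, this is roughly how I got the input file onto HDFS before running the job (a sketch from memory, assuming the standard hdfs dfs commands):

hdfs dfs -mkdir -p /user/inputdata
hdfs dfs -put test.txt /user/inputdata/test.txt
hdfs dfs -ls /user/inputdata

The file shows up in the listing, so I don't think the input path itself is the problem. Here is the full source of WordCounter: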
package MapReduceDemo;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCounter {

    public static class TokenizeMapper extends Mapper<Object, Text, Text, IntWritable> {

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Split the incoming line on whitespace and emit (word, 1) per token.
            StringTokenizer st = new StringTokenizer(value.toString());
            Text wordOut = new Text();
            IntWritable one = new IntWritable(1);
            while (st.hasMoreTokens()) {
                wordOut.set(st.nextToken());
                context.write(wordOut, one);
            }
        }
    }
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        public void reduce(Text term, Iterable<IntWritable> ones, Context context) throws IOException, InterruptedException {
            // Sum the partial counts for this term instead of just counting
            // entries, so the logic stays correct even if a combiner is added.
            int count = 0;
            for (IntWritable value : ones) {
                count += value.get();
            }
            context.write(term, new IntWritable(count));
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Strip generic Hadoop options (-D, -files, ...); what remains should
        // be exactly the input file and the output directory.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("usage: WordCount <input_file> <output_directory>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "Word Count");
        job.setJarByClass(WordCounter.class);
        job.setMapperClass(TokenizeMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setNumReduceTasks(10);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        // Block until the job finishes; exit 0 on success, 1 on failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
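Since the usage message only prints when the number of remaining arguments is not exactly 2, my next step is to check what actually reaches main before GenericOptionsParser touches anything. Below is a minimal throwaway class I would package and run the same way (ArgsProbe is a hypothetical name, not part of my job):

// Diagnostic only: print every argument the JVM hands to main().
// If running the hadoop jar command above reports three arguments with
// args[0] = "MapReduceDemo.WordCounter", then the class name is being passed
// through as a program argument rather than selecting the main class, which
// is what happens when the jar's manifest already declares a Main-Class.
public class ArgsProbe {
    public static void main(String[] args) {
        System.err.println("Received " + args.length + " argument(s):");
        for (int i = 0; i < args.length; i++) {
            System.err.println("  args[" + i + "] = " + args[i]);
        }
    }
}

Is this a sensible way to narrow it down, or is there something obvious wrong with the command itself?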