Running a MapReduce job in Oozie


I'm trying to run a MapReduce job through Oozie, but it fails and the workflow gets killed. No error details show up in the Oozie console; all it gives is the message "Map/Reduce failed, error message[]". Where can I find the logs, and will they contain the exact error? I'm new to this and can't tell what is wrong with the code below. I've been stuck on this for two days.
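
For completeness, this is how I submit the workflow (assuming the Oozie server runs on its default port, 11000):

oozie job -oozie http://localhost:11000/oozie -config job.properties -run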

This is my MapReduce program.

package com.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AnalysePatientRecords {

    public static class SampleMap extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            // Records are pipe-delimited; field 0 holds the patient id and field 11 the state
            String line = value.toString();
            String[] lineElements = line.split("\\|", -1);
            Text state = new Text(lineElements[11]);
            Text patientId = new Text(lineElements[0]);

            // Emit (state, patientId) so the reducer can count patients per state
            context.write(state, patientId);

        }

    }

    public static class SampleReduce extends Reducer<Text, Text, Text, IntWritable> {

        @Override
        @SuppressWarnings("unused")
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {

            // Count the patient ids collected for this state key
            int count = 0;
            for (Text val : values) {
                count = count + 1;
            }
            context.write(key, new IntWritable(count));

        }

    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "patient analysis");
        job.setJarByClass(AnalysePatientRecords.class);
        // Map output types (Text, Text) differ from the final output types
        // (Text, IntWritable), so both pairs must be set explicitly
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setMapperClass(SampleMap.class);
        job.setReducerClass(SampleReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}
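
For reference, outside Oozie the driver would be launched directly like this (the jar name is just what I called mine):

hadoop jar AnalysePatientRecords.jar com.hadoop.mapreduce.AnalysePatientRecords <input-dir> <output-dir>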

job.properties

nameNode=hdfs://localhost:54310
jobTracker=localhost:8032
queueName=default
examplesRoot=MapReduce

oozie.libpath=${nameNode}/user/${user.name}/share/lib
oozie.use.system.libpath=true

oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/map-reduce/workflow.xml
outputDir=map-reduce
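
I copied the application to HDFS with commands like these (directory names match the properties above):

hdfs dfs -mkdir -p /user/$USER/MapReduce/apps/map-reduce/lib
hdfs dfs -put workflow.xml /user/$USER/MapReduce/apps/map-reduce/
hdfs dfs -put AnalysePatientRecords.jar /user/$USER/MapReduce/apps/map-reduce/lib/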

workflow.xml

<workflow-app xmlns="uri:oozie:workflow:0.2" name="map-reduce-wf">
    <start to="mr-node"/>
    <action name="mr-node">
        <map-reduce>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/${outputDir}"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.mapper.new-api</name>
                    <value>true</value>
                </property>
                <property>
                    <name>mapred.reducer.new-api</name>
                    <value>true</value>
                </property>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>mapreduce.map.class</name>
                    <value>AnalysePatientRecords$SampleMap</value>
                </property>
                <property>
                    <name>mapreduce.reduce.class</name>
                    <value>AnalysePatientRecords$SampleReduce</value>
                </property>
                <property>
                    <name>mapred.mapoutput.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapred.mapoutput.value.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapred.output.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapred.output.value.class</name>
                    <value>org.apache.hadoop.io.IntWritable</value>
                </property>
                <property>
                    <name>mapred.map.tasks</name>
                    <value>1</value>
                </property>
                <property>
                    <name>mapred.input.dir</name>
                    <value>/user/${wf:user()}/${examplesRoot}/input-data/text</value>
                </property>
                <property>
                    <name>mapred.output.dir</name>
                    <value>/user/${wf:user()}/${examplesRoot}/output-data/${outputDir}</value>
                </property>
            </configuration>
        </map-reduce>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>Map/Reduce failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>

I also built a jar of the MapReduce program above and placed it inside the lib folder. I can't figure out what is wrong; any help would be appreciated.
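
The resulting application directory on HDFS looks like this:

/user/<user>/MapReduce/apps/map-reduce/workflow.xml
/user/<user>/MapReduce/apps/map-reduce/lib/AnalysePatientRecords.jar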

1 Answer

Check out Oozie's documentation on Hive action logging. As it notes, "Hive action logs are redirected to the Oozie Launcher map-reduce job task STDOUT/STDERR that runs Hive" — the same launcher mechanism applies to your map-reduce action. @YoungHobbit also pointed out in a comment that the Oozie launcher job's log is the place to check.

You can reach that log from the Oozie web console: open the action's pop-up, follow its 'Console URL' link, and from the Hadoop job-tracker web console navigate to the Oozie launcher map-reduce job's task logs.
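
If you prefer the command line, the Oozie CLI can pull the same information (a sketch, assuming the default server URL; substitute your own workflow id):

oozie job -oozie http://localhost:11000/oozie -info <workflow-id>
oozie job -oozie http://localhost:11000/oozie -log <workflow-id>

The -info output lists each action's external id and console URL; the external id (job_...) corresponds to a YARN application id (application_...), so you can also dump the task logs directly:

yarn logs -applicationId <application-id>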