I'm currently trying to find out what happens when a MapReduce job starts by putting some System.out.println() calls at certain places in the code, but none of these print statements show up in my terminal while the job is running. Can someone help me figure out exactly what I'm doing wrong here?
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountJob {

    public static int iterations;

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Debug output I expect to see in the terminal.
            System.out.println("blalblbfbbfbbbgghghghghghgh");
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                // Take one token per loop pass; calling nextToken() twice here
                // would skip tokens and can throw NoSuchElementException.
                String myWord = itr.nextToken();
                int n = 0;
                while (n < 5) {
                    myWord = myWord + "Test my appending words";
                    n++;
                }
                System.out.println("Print my word: " + myWord);
                word.set(myWord);
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        if (args.length != 3) {
            System.err.println("Usage: WordCountJob <in> <out> <iterations>");
            System.exit(2);
        }
        iterations = Integer.parseInt(args[2]);
        Path inPath = new Path(args[0]);
        Path outPath = null;
        for (int i = 0; i < iterations; ++i) {
            System.out.println("Iteration number: " + i);
            outPath = new Path(args[1] + i);
            Job job = new Job(conf, "WordCountJob");
            job.setJarByClass(WordCountJob.class);
            job.setMapperClass(TokenizerMapper.class);
            job.setCombinerClass(IntSumReducer.class);
            job.setReducerClass(IntSumReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, inPath);
            FileOutputFormat.setOutputPath(job, outPath);
            job.waitForCompletion(true);
            // The output of this iteration becomes the input of the next one.
            inPath = outPath;
        }
    }
}
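For reference, I submit the job roughly like this (the jar name and HDFS paths below are placeholders, not my real setup; the third argument is the iteration count the driver expects):

hadoop jar wordcountjob.jar WordCountJob /user/me/input /user/me/output 3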
mapreduce hadoop