hadoop package-info source code


hadoop package-info code

File path: /hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/package-info.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** Rumen is a data extraction and analysis tool built for 
 * <a href="http://hadoop.apache.org/">Apache Hadoop</a>. Rumen mines job history
 * logs to extract meaningful data and stores it into an easily-parsed format.
 * 
 * The default output format of Rumen is <a href="http://www.json.org">JSON</a>.
 * Rumen uses the <a href="http://jackson.codehaus.org/">Jackson</a> library to 
 * create JSON objects.
 * <br><br>
 * 
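 * Rumen's standard command-line entry point is 
 * {@link org.apache.hadoop.tools.rumen.TraceBuilder}. Before the per-class 
 * walkthrough below, here is a minimal sketch of driving it from code. It 
 * assumes TraceBuilder follows the usual {@link org.apache.hadoop.util.Tool} 
 * contract and takes the trace output, the topology output and the job 
 * history input as its arguments:
 * <pre>
 * <code>
 *   // Hypothetical driver: run TraceBuilder over a job history directory,
 *   // writing the job trace and the cluster topology as JSON.
 *   String[] args = {
 *     "file:///tmp/job-trace.json",  // job trace output
 *     "file:///tmp/topology.json",   // cluster topology output
 *     "file:///tmp/history/"         // job history input
 *   };
 *   int exitCode = ToolRunner.run(new TraceBuilder(), args);
 * </code>
 * </pre>
 * <br><br>
 * 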
 * The following classes can be used to programmatically invoke Rumen:
 * <ol>
 *   <li>
 *    {@link org.apache.hadoop.tools.rumen.JobConfigurationParser}<br>
 *      A parser to parse and filter out interesting properties from job 
 *      configuration.
 *      
 *      <br><br>
 *      <i>Sample code</i>:
 *      <pre>
 *      <code>
 *        // An example to parse and filter out job name
 *        
 *        String conf_filename = .. // assume the job configuration filename here
 *        
 *        // construct a list of interesting properties
 *        List&lt;String&gt; interestedProperties = new ArrayList&lt;String&gt;();
 *        interestedProperties.add("mapreduce.job.name");
 *        
 *        JobConfigurationParser jcp = 
 *          new JobConfigurationParser(interestedProperties);
 *
 *        InputStream in = new FileInputStream(conf_filename);
 *        Properties parsedProperties = jcp.parse(in);
 *     </code>
 *     </pre>
 *     Some of the commonly used interesting properties are enumerated in 
 *     {@link org.apache.hadoop.tools.rumen.JobConfPropertyNames}. <br><br>
 *     
 *     <b>Note:</b>
 *        A single instance of {@link org.apache.hadoop.tools.rumen.JobConfigurationParser} 
 *        can be used to parse multiple job configuration files. 
 *     
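 *     <br><br>
 *     As an illustration of both points, the following sketch (not part of 
 *     the Rumen sources) seeds the parser with the candidate keys from 
 *     {@link org.apache.hadoop.tools.rumen.JobConfPropertyNames} and reuses 
 *     one parser instance across several configuration files; it assumes the 
 *     enum exposes its candidate keys via <code>getCandidates()</code>.
 *     <pre>
 *     <code>
 *       // collect the commonly used property names
 *       List&lt;String&gt; interestedProperties = new ArrayList&lt;String&gt;();
 *       for (JobConfPropertyNames candidates : JobConfPropertyNames.values()) {
 *         interestedProperties.addAll(Arrays.asList(candidates.getCandidates()));
 *       }
 *
 *       // one parser instance, many configuration files
 *       JobConfigurationParser jcp = 
 *         new JobConfigurationParser(interestedProperties);
 *       for (String confFile : confFilenames) { // assume the filenames here
 *         InputStream in = new FileInputStream(confFile);
 *         try {
 *           Properties parsedProperties = jcp.parse(in);
 *           // ... use the parsed properties
 *         } finally {
 *           in.close();
 *         }
 *       }
 *     </code>
 *     </pre>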
 *   </li>
 *   <li>
 *    {@link org.apache.hadoop.tools.rumen.JobHistoryParser} <br>
 *      A parser that parses job history files. It is an interface; the actual 
 *      implementations are defined as an enum in 
 *      {@link org.apache.hadoop.tools.rumen.JobHistoryParserFactory}. Note that
 *      {@link org.apache.hadoop.tools.rumen.RewindableInputStream}
 *      is a wrapper class around {@link java.io.InputStream} that makes the 
 *      input stream rewindable.
 *      
 *      <br>
 *      <i>Sample code</i>:
 *      <pre>
 *      <code>
 *        // An example to parse a current job history file, i.e. a job history 
 *        // file for which the version is known
 *        
 *        String filename = .. // assume the job history filename here
 *        
 *        InputStream in = new FileInputStream(filename);
 *        
 *        HistoryEvent event = null;
 *        
 *        JobHistoryParser parser = new CurrentJHParser(in);
 *        
 *        event = parser.nextEvent();
 *        // process all the events
 *        while (event != null) {
 *          // ... process the event
 *          event = parser.nextEvent();
 *        }
 *        
 *        // close the parser and the underlying stream
 *        parser.close();
 *      </code>
 *      </pre>
 *      
 *      {@link org.apache.hadoop.tools.rumen.JobHistoryParserFactory} provides a 
 *      {@link org.apache.hadoop.tools.rumen.JobHistoryParserFactory#getParser(org.apache.hadoop.tools.rumen.RewindableInputStream)}
 *      API to get a parser for parsing the job history file. Note that this
 *      API can be used if the job history version is unknown.<br><br>
 *      <i>Sample code</i>:
 *      <pre>
 *      <code>
 *        // An example to parse a job history for which the version is not 
 *        // known, i.e. using JobHistoryParserFactory.getParser()
 *        
 *        String filename = .. // assume the job history filename here
 *        
 *        InputStream in = new FileInputStream(filename);
 *        RewindableInputStream ris = new RewindableInputStream(in);
 *        
 *        // JobHistoryParserFactory will check and return a parser that can
 *        // parse the file
 *        JobHistoryParser parser = JobHistoryParserFactory.getParser(ris);
 *        
 *        // now use the parser to parse the events
 *        HistoryEvent event = parser.nextEvent();
 *        while (event != null) {
 *          // ... process the event
 *          event = parser.nextEvent();
 *        }
 *        
 *        parser.close();
 *      </code>
 *      </pre>
 *      <b>Note:</b>
 *        Create one instance to parse a job history log and close it after use.
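 *        <br><br>
 *        A short illustrative sketch of that lifecycle, using the same 
 *        CurrentJHParser as above: one parser per history file, closed in a 
 *        finally block once its events are consumed.
 *        <pre>
 *        <code>
 *          for (String historyFile : historyFilenames) { // assume the filenames
 *            JobHistoryParser parser = 
 *              new CurrentJHParser(new FileInputStream(historyFile));
 *            try {
 *              HistoryEvent event;
 *              while ((event = parser.nextEvent()) != null) {
 *                // ... process the event
 *              }
 *            } finally {
 *              // closes the parser and the underlying stream
 *              parser.close();
 *            }
 *          }
 *        </code>
 *        </pre>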
 *  </li>
 *  <li>
 *    {@link org.apache.hadoop.tools.rumen.TopologyBuilder}<br>
 *      Builds the cluster topology based on the job history events. Every 
 *      job history file consists of events. Each event can be represented using
 *      {@link org.apache.hadoop.mapreduce.jobhistory.HistoryEvent}. 
 *      These events can be passed to {@link org.apache.hadoop.tools.rumen.TopologyBuilder} using 
 *      {@link org.apache.hadoop.tools.rumen.TopologyBuilder#process(org.apache.hadoop.mapreduce.jobhistory.HistoryEvent)}.
 *      A cluster topology can be represented using {@link org.apache.hadoop.tools.rumen.LoggedNetworkTopology}.
 *      Once all the job history events are processed, the cluster 
 *      topology can be obtained using {@link org.apache.hadoop.tools.rumen.TopologyBuilder#build()}.
 *      
 *      <br><br>
 *      <i>Sample code</i>:
 *      <pre>
 *      <code>
 *        // Building topology for a job history file represented using 
 *        // 'filename' and the corresponding configuration file represented 
 *        // using 'conf_filename'
 *        String filename = .. // assume the job history filename here
 *        String conf_filename = .. // assume the job configuration filename here
 *        
 *        InputStream jobConfInputStream = new FileInputStream(conf_filename);
 *        InputStream jobHistoryInputStream = new FileInputStream(filename);
 *        
 *        TopologyBuilder tb = new TopologyBuilder();
 *        
 *        // construct a list of interesting properties
 *        List&lt;String&gt; interestingProperties = new ArrayList&lt;String&gt;();
 *        // add the interesting properties here
 *        interestingProperties.add("mapreduce.job.name");
 *        
 *        JobConfigurationParser jcp = 
 *          new JobConfigurationParser(interestingProperties);
 *        
 *        // parse the configuration file
 *        tb.process(jcp.parse(jobConfInputStream));
 *        
 *        // read the job history file and pass it to the 
 *        // TopologyBuilder.
 *        JobHistoryParser parser = new CurrentJHParser(jobHistoryInputStream);
 *        HistoryEvent e;
 *        
 *        // read and process all the job history events
 *        while ((e = parser.nextEvent()) != null) {
 *          tb.process(e);
 *        }
 *        
 *        LoggedNetworkTopology topology = tb.build();
 *      </code>
 *      </pre>
 *  </li>
 *  <li>
 *    {@link org.apache.hadoop.tools.rumen.JobBuilder}<br>
 *      Summarizes a job history file.
 *      {@link org.apache.hadoop.tools.rumen.JobHistoryUtils} provides the 
 *      {@link org.apache.hadoop.tools.rumen.JobHistoryUtils#extractJobID(String)} 
 *      API for extracting the job ID from job history or job configuration 
 *      filenames; the extracted ID can be used to instantiate 
 *      {@link org.apache.hadoop.tools.rumen.JobBuilder}. 
 *      {@link org.apache.hadoop.tools.rumen.JobBuilder} generates a 
 *      {@link org.apache.hadoop.tools.rumen.LoggedJob} object via 
 *      {@link org.apache.hadoop.tools.rumen.JobBuilder#build()}. 
 *      See {@link org.apache.hadoop.tools.rumen.LoggedJob} for more details.
 *      
 *      <br><br>
 *      <i>Sample code</i>:
 *      <pre>
 *      <code>
 *        // An example to summarize a current job history file 'filename'
 *        // and the corresponding configuration file 'conf_filename'
 *        
 *        String filename = .. // assume the job history filename here
 *        String conf_filename = .. // assume the job configuration filename here
 *        
 *        InputStream jobConfInputStream = new FileInputStream(conf_filename);
 *        InputStream jobHistoryInputStream = new FileInputStream(filename);
 *        
 *        String jobID = JobHistoryUtils.extractJobID(filename);
 *        JobBuilder jb = new JobBuilder(jobID);
 *        
 *        // construct a list of interesting properties
 *        List&lt;String&gt; interestingProperties = new ArrayList&lt;String&gt;();
 *        // add the interesting properties here
 *        interestingProperties.add("mapreduce.job.name");
 *        
 *        JobConfigurationParser jcp = 
 *          new JobConfigurationParser(interestingProperties);
 *        
 *        // parse the configuration file
 *        jb.process(jcp.parse(jobConfInputStream));
 *        
 *        // parse the job history file
 *        JobHistoryParser parser = new CurrentJHParser(jobHistoryInputStream);
 *        try {
 *          HistoryEvent e;
 *          // read and process all the job history events
 *          while ((e = parser.nextEvent()) != null) {
 *            jb.process(e);
 *          }
 *        } finally {
 *          parser.close();
 *        }
 *        
 *        LoggedJob job = jb.build();
 *      </code>
 *      </pre>
 *     <b>Note:</b>
 *       The order in which the job configuration file and the job history 
 *       file are parsed is not important. Use a single JobBuilder instance 
 *       to process both the history file and the job configuration.
 *   </li>
 *   <li>
 *    {@link org.apache.hadoop.tools.rumen.DefaultOutputter}<br>
 *      Implements {@link org.apache.hadoop.tools.rumen.Outputter} and writes 
 *      JSON objects in text format to the output file. 
 *      {@link org.apache.hadoop.tools.rumen.DefaultOutputter} can be 
 *      initialized with the output filename.
 *      
 *      <br><br>
 *      <i>Sample code</i>:  
 *      <pre>
 *      <code>
 *        // An example to summarize a current job history file represented by
 *        // 'filename' and the configuration filename represented using 
 *        // 'conf_filename'. Also output the job summary to 'out.json' along 
 *        // with the cluster topology to 'topology.json'.
 *        
 *        String filename = .. // assume the job history filename here
 *        String conf_filename = .. // assume the job configuration filename here
 *        
 *        Configuration conf = new Configuration();
 *        DefaultOutputter outputter = new DefaultOutputter();
 *        outputter.init(new Path("out.json"), conf);
 *        
 *        InputStream jobConfInputStream = new FileInputStream(conf_filename);
 *        InputStream jobHistoryInputStream = new FileInputStream(filename);
 *        
 *        // extract the job-id from the filename
 *        String jobID = JobHistoryUtils.extractJobID(filename);
 *        JobBuilder jb = new JobBuilder(jobID);
 *        TopologyBuilder tb = new TopologyBuilder();
 *        
 *        // construct a list of interesting properties
 *        List&lt;String&gt; interestingProperties = new ArrayList&lt;String&gt;();
 *        // add the interesting properties here
 *        interestingProperties.add("mapreduce.job.name");
 *        
 *        JobConfigurationParser jcp =
 *          new JobConfigurationParser(interestingProperties);
 *          
 *        // parse the configuration file
 *        tb.process(jcp.parse(jobConfInputStream));
 *        
 *        // read the job history file and pass it to the
 *        // TopologyBuilder.
 *        JobHistoryParser parser = new CurrentJHParser(jobHistoryInputStream);
 *        HistoryEvent e;
 *        while ((e = parser.nextEvent()) != null) {
 *          jb.process(e);
 *          tb.process(e);
 *        }
 *        
 *        LoggedJob j = jb.build();
 *        
 *        // serialize the job summary in json (text) format
 *        outputter.output(j);
 *        
 *        // close
 *        outputter.close();
 *        
 *        do.init("topology.json", conf);
 *        
 *        // get the cluster topology using TopologyBuilder
 *        LoggedNetworkTopology topology = tb.build();
 *        
 *        // serialize the cluster topology in json (text) format
 *        outputter.output(topology);
 *        
 *        // close
 *        outputter.close();
 *      </code>
 *      </pre>
 *   </li>
 *   <li>
 *    {@link org.apache.hadoop.tools.rumen.JobTraceReader}<br>
 *      A reader for reading {@link org.apache.hadoop.tools.rumen.LoggedJob} serialized using 
 *      {@link org.apache.hadoop.tools.rumen.DefaultOutputter}. {@link org.apache.hadoop.tools.rumen.LoggedJob} 
 *      provides various APIs for extracting job details. The following are 
 *      the most commonly used:
 *        <ul>
 *          <li>{@link org.apache.hadoop.tools.rumen.LoggedJob#getMapTasks()} : Get the map tasks</li>
 *          <li>{@link org.apache.hadoop.tools.rumen.LoggedJob#getReduceTasks()} : Get the reduce tasks</li>
 *          <li>{@link org.apache.hadoop.tools.rumen.LoggedJob#getOtherTasks()} : Get the setup/cleanup tasks</li>
 *          <li>{@link org.apache.hadoop.tools.rumen.LoggedJob#getOutcome()} : Get the job's outcome</li>
 *          <li>{@link org.apache.hadoop.tools.rumen.LoggedJob#getSubmitTime()} : Get the job's submit time</li>
 *          <li>{@link org.apache.hadoop.tools.rumen.LoggedJob#getFinishTime()} : Get the job's finish time</li>
 *        </ul>
 *        
 *      <br><br>
 *      <i>Sample code</i>:
 *      <pre>
 *      <code>
 *        // An example to read job summary from a trace file 'out.json'.
 *        JobTraceReader reader = new JobTraceReader("out.json");
 *        LoggedJob job = reader.getNext();
 *        while (job != null) {
 *          // .... process job level information
 *          for (LoggedTask task : job.getMapTasks()) {
 *            // process all the map tasks in the job
 *            for (LoggedTaskAttempt attempt : task.getAttempts()) {
 *              // process all the map task attempts in the job
 *            }
 *          }
 *          
 *          // get the next job
 *          job = reader.getNext();
 *        }
 *        reader.close();
 *      </code>
 *      </pre>         
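 *      The remaining job-level getters fit into the same loop; a small 
 *      illustrative addition (values rendered via string concatenation, 
 *      since their exact return types are not shown here):
 *      <pre>
 *      <code>
 *        // print a one-line summary per job
 *        System.out.println("outcome=" + job.getOutcome()
 *            + " submitted=" + job.getSubmitTime()
 *            + " finished=" + job.getFinishTime());
 *      </code>
 *      </pre>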
 *   </li>
 *   <li>
 *    {@link org.apache.hadoop.tools.rumen.ClusterTopologyReader}<br>
 *      A reader to read {@link org.apache.hadoop.tools.rumen.LoggedNetworkTopology} serialized using 
 *      {@link org.apache.hadoop.tools.rumen.DefaultOutputter}. {@link org.apache.hadoop.tools.rumen.ClusterTopologyReader} can be 
 *      initialized using the serialized topology filename. 
 *      {@link org.apache.hadoop.tools.rumen.ClusterTopologyReader#get()} can
 *      be used to get the 
 *      {@link org.apache.hadoop.tools.rumen.LoggedNetworkTopology}. 
 *      
 *      <br><br>
 *      <i>Sample code</i>:
 *      <pre>
 *      <code>
 *        // An example to read the cluster topology from a topology output file
 *        // 'topology.json'
 *        ClusterTopologyReader reader = new ClusterTopologyReader("topology.json");
 *        LoggedNetworkTopology topology  = reader.get();
 *        for (LoggedNetworkTopology t : topology.getChildren()) {
 *          // process the cluster topology
 *        }
 *        reader.close();
 *      </code>
 *      </pre>
 *   </li>
 * </ol>     
 */

package org.apache.hadoop.tools.rumen;
