hadoop DataStatistics 源码

  • 2022-10-20
  • 浏览 (197)

hadoop DataStatistics 代码

文件路径:/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DataStatistics.java

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.mapreduce.v2.app.speculate;

/**
 * Running accumulator for a stream of {@code double} samples.
 *
 * <p>Only three values are stored: the number of samples, their sum, and the
 * sum of their squares. Mean, variance, standard deviation and the derived
 * figures are computed from those on demand.
 *
 * <p>Every method that reads or writes the accumulated state is
 * {@code synchronized} on this instance, so each call observes a consistent
 * snapshot of the three fields.
 */
public class DataStatistics {

  /**
   * Factor used to calculate the confidence interval within 95%.
   */
  private static final double DEFAULT_CI_FACTOR = 1.96;

  /** Number of samples accumulated so far. */
  private int count = 0;
  /** Sum of all accumulated samples. */
  private double sum = 0;
  /** Sum of the squares of all accumulated samples. */
  private double sumSquares = 0;

  /** Creates an empty accumulator (no samples). */
  public DataStatistics() {
  }

  /**
   * Creates an accumulator that already contains a single sample.
   *
   * @param initNum the first sample
   */
  public DataStatistics(final double initNum) {
    this.count = 1;
    this.sum = initNum;
    this.sumSquares = initNum * initNum;
  }

  /**
   * Adds one sample to the accumulator.
   *
   * @param newNum the sample to add
   */
  public synchronized void add(final double newNum) {
    this.count++;
    this.sum += newNum;
    this.sumSquares += newNum * newNum;
  }

  /**
   * Replaces the contribution of one sample with a new value. The sample
   * count is left unchanged; only the sum and the sum of squares are adjusted.
   *
   * @param old the value whose contribution is removed
   * @param update the value whose contribution is added instead
   */
  public synchronized void updateStatistics(final double old,
      final double update) {
    this.sum += update - old;
    this.sumSquares += (update * update) - (old * old);
  }

  /**
   * Returns the arithmetic mean of the samples.
   *
   * @return {@code sum / count}, or {@code 0.0} when there are no samples
   */
  public synchronized double mean() {
    return count == 0 ? 0.0 : sum / count;
  }

  /**
   * Returns the population variance, computed as {@code E(X^2) - E(X)^2}.
   *
   * @return the variance, or {@code 0.0} when there is at most one sample
   */
  public synchronized double var() {
    if (count <= 1) {
      return 0.0;
    }
    final double mean = mean();
    // The difference is clamped so the result is never negative
    // (e.g. when floating-point rounding pushes it slightly below zero).
    return Math.max((sumSquares / count) - mean * mean, 0.0d);
  }

  /**
   * Returns the standard deviation, i.e. the square root of {@link #var()}.
   *
   * @return the standard deviation
   */
  public synchronized double std() {
    return Math.sqrt(this.var());
  }

  /**
   * Returns the value lying {@code sigma} standard deviations above the mean.
   *
   * @param sigma multiplier applied to the standard deviation
   * @return {@code mean() + std() * sigma}, or {@code 0.0} when there are no
   *         samples
   */
  public synchronized double outlier(final float sigma) {
    if (count == 0) {
      return 0.0;
    }
    return mean() + std() * sigma;
  }

  /**
   * Returns the number of samples accumulated so far.
   *
   * @return the sample count, widened to {@code double}
   */
  public synchronized double count() {
    return count;
  }

  /**
   * Calculates the mean value within 95% ConfidenceInterval.
   * 1.96 is standard for 95 %
   *
   * @return the mean value adding 95% confidence interval, or {@code 0.0}
   *         when there is at most one sample
   */
  public synchronized double meanCI() {
    if (count <= 1) {
      return 0.0;
    }
    final double currMean = mean();
    final double currStd = std();
    return currMean + (DEFAULT_CI_FACTOR * currStd / Math.sqrt(count));
  }

  /**
   * Renders the raw accumulators together with the derived mean, standard
   * deviation and confidence-interval mean.
   *
   * <p>Synchronized like the other accessors so that all reported figures
   * come from the same state, even if another thread is adding samples.
   *
   * @return a human-readable summary of this accumulator
   */
  @Override
  public synchronized String toString() {
    return "DataStatistics: count is " + count + ", sum is " + sum
        + ", sumSquares is " + sumSquares + " mean is " + mean()
        + " std() is " + std() + ", meanCI() is " + meanCI();
  }
}

相关信息

hadoop 源码目录

相关文章

hadoop DefaultSpeculator 源码

hadoop ExponentiallySmoothedTaskRuntimeEstimator 源码

hadoop LegacyTaskRuntimeEstimator 源码

hadoop NullTaskRuntimesEngine 源码

hadoop SimpleExponentialTaskRuntimeEstimator 源码

hadoop Speculator 源码

hadoop SpeculatorEvent 源码

hadoop StartEndTimesBase 源码

hadoop TaskRuntimeEstimator 源码

hadoop TaskSpeculationPredicate 源码

0  赞