hadoop源码解析—— conf包中Configuration.java解析(一)

时间:2022-03-16 17:18:43

hadoop源码解析—— conf包中Configuration.java解析(一)

地方


package org.apache.hadoop.conf;

import java.io.BufferedInputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.WeakHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
//引入了IO的流类
//引入了网络编程类,用来封装或获取网络资源
//引入了工具包中的集合类。其中StringTokenizer在hadoop中用的非常多,特别是在mapreduce编程中,经常需要这个工具类来处理数据。
//它起着分词器的作用。
//关于CopyOnWriteArrayList,之前没接触过,具体推荐 http://blog.csdn.net/imzoer/article/details/9751591,就是实现了线程安全
//关于regex.Matcher和regex.Pattern,看regex可知就是正则类,具体用法不太一样 http://ningtukun.blog.163.com/blog/static/186541445201292984311656/
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.util.StringUtils;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

/**
* Provides access to configuration parameters.
*
* <h4 id="Resources">Resources</h4>
*
* <p>Configurations are specified by resources. A resource contains a set of
* name/value pairs as XML data. Each resource is named by either a
* <code>String</code> or by a {@link Path}. If named by a <code>String</code>,
* then the classpath is examined for a file with that name. If named by a
* <code>Path</code>, then the local filesystem is examined directly, without
* referring to the classpath.
*
* <p>Unless explicitly turned off, Hadoop by default specifies two
* resources, loaded in-order from the classpath: <ol>
* <li><tt><a href="{@docRoot}/../core-default.html">core-default.xml</a>
* </tt>: Read-only defaults for hadoop.</li>
* <li><tt>core-site.xml</tt>: Site-specific configuration for a given hadoop
* installation.</li>
* </ol>
* Applications may add additional resources, which are loaded
* subsequent to these resources in the order they are added.
*
* <h4 id="FinalParams">Final Parameters</h4>
*
* <p>Configuration parameters may be declared <i>final</i>.
* Once a resource declares a value final, no subsequently-loaded
* resource can alter that value.
* For example, one might define a final parameter with:
* <tt><pre>
* &lt;property&gt;
* &lt;name&gt;dfs.client.buffer.dir&lt;/name&gt;
* &lt;value&gt;/tmp/hadoop/dfs/client&lt;/value&gt;
* <b>&lt;final&gt;true&lt;/final&gt;</b>
* &lt;/property&gt;</pre></tt>
*
* Administrators typically define parameters as final in
* <tt>core-site.xml</tt> for values that user applications may not alter.
*
* <h4 id="VariableExpansion">Variable Expansion</h4>
*
* <p>Value strings are first processed for <i>variable expansion</i>. The
* available properties are:<ol>
* <li>Other properties defined in this Configuration; and, if a name is
* undefined here,</li>
* <li>Properties in {@link System#getProperties()}.</li>
* </ol>
*
* <p>For example, if a configuration resource contains the following property
* definitions:
* <tt><pre>
* &lt;property&gt;
* &lt;name&gt;basedir&lt;/name&gt;
* &lt;value&gt;/user/${<i>user.name</i>}&lt;/value&gt;
* &lt;/property&gt;
*
* &lt;property&gt;
* &lt;name&gt;tempdir&lt;/name&gt;
* &lt;value&gt;${<i>basedir</i>}/tmp&lt;/value&gt;
* &lt;/property&gt;</pre></tt>
*
* When <tt>conf.get("tempdir")</tt> is called, then <tt>${<i>basedir</i>}</tt>
* will be resolved to another property in this Configuration, while
* <tt>${<i>user.name</i>}</tt> would then ordinarily be resolved to the value
* of the System property with that name.
*/
//一大段注释。第一句说这个类是用来提供访问配置中属性的渠道
//第二段话说配置文件的构造,XML文件中键值对。
//第三段话说除非主动关闭,否则hadoop会默认按顺序加载两个配置文件(core-default.xml和core-site.xml)。应用还可以追加其他资源,后加载的资源会覆盖先加载资源中的同名属性(被声明为final的除外)
//第四段说了配置文件中属性标记为final会怎么样
//第五段说配置文件中的属性可以用变量来表示,可以不是具体的实值。还举了个例子。
public class Configuration implements Iterable<Map.Entry<String,String>>,
Writable {
//实现了Iterable和Writable接口。
//实现Iterable接口,可以调用iterator()方法进行迭代,也可以直接用于for-each循环
//Map.Entry是Map的内部接口,表示Map中的一个键值对;遍历Map的entrySet()时得到的就是它
//实现了Writable接口。hadoop没有采用Java的序列化(具体原因不解释),而是引入了自己的序列化系统,所有的
//序列化对象都要实现Writable接口。以后会遇到。
private static final Log LOG =
LogFactory.getLog(Configuration.class);
// Class-wide logger obtained from commons-logging for Configuration.class.
private boolean quietmode = true;
// "Quiet mode" flag, on by default. getProps() hands it to loadResources()
// as the `quiet` argument; when it is false, loadResource() logs a
// "parsing ..." info message for each resource it reads.
/**
* Resources added to this instance, in the order they were added. Entries
* are appended by addResourceObject(); the visible callers pass a String,
* URL, Path or InputStream, hence the element type Object.
*/
private ArrayList<Object> resources = new ArrayList<Object>();
/**
* Names of configuration parameters marked <b>final</b>. The set is emptied
* by reloadConfiguration().
*/
private Set<String> finalParameters = new HashSet<String>();
// Whether loadResources() also loads the shared default resources
// (defaultResources) in addition to this instance's own resources.
private boolean loadDefaults = true;
/**
* Registry of Configuration instances. Every constructor puts {@code this}
* into the map with a null value (only the keys matter), and
* addDefaultResource() walks the key set to reload every instance whose
* loadDefaults flag is set. All visible accesses hold the lock on
* Configuration.class.
*/
private static final WeakHashMap<Configuration,Object> REGISTRY =
new WeakHashMap<Configuration,Object>();
/**
* Names of the default resources shared by all instances. Resources are
* loaded in the order of the list entries. Entries are added through
* addDefaultResource(), whose Javadoc requires the named file to be present
* in the classpath.
*/
private static final CopyOnWriteArrayList<String> defaultResources =
new CopyOnWriteArrayList<String>();
/**
* Flag to indicate whether, for each key, the resource that most recently
* loaded or updated the key is recorded (in updatingResource). It is false
* unless the private (Configuration, boolean) constructor enables it.
*/
private boolean storeResource;
/**
* Maps a key to the resource which modified or loaded the key most
* recently. Only allocated when storeResource is true; getProps() records
* the literal "Unknown" for keys that come from the overlay.
*/
private HashMap<String, String> updatingResource;
static{
//print deprecation warning if hadoop-site.xml is found in classpath
ClassLoader cL = Thread.currentThread().getContextClassLoader();
if (cL == null) {
cL = Configuration.class.getClassLoader();
}
if(cL.getResource("hadoop-site.xml")!=null) {
LOG.warn("DEPRECATED: hadoop-site.xml found in the classpath. " +
"Usage of hadoop-site.xml is deprecated. Instead use core-site.xml, "
+ "mapred-site.xml and hdfs-site.xml to override properties of " +
"core-default.xml, mapred-default.xml and hdfs-default.xml " +
"respectively");
}
addDefaultResource("core-default.xml");
addDefaultResource("core-site.xml");
}
//静态代码块。加载了两个核心配置文件,一个是默认的,一个是用户自己配置的。
//如果加载了hadoop-site.xml,则发个警告信息,说它不推荐用,以及推荐用哪个
// Key/value pairs obtained by parsing the resources. Stays null until
// getProps() loads it, and is reset to null by reloadConfiguration().
private Properties properties;
// Key/value pairs assigned through set(). They come from the application,
// not from a parsed resource, and getProps() copies them over the parsed
// values after every (re)load.
private Properties overlay;
// Class loader used by getResource() and getClassByName().
private ClassLoader classLoader;
// Instance initializer: prefer the thread's context class loader and fall
// back to the loader that loaded Configuration itself.
{
  ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
  classLoader = (contextLoader != null)
      ? contextLoader
      : Configuration.class.getClassLoader();
}
/**
 * Creates a configuration that also loads the default resources.
 * Equivalent to {@code new Configuration(true)}.
 */
public Configuration() {
  this(true);
}
/**
 * Creates a configuration whose use of the default resources can be
 * switched off.
 *
 * When {@code loadDefaults} is false, the new instance does not load
 * resources from the default files. The instance is registered in the
 * class-wide registry, and per-key resource tracking is disabled.
 *
 * @param loadDefaults specifies whether to load from the default files
 */
public Configuration(boolean loadDefaults) {
  this.storeResource = false;
  this.loadDefaults = loadDefaults;
  if (LOG.isDebugEnabled()) {
    // The exception is never thrown; it only captures the creator's stack trace.
    IOException creationTrace = new IOException("config()");
    LOG.debug(StringUtils.stringifyException(creationTrace));
  }
  synchronized (Configuration.class) {
    REGISTRY.put(this, null);
  }
}
/**
 * Creates a copy of {@code other} that can additionally remember, for each
 * key, the resource which loaded or updated that key most recently.
 *
 * The settings are cloned by delegating to the public copy constructor;
 * the {@code loadDefaults} flag of {@code other} is carried over as well.
 *
 * @param other the configuration from which to clone settings
 * @param storeResource whether the resource of each key is to be recorded;
 *        the key-to-resource map is only allocated when this is true
 */
private Configuration(Configuration other, boolean storeResource) {
  this(other);
  this.storeResource = storeResource;
  this.loadDefaults = other.loadDefaults;
  if (this.storeResource) {
    this.updatingResource = new HashMap<String, String>();
  }
}
/**
 * Creates a configuration with the same settings as another one.
 *
 * The resource list, the loaded properties (if any), the overlay (if any)
 * and the set of final parameters are copied, so later changes to either
 * instance do not affect the other. The {@code loadDefaults} flag, the
 * quiet-mode flag and the class loader of {@code other} are carried over
 * too; without them a copy of a {@code Configuration(false)} would start
 * loading the default resources on its next reload. All state of
 * {@code other} is read while holding its lock so that a concurrent
 * {@code addResource}/{@code reloadConfiguration} cannot be observed
 * half-applied. The new instance is added to the class-wide registry.
 *
 * @param other the configuration from which to clone settings.
 */
@SuppressWarnings("unchecked")
public Configuration(Configuration other) {
  if (LOG.isDebugEnabled()) {
    // The exception is never thrown; it only captures the creator's stack trace.
    LOG.debug(StringUtils.stringifyException
        (new IOException("config(config)")));
  }

  synchronized (other) {
    // clone() returns Object; the element type is known to be Object.
    this.resources = (ArrayList<Object>) other.resources.clone();
    if (other.properties != null) {
      this.properties = (Properties) other.properties.clone();
    }
    if (other.overlay != null) {
      this.overlay = (Properties) other.overlay.clone();
    }
    this.finalParameters = new HashSet<String>(other.finalParameters);
    this.loadDefaults = other.loadDefaults;
    this.quietmode = other.quietmode;
    this.classLoader = other.classLoader;
  }

  synchronized (Configuration.class) {
    REGISTRY.put(this, null);
  }
}
/**
 * Registers a default resource for all configurations. Default resources
 * are loaded in the order in which they were registered.
 *
 * Registering a name that is already known does nothing. Otherwise every
 * registered configuration that loads defaults is told to reload.
 *
 * @param name file name. File should be present in the classpath.
 */
public static synchronized void addDefaultResource(String name) {
  if (defaultResources.contains(name)) {
    return;
  }
  defaultResources.add(name);
  for (Configuration registered : REGISTRY.keySet()) {
    if (!registered.loadDefaults) {
      continue;
    }
    registered.reloadConfiguration();
  }
}
/**
 * Adds a configuration resource identified by name.
 *
 * Properties of this resource override those of previously added
 * resources, unless they were marked <a href="#FinalParams">final</a>.
 * Adding a resource triggers a reload of the configuration.
 *
 * @param name resource to be added, the classpath is examined for a file
 *             with that name.
 */
public void addResource(String name) {
  addResourceObject(name);
}

/**
 * Adds a configuration resource identified by URL.
 *
 * Properties of this resource override those of previously added
 * resources, unless they were marked <a href="#FinalParams">final</a>.
 * Adding a resource triggers a reload of the configuration.
 *
 * @param url url of the resource to be added; the resource is read from
 *            this URL directly, without referring to the classpath.
 */
public void addResource(URL url) {
  addResourceObject(url);
}

/**
 * Adds a configuration resource identified by a file path.
 *
 * Properties of this resource override those of previously added
 * resources, unless they were marked <a href="#FinalParams">final</a>.
 * Adding a resource triggers a reload of the configuration.
 *
 * @param file file-path of resource to be added, the local filesystem is
 *             examined directly to find the resource, without referring to
 *             the classpath.
 */
public void addResource(Path file) {
  addResourceObject(file);
}

/**
 * Adds a configuration resource that is read from an input stream.
 *
 * Properties of this resource override those of previously added
 * resources, unless they were marked <a href="#FinalParams">final</a>.
 * Adding a resource triggers a reload of the configuration.
 *
 * @param in InputStream to deserialize the object from.
 */
public void addResource(InputStream in) {
  addResourceObject(in);
}
//Hadoop在创建配置类的时候,通过addResource()的四个重载考虑了四种资源:
//
//URL资源(网络资源,指的是一个链接);
//
//CLASSPATH资源(String形式,按名称在classpath中查找);
//
//Path资源(用Hadoop的Path表示,使用斜线“/”作为分隔符;按照方法注释,它直接在本地文件系统上查找,不经过classpath);
//
//InputStream资源(直接从输入流中读取配置)
/**
 * Discards everything read from the added resources so that it is read
 * again on next access.
 *
 * Both the loaded properties and the set of final parameters are dropped.
 * The resources are re-read lazily, before the next value is accessed.
 * Values that were added via set methods are kept and will overlay the
 * values read from the resources. The method is synchronized on this
 * instance.
 */
public synchronized void reloadConfiguration() {
  finalParameters.clear();  // forget which keys the resources declared final
  properties = null;        // a null value makes getProps() load again
}
// Common implementation behind the addResource() overloads. The resource is
// typed Object because the overloads pass a String, URL, Path or
// InputStream. It is appended to the resource list and a reload is
// triggered so the new resource takes effect.
private synchronized void addResourceObject(Object resource) {
  resources.add(resource);
  reloadConfiguration();
}
// Matches one variable reference of the form ${name}; the name must be
// non-empty and may not contain '}', '$' or a space character.
private static final Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
// Maximum number of substitutions performed on a single value. It bounds the
// loop so that self-referencing variables cannot expand forever.
private static final int MAX_SUBST = 20;

/**
 * Expands ${name} references inside a value.
 *
 * References are replaced one at a time, always taking the first match in
 * the current text, so a replacement that itself contains references is
 * expanded in later rounds. For each name the system property of that name
 * is consulted first; only when it is absent (or not readable) is the raw,
 * unexpanded value from this configuration used. Expansion stops at the
 * first reference that cannot be resolved, and the text is returned with
 * that reference still in it.
 *
 * @param expr the text to expand, may be null
 * @return the expanded text, or null if {@code expr} is null
 * @throws IllegalStateException if the text still contains references
 *         after MAX_SUBST substitutions
 */
private String substituteVars(String expr) {
  if (expr == null) {
    return null;
  }
  Matcher match = varPat.matcher("");
  String eval = expr;
  for (int s = 0; s < MAX_SUBST; s++) {
    match.reset(eval);
    if (!match.find()) {
      return eval;  // nothing left to expand
    }
    String var = match.group();
    var = var.substring(2, var.length() - 1);  // remove ${ .. }
    String val = null;
    try {
      val = System.getProperty(var);
    } catch (SecurityException se) {
      // Not allowed to read the system property: fall back to the configuration.
      LOG.warn("Unexpected SecurityException in Configuration", se);
    }
    if (val == null) {
      val = getRaw(var);
    }
    if (val == null) {
      return eval;  // return literal ${var}: var is unbound
    }
    // substitute
    eval = eval.substring(0, match.start()) + val + eval.substring(match.end());
  }
  throw new IllegalStateException("Variable substitution depth too large: "
      + MAX_SUBST + " " + expr);
}
/**
 * Looks up the {@code name} property and returns its value after
 * <a href="#VariableExpansion">variable expansion</a>.
 *
 * @param name the property name.
 * @return the expanded value of the {@code name} property,
 *         or null if no such property exists.
 */
public String get(String name) {
  String unexpanded = getProps().getProperty(name);
  return substituteVars(unexpanded);
}

/**
 * Looks up the {@code name} property and returns its value exactly as
 * stored, without <a href="#VariableExpansion">variable expansion</a>.
 * This is the only difference from {@code get(String)}.
 *
 * @param name the property name.
 * @return the stored value of the {@code name} property,
 *         or null if no such property exists.
 */
public String getRaw(String name) {
  Properties loaded = getProps();
  return loaded.getProperty(name);
}
/**
 * Assigns {@code value} to the {@code name} property.
 *
 * The pair is stored both in the overlay, so that it survives a reload of
 * the resources, and in the currently loaded properties.
 *
 * @param name property name.
 * @param value property value.
 */
public void set(String name, String value) {
  Properties applicationValues = getOverlay();
  applicationValues.setProperty(name, value);
  Properties loaded = getProps();
  loaded.setProperty(name, value);
}

/**
 * Assigns a value to a property only when {@code get(name)} currently
 * returns null; an existing value is left untouched.
 *
 * @param name the property name
 * @param value the new value
 */
public void setIfUnset(String name, String value) {
  boolean alreadySet = get(name) != null;
  if (alreadySet) {
    return;
  }
  set(name, value);
}
// Returns the overlay (values assigned through set()), creating it on
// first use.
private synchronized Properties getOverlay() {
  if (overlay != null) {
    return overlay;
  }
  overlay = new Properties();
  return overlay;
}

/**
 * Looks up the {@code name} property, falling back to a default.
 *
 * The result, including a substituted {@code defaultValue}, goes through
 * variable expansion before it is returned.
 *
 * @param name property name.
 * @param defaultValue default value.
 * @return property value, or {@code defaultValue} if the property
 *         doesn't exist.
 */
public String get(String name, String defaultValue) {
  String unexpanded = getProps().getProperty(name, defaultValue);
  return substituteVars(unexpanded);
}

/**
 * Reads the {@code name} property as an {@code int}.
 *
 * Decimal values are accepted, as are hexadecimal values written with a
 * {@code 0x}/{@code 0X} prefix and an optional leading minus sign. If no
 * such property exists, or the value is not a valid {@code int}, then
 * {@code defaultValue} is returned.
 *
 * @param name property name.
 * @param defaultValue default value.
 * @return property value as an {@code int}, or {@code defaultValue}.
 */
public int getInt(String name, int defaultValue) {
  String text = get(name);
  if (text != null) {
    try {
      String hexDigits = getHexDigits(text);
      return (hexDigits == null)
          ? Integer.parseInt(text)
          : Integer.parseInt(hexDigits, 16);
    } catch (NumberFormatException notAnInt) {
      // malformed value: fall through to the default
    }
  }
  return defaultValue;
}

/**
 * Stores an {@code int} in the {@code name} property, using its decimal
 * string form.
 *
 * @param name property name.
 * @param value {@code int} value of the property.
 */
public void setInt(String name, int value) {
  String text = Integer.toString(value);
  set(name, text);
}


/**
 * Reads the {@code name} property as a {@code long}.
 *
 * Decimal values are accepted, as are hexadecimal values written with a
 * {@code 0x}/{@code 0X} prefix and an optional leading minus sign. If no
 * such property is specified, or the value is not a valid {@code long},
 * then {@code defaultValue} is returned.
 *
 * @param name property name.
 * @param defaultValue default value.
 * @return property value as a {@code long}, or {@code defaultValue}.
 */
public long getLong(String name, long defaultValue) {
  String text = get(name);
  if (text != null) {
    try {
      String hexDigits = getHexDigits(text);
      return (hexDigits == null)
          ? Long.parseLong(text)
          : Long.parseLong(hexDigits, 16);
    } catch (NumberFormatException notALong) {
      // malformed value: fall through to the default
    }
  }
  return defaultValue;
}

// Extracts the digits of a hexadecimal literal. A value of the form
// [-]0x... or [-]0X... yields the text after the prefix, with the minus sign
// (if any) put back in front; any other value yields null. The digits
// themselves are not validated here.
private String getHexDigits(String value) {
  boolean negative = value.startsWith("-");
  String unsigned = negative ? value.substring(1) : value;
  boolean hasHexPrefix = unsigned.startsWith("0x") || unsigned.startsWith("0X");
  if (!hasHexPrefix) {
    return null;
  }
  String digits = unsigned.substring(2);
  return negative ? "-" + digits : digits;
}

/**
 * Stores a {@code long} in the {@code name} property, using its decimal
 * string form.
 *
 * @param name property name.
 * @param value {@code long} value of the property.
 */
public void setLong(String name, long value) {
  String text = Long.toString(value);
  set(name, text);
}

/**
 * Reads the {@code name} property as a {@code float}.
 * If no such property is specified, or the value is not a valid
 * {@code float}, then {@code defaultValue} is returned.
 *
 * @param name property name.
 * @param defaultValue default value.
 * @return property value as a {@code float}, or {@code defaultValue}.
 */
public float getFloat(String name, float defaultValue) {
  String text = get(name);
  if (text != null) {
    try {
      return Float.parseFloat(text);
    } catch (NumberFormatException notAFloat) {
      // malformed value: fall through to the default
    }
  }
  return defaultValue;
}
/**
 * Stores a {@code float} in the {@code name} property, using the string
 * form produced by {@code Float.toString}.
 *
 * @param name property name.
 * @param value property value.
 */
public void setFloat(String name, float value) {
  String text = Float.toString(value);
  set(name, text);
}

/**
 * Reads the {@code name} property as a {@code boolean}.
 *
 * Only the exact strings {@code "true"} and {@code "false"} are
 * recognised. If no such property is specified, or the value is anything
 * else, then {@code defaultValue} is returned.
 *
 * @param name property name.
 * @param defaultValue default value.
 * @return property value as a {@code boolean}, or {@code defaultValue}.
 */
public boolean getBoolean(String name, boolean defaultValue) {
  String text = get(name);
  if ("true".equals(text)) {
    return true;
  }
  if ("false".equals(text)) {
    return false;
  }
  return defaultValue;
}

/**
 * Stores a {@code boolean} in the {@code name} property, as the string
 * {@code "true"} or {@code "false"}.
 *
 * @param name property name.
 * @param value {@code boolean} value of the property.
 */
public void setBoolean(String name, boolean value) {
  String text = Boolean.toString(value);
  set(name, text);
}

/**
 * Stores a {@code boolean} in the given property, but only if the property
 * is currently unset.
 *
 * @param name property name
 * @param value new value
 */
public void setBooleanIfUnset(String name, boolean value) {
  String text = Boolean.toString(value);
  setIfUnset(name, text);
}

/**
 * Stores an enum constant in the {@code name} property. This is
 * equivalent to {@code set(name, value.toString())}.
 *
 * @param name property name
 * @param value new value
 */
public <T extends Enum<T>> void setEnum(String name, T value) {
  String text = value.toString();
  set(name, text);
}

/**
 * Reads the {@code name} property as a constant of the enum type that
 * {@code defaultValue} belongs to.
 *
 * @param name Property name
 * @param defaultValue Value returned if no mapping exists
 * @return the constant whose name equals the property value, or
 *         {@code defaultValue} when the property is unset
 * @throws IllegalArgumentException If mapping is illegal for the type
 *         provided
 */
public <T extends Enum<T>> T getEnum(String name, T defaultValue) {
  final String text = get(name);
  if (text == null) {
    return defaultValue;
  }
  return Enum.valueOf(defaultValue.getDeclaringClass(), text);
}
// The methods above are the typed get/set accessors; they store values as
// strings and convert them on the way in or out.
/**
 * A set of positive integer ranges, parsed from text such as "2-3,5,7-".
 * Ranges are separated by commas and the lower/upper bounds of a range are
 * separated by a dash. Either bound may be left out, meaning "everything up
 * to" or "everything from". The example therefore stands for
 * 2, 3, 5, and 7, 8, 9, ...
 *
 * This is a static nested class of the configuration class.
 */
public static class IntegerRanges {
  /** One inclusive interval, from start to end. */
  private static class Range {
    int start;
    int end;
  }

  List<Range> ranges = new ArrayList<Range>();

  /** Creates an empty set of ranges. */
  public IntegerRanges() {
  }

  /**
   * Parses a comma separated list of ranges.
   *
   * A missing lower bound defaults to 0 and a missing upper bound to
   * {@code Integer.MAX_VALUE}; a single number stands for the range that
   * contains just that number.
   *
   * @param newValue the text to parse
   * @throws IllegalArgumentException if an element contains more than one
   *         dash, or if its lower bound is greater than its upper bound
   */
  public IntegerRanges(String newValue) {
    StringTokenizer elements = new StringTokenizer(newValue, ",");
    while (elements.hasMoreTokens()) {
      String element = elements.nextToken().trim();
      // A limit of 3 keeps a trailing empty bound ("7-") and exposes "a-b-c".
      String[] bounds = element.split("-", 3);
      if (bounds.length < 1 || bounds.length > 2) {
        throw new IllegalArgumentException("integer range badly formed: " +
                                           element);
      }
      Range range = new Range();
      range.start = convertToInt(bounds[0], 0);
      range.end = (bounds.length == 2)
          ? convertToInt(bounds[1], Integer.MAX_VALUE)
          : range.start;
      if (range.start > range.end) {
        throw new IllegalArgumentException("IntegerRange from " + range.start +
                                           " to " + range.end + " is invalid");
      }
      ranges.add(range);
    }
  }

  /**
   * Converts text to an int, treating blank text as the default value.
   * @param value the string value
   * @param defaultValue the value to use if the string is empty
   * @return the desired integer
   */
  private static int convertToInt(String value, int defaultValue) {
    String stripped = value.trim();
    return (stripped.length() == 0) ? defaultValue : Integer.parseInt(stripped);
  }

  /**
   * Tells whether a value lies inside any of the ranges.
   * @param value the value to check
   * @return true if at least one range contains the value
   */
  public boolean isIncluded(int value) {
    for (Range candidate : ranges) {
      if (value < candidate.start || value > candidate.end) {
        continue;
      }
      return true;
    }
    return false;
  }

  /**
   * Renders every range as "start-end", joined by commas.
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    String separator = "";
    for (Range each : ranges) {
      text.append(separator);
      text.append(each.start);
      text.append('-');
      text.append(each.end);
      separator = ",";
    }
    return text.toString();
  }
}
/**
 * Reads the given attribute and parses it as a set of integer ranges.
 * @param name the attribute name
 * @param defaultValue the text to parse if the attribute is not set
 * @return a new set of ranges from the configured value
 */
public IntegerRanges getRange(String name, String defaultValue) {
  String rangeText = get(name, defaultValue);
  return new IntegerRanges(rangeText);
}

/**
 * Reads the comma delimited values of the {@code name} property as
 * a collection of {@code String}s.
 * If no such property is specified then empty collection is returned.
 * <p>
 * This is an optimized version of {@link #getStrings(String)}
 *
 * @param name property name.
 * @return property value as a collection of {@code String}s.
 */
public Collection<String> getStringCollection(String name) {
  return StringUtils.getStringCollection(get(name));
}

/**
 * Reads the comma delimited values of the {@code name} property as
 * an array of {@code String}s.
 * If no such property is specified then {@code null} is returned.
 *
 * @param name property name.
 * @return property value as an array of {@code String}s,
 *         or {@code null}.
 */
public String[] getStrings(String name) {
  return StringUtils.getStrings(get(name));
}

/**
 * Reads the comma delimited values of the {@code name} property as
 * an array of {@code String}s, with a fallback.
 * If no such property is specified then the default value is returned.
 *
 * @param name property name.
 * @param defaultValue The default value
 * @return property value as an array of {@code String}s,
 *         or default value.
 */
public String[] getStrings(String name, String... defaultValue) {
  String text = get(name);
  return (text == null) ? defaultValue : StringUtils.getStrings(text);
}

/**
 * Stores an array of strings in the {@code name} property as
 * comma delimited values.
 *
 * @param name property name.
 * @param values The values
 */
public void setStrings(String name, String... values) {
  String joined = StringUtils.arrayToString(values);
  set(name, joined);
}

/**
 * Loads and initializes a class by name, using this configuration's class
 * loader.
 *
 * @param name the class name.
 * @return the class object.
 * @throws ClassNotFoundException if the class is not found.
 */
public Class<?> getClassByName(String name) throws ClassNotFoundException {
  final boolean initialize = true;
  return Class.forName(name, initialize, classLoader);
}

/**
 * Reads the {@code name} property as an array of {@code Class}.
 * The value of the property specifies a list of comma separated class names.
 * If no such property is specified, then {@code defaultValue} is
 * returned.
 *
 * @param name the property name.
 * @param defaultValue default value.
 * @return property value as a {@code Class[]}, or {@code defaultValue}.
 * @throws RuntimeException wrapping the ClassNotFoundException if one of
 *         the named classes cannot be loaded
 */
public Class<?>[] getClasses(String name, Class<?> ... defaultValue) {
  String[] classNames = getStrings(name);
  if (classNames == null) {
    return defaultValue;
  }
  Class<?>[] loaded = new Class<?>[classNames.length];
  try {
    int next = 0;
    for (String className : classNames) {
      loaded[next++] = getClassByName(className);
    }
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  }
  return loaded;
}

/**
 * Reads the {@code name} property as a {@code Class}.
 * If no such property is specified, then {@code defaultValue} is
 * returned.
 *
 * @param name the class name.
 * @param defaultValue default value.
 * @return property value as a {@code Class}, or {@code defaultValue}.
 * @throws RuntimeException wrapping the ClassNotFoundException if the
 *         named class cannot be loaded
 */
public Class<?> getClass(String name, Class<?> defaultValue) {
  String className = get(name);
  if (className != null) {
    try {
      return getClassByName(className);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
  }
  return defaultValue;
}

/**
 * Reads the {@code name} property as a {@code Class} implementing the
 * interface specified by {@code xface}.
 *
 * If no such property is specified, then {@code defaultValue} is
 * returned; if that is null as well, null is returned.
 *
 * An exception is thrown if the resolved class does not implement the
 * named interface. Every exception raised while resolving, including that
 * one, is wrapped in a new RuntimeException.
 *
 * @param name the class name.
 * @param defaultValue default value.
 * @param xface the interface implemented by the named class.
 * @return property value as a {@code Class}, or {@code defaultValue}.
 */
public <U> Class<? extends U> getClass(String name,
                                       Class<? extends U> defaultValue,
                                       Class<U> xface) {
  try {
    Class<?> resolved = getClass(name, defaultValue);
    if (resolved == null) {
      return null;
    }
    if (!xface.isAssignableFrom(resolved)) {
      throw new RuntimeException(resolved+" not "+xface.getName());
    }
    return resolved.asSubclass(xface);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}

/**
 * Stores the name of {@code theClass} in the {@code name} property, after
 * checking that it implements the given interface {@code xface}.
 *
 * @param name property name.
 * @param theClass property value.
 * @param xface the interface implemented by the named class.
 * @throws RuntimeException if {@code theClass} does not implement the
 *         interface {@code xface}
 */
public void setClass(String name, Class<?> theClass, Class<?> xface) {
  boolean implementsInterface = xface.isAssignableFrom(theClass);
  if (!implementsInterface) {
    throw new RuntimeException(theClass+" not "+xface.getName());
  }
  set(name, theClass.getName());
}

/**
 * Get a local file under a directory named by <i>dirsProp</i> with
 * the given <i>path</i>. If <i>dirsProp</i> contains multiple directories,
 * then the first one tried is chosen based on <i>path</i>'s hash code and
 * the others are tried in turn after it. If the parent directory of the
 * resulting file does not exist, an attempt is made to create it.
 *
 * NOTE(review): the original annotator reports that this is mostly used on
 * the TaskTracker for task-local files; the callers are not visible here.
 *
 * @param dirsProp name of the property that lists the candidate directories.
 * @param path file-path.
 * @return local file under the directory with the given path.
 * @throws IOException if the property is unset or empty, or if none of the
 *         listed directories can be used.
 */
public Path getLocalPath(String dirsProp, String path)
    throws IOException {
  String[] dirs = getStrings(dirsProp);
  if (dirs == null) {
    // getStrings() returns null for an unset property; treat that like an
    // empty list so the caller gets the documented IOException, not an NPE.
    dirs = new String[0];
  }
  int hashCode = path.hashCode();
  FileSystem fs = FileSystem.getLocal(this);
  for (int i = 0; i < dirs.length; i++) {  // try each local dir
    // Masking with MAX_VALUE clears the sign bit so the index is never negative.
    int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length;
    Path file = new Path(dirs[index], path);
    Path dir = file.getParent();
    if (fs.mkdirs(dir) || fs.exists(dir)) {
      return file;
    }
  }
  LOG.warn("Could not make " + path +
           " in local directories from " + dirsProp);
  for (int i = 0; i < dirs.length; i++) {
    int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length;
    LOG.warn(dirsProp + "[" + index + "]=" + dirs[index]);
  }
  throw new IOException("No valid local directories in property: "+dirsProp);
}
/**
 * Get a local file name under a directory named in <i>dirsProp</i> with
 * the given <i>path</i>. If <i>dirsProp</i> contains multiple directories,
 * then the first one tried is chosen based on <i>path</i>'s hash code and
 * the others are tried in turn after it. If the parent directory of the
 * resulting file does not exist, an attempt is made to create it.
 *
 * Works like {@code getLocalPath} but uses {@code java.io.File} and
 * returns a {@code File} rather than a {@code Path}.
 *
 * @param dirsProp name of the property that lists the candidate directories.
 * @param path file-path.
 * @return local file under the directory with the given path.
 * @throws IOException if the property is unset or empty, or if none of the
 *         listed directories can be used.
 */
public File getFile(String dirsProp, String path)
    throws IOException {
  String[] dirs = getStrings(dirsProp);
  if (dirs == null) {
    // getStrings() returns null for an unset property; treat that like an
    // empty list so the caller gets the documented IOException, not an NPE.
    dirs = new String[0];
  }
  int hashCode = path.hashCode();
  for (int i = 0; i < dirs.length; i++) {  // try each local dir
    // Masking with MAX_VALUE clears the sign bit so the index is never negative.
    int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length;
    File file = new File(dirs[index], path);
    File dir = file.getParentFile();
    if (dir.exists() || dir.mkdirs()) {
      return file;
    }
  }
  throw new IOException("No valid local directories in property: "+dirsProp);
}
/**
 * Resolves a resource name to a {@link URL} through this configuration's
 * class loader.
 *
 * @param name resource name.
 * @return the url for the named resource, as returned by the class loader.
 */
public URL getResource(String name) {
  URL location = classLoader.getResource(name);
  return location;
}
/**
 * Get an input stream attached to the configuration resource with the
 * given <code>name</code>. The resource is located with
 * {@code getResource(name)} and then opened.
 *
 * @param name configuration resource name.
 * @return an input stream attached to the resource, or null if the
 *         resource is not found or cannot be opened.
 */
public InputStream getConfResourceAsInputStream(String name) {
  try {
    URL url = getResource(name);

    if (url == null) {
      LOG.info(name + " not found");
      return null;
    }
    LOG.info("found resource " + name + " at " + url);

    return url.openStream();
  } catch (Exception e) {
    // Still best-effort (null is returned), but leave a trace of the cause
    // instead of discarding it.
    LOG.warn("Could not open configuration resource " + name, e);
    return null;
  }
}
/**
 * Get a {@link Reader} attached to the configuration resource with the
 * given <code>name</code>. The resource is located with
 * {@code getResource(name)}, opened, and wrapped in an
 * {@code InputStreamReader} that uses the platform default charset.
 *
 * @param name configuration resource name.
 * @return a reader attached to the resource, or null if the resource is
 *         not found or cannot be opened.
 */
public Reader getConfResourceAsReader(String name) {
  try {
    URL url = getResource(name);

    if (url == null) {
      LOG.info(name + " not found");
      return null;
    }
    LOG.info("found resource " + name + " at " + url);

    return new InputStreamReader(url.openStream());
  } catch (Exception e) {
    // Still best-effort (null is returned), but leave a trace of the cause
    // instead of discarding it.
    LOG.warn("Could not open configuration resource " + name, e);
    return null;
  }
}
// Returns the loaded key/value pairs, loading them first when necessary.
// A null `properties` field means "not loaded" (reloadConfiguration() resets
// it). Loading parses all resources and then copies the overlay on top, so
// values assigned through set() win over values read from resources. When
// per-key resource tracking is enabled, each overlay key is recorded with
// the source "Unknown", because it came from set() and not from a resource.
private synchronized Properties getProps() {
  if (properties != null) {
    return properties;
  }
  properties = new Properties();
  loadResources(properties, resources, quietmode);
  if (overlay == null) {
    return properties;
  }
  properties.putAll(overlay);
  if (storeResource) {
    for (Object overlayKey : overlay.keySet()) {
      updatingResource.put((String) overlayKey, "Unknown");
    }
  }
  return properties;
}
/**
 * Counts the keys in the configuration, loading the resources first if
 * they have not been loaded yet.
 *
 * @return number of keys in the configuration.
 */
public int size() {
  Properties loaded = getProps();
  return loaded.size();
}
/**
 * Removes all keys from the configuration: both the loaded properties and
 * the overlay of values assigned through set methods are emptied.
 */
public void clear() {
  Properties loaded = getProps();
  loaded.clear();
  Properties applicationValues = getOverlay();
  applicationValues.clear();
}
/**
 * Get an {@link Iterator} to go through the list of <code>String</code>
 * key-value pairs in the configuration.
 *
 * The iterator walks a copy taken at call time; entries whose key or value
 * is not a String are left out of that copy.
 *
 * @return an iterator over the entries.
 */
public Iterator<Map.Entry<String, String>> iterator() {
  // Properties can hold non-String entries, so only the String-to-String
  // pairs are copied. Once the old object methods that allow non-strings to
  // be put into configurations are removed, properties could become a
  // Map<String,String> and this copy could go away.
  Map<String,String> stringPairs = new HashMap<String,String>();
  for (Map.Entry<Object,Object> entry : getProps().entrySet()) {
    Object key = entry.getKey();
    Object value = entry.getValue();
    if (!(key instanceof String) || !(value instanceof String)) {
      continue;
    }
    stringPairs.put((String) key, (String) value);
  }
  return stringPairs.entrySet().iterator();
}
/**
 * Load every configured resource into <code>properties</code>, in order:
 * the default resources and the deprecated <code>hadoop-site.xml</code>
 * (both only when <code>loadDefaults</code> is set), then each entry of
 * <code>resources</code>.
 *
 * @param properties table that receives the loaded values.
 * @param resources the resources to load after the defaults.
 * @param quiet passed through to <code>loadResource</code>.
 */
private void loadResources(Properties properties,
                           ArrayList resources,
                           boolean quiet) {
  if (loadDefaults) {
    for (String defaultResource : defaultResources) {
      loadResource(properties, defaultResource, quiet);
    }

    // hadoop-site.xml is still honoured as a deprecated case, but only
    // when it can actually be located.
    URL legacySite = getResource("hadoop-site.xml");
    if (legacySite != null) {
      loadResource(properties, "hadoop-site.xml", quiet);
    }
  }

  for (Object extraResource : resources) {
    loadResource(properties, extraResource, quiet);
  }
}

/**
 * Parse one configuration resource and merge its properties into
 * <code>properties</code>.
 *
 * <code>name</code> is dispatched on its runtime type: a {@link URL}, a
 * <code>String</code> resolved through <code>getResource</code>, a
 * {@link Path} read through <code>java.io.File</code>, an
 * {@link InputStream} (closed after parsing), or an already parsed
 * {@link Element}. Any other type ends up in the "not found" handling.
 *
 * @param properties table receiving the name/value pairs.
 * @param name the resource to load; see above for the accepted types.
 * @param quiet if <code>true</code>, the "parsing ..." log lines are
 *        suppressed and a resource that cannot be located is skipped
 *        silently instead of raising an exception.
 * @throws RuntimeException if nothing could be parsed and
 *         <code>quiet</code> is <code>false</code>, or wrapping any
 *         IOException, DOMException, SAXException or
 *         ParserConfigurationException raised while parsing.
 */
private void loadResource(Properties properties, Object name, boolean quiet) {
try {
DocumentBuilderFactory docBuilderFactory
= DocumentBuilderFactory.newInstance();
//ignore all comments inside the xml file
docBuilderFactory.setIgnoringComments(true);

//allow includes in the xml file
docBuilderFactory.setNamespaceAware(true);
try {
docBuilderFactory.setXIncludeAware(true);
} catch (UnsupportedOperationException e) {
// The parser does not support XInclude: log it and carry on without.
LOG.error("Failed to set setXIncludeAware(true) for parser "
+ docBuilderFactory
+ ":" + e,
e);
}
DocumentBuilder builder = docBuilderFactory.newDocumentBuilder();
// Exactly one of doc/root is expected to be set by the dispatch below;
// both staying null means the resource could not be loaded.
Document doc = null;
Element root = null;

if (name instanceof URL) { // an URL resource
URL url = (URL)name;
// Always true here: instanceof has already rejected null.
if (url != null) {
if (!quiet) {
LOG.info("parsing " + url);
}
doc = builder.parse(url.toString());
}
} else if (name instanceof String) { // a CLASSPATH resource
URL url = getResource((String)name);
// A null URL leaves doc null and is handled below as "not found".
if (url != null) {
if (!quiet) {
LOG.info("parsing " + url);
}
doc = builder.parse(url.toString());
}
} else if (name instanceof Path) { // a file resource
// Can't use FileSystem API or we get an infinite loop
// since FileSystem uses Configuration API. Use java.io.File instead.
File file = new File(((Path)name).toUri().getPath())
.getAbsoluteFile();
if (file.exists()) {
if (!quiet) {
LOG.info("parsing " + file);
}
InputStream in = new BufferedInputStream(new FileInputStream(file));
try {
doc = builder.parse(in);
} finally {
in.close();
}
}
} else if (name instanceof InputStream) {
// The caller's stream is consumed and closed here, even if parsing fails.
try {
doc = builder.parse((InputStream)name);
} finally {
((InputStream)name).close();
}
} else if (name instanceof Element) {
// Already parsed (used by the recursive call for nested <configuration>).
root = (Element)name;
}

// Nothing was parsed: the resource is missing or of an unsupported type.
if (doc == null && root == null) {
if (quiet)
return;
throw new RuntimeException(name + " not found");
}

if (root == null) {
root = doc.getDocumentElement();
}
// A wrong top-level element is only logged; its children are still scanned.
if (!"configuration".equals(root.getTagName()))
LOG.fatal("bad conf file: top-level element not <configuration>");
NodeList props = root.getChildNodes();
for (int i = 0; i < props.getLength(); i++) {
Node propNode = props.item(i);
// Skip non-element children such as whitespace text nodes.
if (!(propNode instanceof Element))
continue;
Element prop = (Element)propNode;
// A nested <configuration> is loaded recursively, with the Element itself
// passed as the resource.
if ("configuration".equals(prop.getTagName())) {
loadResource(properties, prop, quiet);
continue;
}
// An unexpected element is warned about but still scanned for
// name/value/final children below.
if (!"property".equals(prop.getTagName()))
LOG.warn("bad conf file: element not <property>");
NodeList fields = prop.getChildNodes();
String attr = null;
String value = null;
boolean finalParameter = false;
// Only the first child node of <name>/<value>/<final> is read, and it is
// cast to Text. The name is trimmed; the value is kept verbatim; final is
// compared exactly against "true".
// NOTE(review): a non-text first child would raise ClassCastException,
// which none of the catch clauses below handles.
for (int j = 0; j < fields.getLength(); j++) {
Node fieldNode = fields.item(j);
if (!(fieldNode instanceof Element))
continue;
Element field = (Element)fieldNode;
if ("name".equals(field.getTagName()) && field.hasChildNodes())
attr = ((Text)field.getFirstChild()).getData().trim();
if ("value".equals(field.getTagName()) && field.hasChildNodes())
value = ((Text)field.getFirstChild()).getData();
if ("final".equals(field.getTagName()) && field.hasChildNodes())
finalParameter = "true".equals(((Text)field.getFirstChild()).getData());
}

// Ignore this parameter if it has already been marked as 'final'
if (attr != null) {
if (value != null) {
if (!finalParameters.contains(attr)) {
properties.setProperty(attr, value);
// Remember which resource last supplied this key.
if (storeResource) {
updatingResource.put(attr, name.toString());
}
} else if (!value.equals(properties.getProperty(attr))) {
// Only an attempt to change a final value to a different one is reported.
LOG.warn(name+":a attempt to override final parameter: "+attr
+"; Ignoring.");
}
}
// Marked final only after this property's own value (if any) was applied,
// so the declaring resource's value is kept.
if (finalParameter) {
finalParameters.add(attr);
}
}
}

// Every checked parsing failure is logged and rethrown unchecked with the
// original exception as the cause.
} catch (IOException e) {
LOG.fatal("error parsing conf file: " + e);
throw new RuntimeException(e);
} catch (DOMException e) {
LOG.fatal("error parsing conf file: " + e);
throw new RuntimeException(e);
} catch (SAXException e) {
LOG.fatal("error parsing conf file: " + e);
throw new RuntimeException(e);
} catch (ParserConfigurationException e) {
LOG.fatal("error parsing conf file: " + e);
throw new RuntimeException(e);
}
}
//从配置文件中获取所有键值对
/**
 * Write the properties in this configuration to the given
 * {@link OutputStream} as a <code>&lt;configuration&gt;</code> XML
 * document, one <code>&lt;property&gt;</code> element per entry.
 *
 * Only entries whose key and value are both <code>String</code>s are
 * written; anything else is skipped, matching what {@link #iterator()}
 * exposes.
 *
 * @param out the output stream to write to.
 * @throws RuntimeException wrapping any failure while building or
 *         serializing the document.
 */
public void writeXml(OutputStream out) throws IOException {
  Properties properties = getProps();
  try {
    Document doc =
      DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    Element conf = doc.createElement("configuration");
    doc.appendChild(conf);
    conf.appendChild(doc.createTextNode("\n"));
    for (Enumeration<Object> e = properties.keys(); e.hasMoreElements();) {
      Object key = e.nextElement();
      Object object = properties.get(key);
      // The legacy object API can store non-String keys or values; skip
      // them instead of failing on an unchecked cast.
      if (!(key instanceof String) || !(object instanceof String)) {
        continue;
      }
      String name = (String) key;
      String value = (String) object;

      Element propNode = doc.createElement("property");
      conf.appendChild(propNode);

      Element nameNode = doc.createElement("name");
      nameNode.appendChild(doc.createTextNode(name));
      propNode.appendChild(nameNode);

      Element valueNode = doc.createElement("value");
      valueNode.appendChild(doc.createTextNode(value));
      propNode.appendChild(valueNode);

      conf.appendChild(doc.createTextNode("\n"));
    }

    DOMSource source = new DOMSource(doc);
    StreamResult result = new StreamResult(out);
    TransformerFactory transFactory = TransformerFactory.newInstance();
    Transformer transformer = transFactory.newTransformer();
    transformer.transform(source, result);
  } catch (Exception e) {
    // Callers have always seen failures as RuntimeException; keep that.
    throw new RuntimeException(e);
  }
}
//把所有非默认配置的配置按XML的形式输出到给定的输出流上
/**
 * Dump every parameter of a configuration, together with its final flag
 * and originating resource, to the given {@link Writer} as JSON.
 * The output has the shape
 * { "properties" : [ {key, value, isFinal, resource}, ... ] }
 * The dump is produced from a copy of <code>conf</code> that has been
 * reloaded, so it does not include parameters of a configuration object
 * that was loaded from an input stream.
 *
 * @param conf the configuration to dump
 * @param out the Writer to write to
 * @throws IOException if writing the JSON fails
 */
public static void dumpConfiguration(Configuration conf,
                                     Writer out) throws IOException {
  Configuration snapshot = new Configuration(conf, true);
  snapshot.reloadConfiguration();

  JsonGenerator json = new JsonFactory().createJsonGenerator(out);
  json.writeStartObject();
  json.writeFieldName("properties");
  json.writeStartArray();
  json.flush();

  for (Map.Entry<Object,Object> entry : snapshot.getProps().entrySet()) {
    json.writeStartObject();
    // Cast only after the object has been opened, as the original did.
    String key = (String) entry.getKey();
    json.writeStringField("key", key);
    json.writeStringField("value", snapshot.get(key));
    json.writeBooleanField("isFinal",
                           snapshot.finalParameters.contains(key));
    json.writeStringField("resource",
                          snapshot.updatingResource.get(key));
    json.writeEndObject();
  }

  json.writeEndArray();
  json.writeEndObject();
  json.flush();
}
//把所有配置属性按某种格式导出来到指定输出流上
//注释中说不包括通过输入流配置的属性,就是说只导出文件的,从几个用到的地方也只看到了文件
//这里不太确定。
/**
 * Return the {@link ClassLoader} currently held by this configuration.
 *
 * @return the class loader stored in this object.
 */
public ClassLoader getClassLoader() {
  return this.classLoader;
}

/**
 * Replace the class loader held by this configuration; it is the one
 * handed back by {@link #getClassLoader()}.
 *
 * @param classLoader the new class loader.
 */
public void setClassLoader(ClassLoader classLoader) {
  this.classLoader = classLoader;
}

/**
 * Describe this configuration as "Configuration: " followed by a
 * comma-separated list of its resources; the default resources come
 * first when <code>loadDefaults</code> is set.
 */
@Override
public String toString() {
  StringBuffer text = new StringBuffer("Configuration: ");
  if (loadDefaults) {
    toString(defaultResources, text);
    // Separate the two lists only when the second one has content.
    if (!resources.isEmpty()) {
      text.append(", ");
    }
  }
  toString(resources, text);
  return text.toString();
}

/**
 * Append the elements of <code>resources</code> to <code>sb</code>,
 * separated by ", ".
 *
 * @param resources the items to list.
 * @param sb the buffer to append to.
 */
private void toString(List resources, StringBuffer sb) {
  boolean first = true;
  for (Object resource : resources) {
    if (!first) {
      sb.append(", ");
    }
    sb.append(resource);
    first = false;
  }
}

/**
 * Switch quiet mode on or off.
 *
 * While quiet mode is on, error and informational messages might not be
 * logged.
 *
 * @param quietmode <code>true</code> to enable quiet mode,
 *        <code>false</code> to disable it.
 */
public synchronized void setQuietMode(boolean quietmode) {
  this.quietmode = quietmode;
}

/**
 * For debugging: build a fresh configuration and write it as XML to
 * standard output.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  conf.writeXml(System.out);
}

/**
 * Replace the contents of this configuration with the entries read from
 * <code>in</code>: a variable-length entry count followed by that many
 * key/value string pairs. Existing entries are cleared first.
 *
 * @param in the input to read from.
 * @throws IOException if reading from <code>in</code> fails.
 */
@Override
public void readFields(DataInput in) throws IOException {
  clear();
  final int entryCount = WritableUtils.readVInt(in);
  for (int remaining = entryCount; remaining > 0; remaining--) {
    // The key precedes the value on the wire.
    String key = org.apache.hadoop.io.Text.readString(in);
    String value = org.apache.hadoop.io.Text.readString(in);
    set(key, value);
  }
}
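//Deserialization method: rebuilds the configuration properties from the DataInput stream (for example one received over RPC). The method is marked @Override, so the "object" being restored is this configuration itself.
//clear() is called before any set(); clear() is this class's own method and in turn calls clear() on the underlying Properties (a java.util.Hashtable), wiping all existing settings.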
/**
 * Serialize this configuration to <code>out</code>: the number of entries
 * as a variable-length int, then each key and value as strings. This is
 * the layout that <code>readFields</code> reads back.
 *
 * NOTE(review): the <code>@Override</code> annotation was commented out in
 * this copy although <code>readFields</code> carries it; confirm and
 * restore it if the class declares this method through an interface.
 *
 * @param out the output to write to.
 * @throws IOException if writing to <code>out</code> fails.
 */
public void write(DataOutput out) throws IOException {
  Properties props = getProps();
  WritableUtils.writeVInt(out, props.size());
  for (Map.Entry<Object, Object> entry : props.entrySet()) {
    String key = (String) entry.getKey();
    org.apache.hadoop.io.Text.writeString(out, key);
    // The value is cast only after the key has been written, as before.
    String value = (String) entry.getValue();
    org.apache.hadoop.io.Text.writeString(out, value);
  }
}
//序列化方法。把对象序列化为RPC的流
/**
 * Collect the entries whose key matches the given regular expression.
 * A key matches when the pattern is found anywhere in it; entries whose
 * key or value is not a <code>String</code> are ignored.
 *
 * @param regex the regular expression to look for in each key.
 * @return Map<String,String> with matching keys
 */
public Map<String,String> getValByRegex(String regex) {
  Pattern keyPattern = Pattern.compile(regex);
  Map<String,String> matches = new HashMap<String,String>();

  for (Map.Entry<Object,Object> entry : getProps().entrySet()) {
    Object key = entry.getKey();
    Object value = entry.getValue();
    if (!(key instanceof String && value instanceof String)) {
      continue;
    }
    if (keyPattern.matcher((String) key).find()) {
      matches.put((String) key, (String) value);
    }
  }
  return matches;
}
}
//通过正则获取键值对集合