`

Hadoop--CLI的解析

阅读更多

基于 Apache Commons CLI 的命令行设计

通常情况下命令行处理有三个步骤:定义,解析和询问阶段

一、定义

package org.apache.hadoop.util;--GenericOptionsParser中

private static Options buildGeneralOptions(Options opts) {
    Option fs = OptionBuilder.withArgName("local|namenode:port")
    .hasArg()
    .withDescription("specify a namenode")
    .create("fs");
    Option jt = OptionBuilder.withArgName("local|jobtracker:port")
    .hasArg()
    .withDescription("specify a job tracker")
    .create("jt");
    Option oconf = OptionBuilder.withArgName("configuration file")
    .hasArg()
    .withDescription("specify an application configuration file")
    .create("conf");
    Option property = OptionBuilder.withArgName("property=value")
    .hasArg()
    .withDescription("use value for given property")
    .create('D');
    Option libjars = OptionBuilder.withArgName("paths")
    .hasArg()
    .withDescription("comma separated jar files to include in the classpath.")
    .create("libjars");
    Option files = OptionBuilder.withArgName("paths")
    .hasArg()
    .withDescription("comma separated files to be copied to the " +
           "map reduce cluster")
    .create("files");
    Option archives = OptionBuilder.withArgName("paths")
    .hasArg()
    .withDescription("comma separated archives to be unarchived" +
                     " on the compute machines.")
    .create("archives");
    // file with security tokens
    Option tokensFile = OptionBuilder.withArgName("tokensFile")
    .hasArg()
    .withDescription("name of the file with the tokens")
    .create("tokenCacheFile");

    opts.addOption(fs);
    opts.addOption(jt);
    opts.addOption(oconf);
    opts.addOption(property);
    opts.addOption(libjars);
    opts.addOption(files);
    opts.addOption(archives);
    opts.addOption(tokensFile);

    return opts;
  }

 二、解析

 /**
   * Parse the user-specified options, get the generic options, and modify
   * configuration accordingly
   * @param conf Configuration to be modified
   * @param args User-specified arguments
   * @return Command-specific arguments
   */
  private String[] parseGeneralOptions(Options opts, Configuration conf, 
      String[] args) throws IOException {
    opts = buildGeneralOptions(opts);
    /**
     * Apache CLI支持多种输入参数格式,主要支持的格式有以下几种
     * POSIX(Portable Operating System Interface of Unix)中的参数形式,例如 tar -zxvf foo.tar.gz
		GNU 中的长参数形式,例如 du --human-readable --max-depth=1
		Java 命令中的参数形式,例如 java -Djava.net.useSystemProxies=true Foo
		短杠参数带参数值的参数形式,例如 gcc -O2 foo.c
		长杠参数不带参数值的形式,例如 ant – projecthelp
     */
    //使用GNU解析
    CommandLineParser parser = new GnuParser();
    try {
      commandLine = parser.parse(opts, args, true);
      processGeneralOptions(conf, commandLine);
      return commandLine.getArgs();
    } catch(ParseException e) {
      LOG.warn("options parsing failed: "+e.getMessage());

      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("general options are: ", opts);
    }
    return args;
  }

 三、询问

 /**
   * Modify configuration according user-specified generic options
   * @param conf Configuration to be modified
   * @param line User-specified generic options
   */
  private void processGeneralOptions(Configuration conf,
      CommandLine line) throws IOException {
    if (line.hasOption("fs")) {
      FileSystem.setDefaultUri(conf, line.getOptionValue("fs"));
    }

    if (line.hasOption("jt")) {
      conf.set("mapred.job.tracker", line.getOptionValue("jt"));
    }
    if (line.hasOption("conf")) {
      String[] values = line.getOptionValues("conf");
      for(String value : values) {
        conf.addResource(new Path(value));
      }
    }
    if (line.hasOption("libjars")) {
      conf.set("tmpjars", 
               validateFiles(line.getOptionValue("libjars"), conf));
      //setting libjars in client classpath
      URL[] libjars = getLibJars(conf);
      if(libjars!=null && libjars.length>0) {
        conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
        Thread.currentThread().setContextClassLoader(
            new URLClassLoader(libjars, 
                Thread.currentThread().getContextClassLoader()));
      }
    }
    if (line.hasOption("files")) {
      conf.set("tmpfiles", 
               validateFiles(line.getOptionValue("files"), conf));
    }
    if (line.hasOption("archives")) {
      conf.set("tmparchives", 
                validateFiles(line.getOptionValue("archives"), conf));
    }
    if (line.hasOption('D')) {
      String[] property = line.getOptionValues('D');
      for(String prop : property) {
        String[] keyval = prop.split("=", 2);
        if (keyval.length == 2) {
          conf.set(keyval[0], keyval[1]);
        }
      }
    }
    conf.setBoolean("mapred.used.genericoptionsparser", true);
    
    // tokensFile
    if(line.hasOption("tokenCacheFile")) {
      String fileName = line.getOptionValue("tokenCacheFile");
      // check if the local file exists
      try 
      {
        FileSystem localFs = FileSystem.getLocal(conf);
        Path p = new Path(fileName);
        if (!localFs.exists(p)) {
          throw new FileNotFoundException("File "+fileName+" does not exist.");
        }

        LOG.debug("setting conf tokensFile: " + fileName);
        conf.set("mapreduce.job.credentials.json", 
                 localFs.makeQualified(p).toString());
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }

 解析完成之后再执行,执行代码如下,在ToolRunner中

public static int run(Configuration conf, Tool tool, String[] args) 
    throws Exception{
    if(conf == null) {
      conf = new Configuration();
    }
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    //set the configuration back, so that Tool can configure itself
    tool.setConf(conf);
    
    //get the args w/o generic hadoop args
    String[] toolArgs = parser.getRemainingArgs();
    return tool.run(toolArgs);
  }

 

 

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics