Sample yarn-site.xml for Apache Hadoop 2.7.3

This is a sample yarn-site.xml for Apache Hadoop with essential properties. A skeleton yarn-site.xml is also found in the etc/hadoop directory after unpacking the tarball.

yarn-site.xml
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->

  <!-- Configurations for ResourceManager and NodeManager -->


  <!-- Switch for YARN access-control lists; this sample turns them off. -->
  <property>
    <name>yarn.acl.enable</name>
    <value>false</value>
    <description>Whether YARN ACLs are enabled.</description>
    <final>true</final>
  </property>

  <!-- Cluster administrator ACL. The value is left empty in this sample.
       NOTE(review): confirm whether an empty value is meant to leave the
       default in effect or should be replaced with a real ACL. -->
  <property>
    <name>yarn.admin.acl</name>
    <value></value>
    <description>
         ACL to set admins on the cluster. ACLs are of the form
         comma-separated-users, a single space, then comma-separated-groups
         (for example "alice,bob admins,ops").
         Defaults to the special value of * which means anyone.
         The special value of just a space means no one has access.
    </description>
    <final>true</final>
  </property>

  <!-- Log aggregation switch; this sample leaves aggregation off. -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>false</value>
    <description>Set to true to enable log aggregation, false to disable it.</description>
    <final>true</final>
  </property>

  <!-- Configurations for ResourceManager -->

  <!-- Client-facing ResourceManager endpoint. Left empty in this sample;
       supply a site-specific host:port if needed. -->
  <property>
    <name>yarn.resourcemanager.address</name>
    <value></value>
    <description>
            ResourceManager address, in host:port form, that clients use
            to submit jobs. If set, overrides the hostname set in
            yarn.resourcemanager.hostname.
    </description>
    <final>true</final>
  </property>

  <!-- Scheduler endpoint used by ApplicationMasters. Left empty in this
       sample; supply a site-specific host:port if needed. -->
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value></value>
    <description>
           ResourceManager address, in host:port form, that
           ApplicationMasters use to talk to the Scheduler to obtain
           resources. If set, overrides the hostname set in
           yarn.resourcemanager.hostname.
    </description>
    <final>true</final>
  </property>

  <!-- Endpoint NodeManagers connect to. Left empty in this sample;
       supply a site-specific host:port if needed. -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value></value>
    <description>
         ResourceManager address, in host:port form, for NodeManagers.
         If set, overrides the hostname set in
         yarn.resourcemanager.hostname.
    </description>
    <final>true</final>
  </property>

  <!-- Endpoint for administrative commands. Left empty in this sample;
       supply a site-specific host:port if needed. -->
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value></value>
    <description>
          ResourceManager address, in host:port form, for administrative
          commands. If set, overrides the hostname set in
          yarn.resourcemanager.hostname.
    </description>
    <final>true</final>
  </property>

  <!-- Web UI endpoint of the ResourceManager. Left empty in this sample;
       supply a site-specific host:port if needed. -->
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value></value>
    <description>
          ResourceManager web-ui address, in host:port form.
          If set, overrides the hostname set in
          yarn.resourcemanager.hostname.
    </description>
    <final>true</final>
  </property>


  <!-- Single-hostname shortcut for the ResourceManager address
       properties. Left empty in this sample; supply a site-specific
       hostname. -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value></value>
    <description>
        ResourceManager host. A single hostname that can be set in
        place of setting all of the yarn.resourcemanager.*.address
        properties. Results in default ports for the ResourceManager
        components.
    </description>
    <final>true</final>
  </property>

  <!-- Scheduler selection for the ResourceManager. Left empty in this
       sample. -->
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value></value>
    <description>
          Scheduler class used by the ResourceManager: CapacityScheduler
          (recommended), FairScheduler (also recommended), or
          FifoScheduler.
    </description>
    <final>true</final>
  </property>

  <!-- Lower bound on per-container memory requests. Left empty in this
       sample. Presumably expressed in MB given the -mb suffix; confirm. -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value></value>
    <description>
         Smallest amount of memory the Resource Manager will allocate
         to each container request.
    </description>
    <final>true</final>
  </property>

  <!-- Upper bound on per-container memory requests. Left empty in this
       sample. Presumably expressed in MB given the -mb suffix; confirm. -->
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value></value>
    <description>
         Largest amount of memory the Resource Manager will allocate
         to each container request.
    </description>
    <final>true</final>
  </property>

  <!-- Include list for NodeManagers. Left empty in this sample.
       Only the include side is configured here; the exclude list is a
       separate property (yarn.resourcemanager.nodes.exclude-path). -->
  <property>
    <name>yarn.resourcemanager.nodes.include-path</name>
    <value></value>
    <description>
         Path to the file listing permitted NodeManagers. If necessary,
         use this file to control the list of allowable NodeManagers.
    </description>
    <final>true</final>
  </property>

  <!-- Configurations for NodeManager -->

  <!-- Memory a NodeManager offers to containers. Left empty in this
       sample; supply a site-specific value. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value></value>
    <description>
          Resource, i.e. available physical memory, in MB,
          for a given NodeManager. Defines the total available
          resources on the NodeManager to be made available
          to running containers.
    </description>
    <final>true</final>
  </property>

  <!-- Virtual-to-physical memory allowance for tasks. Left empty in this
       sample. -->
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value></value>
    <description>
         Maximum ratio by which virtual memory usage of
         tasks may exceed physical memory. The virtual
         memory usage of each task may exceed its physical
         memory limit by this ratio. The total amount of
         virtual memory used by tasks on the NodeManager
         may exceed its physical memory usage by this ratio.
    </description>
    <final>true</final>
  </property>

  <!-- Local directories for intermediate data. Left empty in this
       sample; supply site-specific paths. -->
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value></value>
    <description>
          Local filesystem paths, given as a comma-separated list, to
          which intermediate data is written. Listing several paths
          helps spread disk i/o.
    </description>
    <final>true</final>
  </property>

  <!-- Local directories for logs. Left empty in this sample; supply
       site-specific paths. -->
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value></value>
    <description>
           Local filesystem paths, given as a comma-separated list, to
           which logs are written. Listing several paths helps spread
           disk i/o.
    </description>
    <final>true</final>
  </property>

  <!-- Local log retention: 10800 seconds, i.e. 3 hours. -->
  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>10800</value>
    <description>
         Default time (in seconds) to retain log files on the
         NodeManager. Only applicable if log-aggregation is disabled.
    </description>
    <final>true</final>
  </property>

  <!-- Destination directory for aggregated application logs. -->
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/logs</value>
    <description>
         HDFS directory to which application logs are moved when an
         application completes. Appropriate permissions need to be
         set on it. Only applicable if log-aggregation is enabled.
    </description>
    <final>true</final>
  </property>

  <!-- Final path component under the remote log directory. -->
  <property>
    <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
    <value>logs</value>
    <description>
         Suffix appended to the remote log dir. The aggregated logs
         end up in
         ${yarn.nodemanager.remote-app-log-dir}/${user}/${thisParam}.
         Only applicable if log-aggregation is enabled.
    </description>
    <final>true</final>
  </property>

  <!-- Auxiliary service on the NodeManager: the shuffle service. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>
         Shuffle service; it needs to be set for Map Reduce
         applications.
    </description>
    <final>true</final>
  </property>

</configuration>

Comments