Implementation of Vanilla Plugin
* Implemented configure_cluster and start_cluster methods * Cluster configuring is service:process specific * Added basic unit tests * Added basic validation Implements blueprint vanilla-hadoop-plugin Change-Id: I51c55557b6bb073a7eae7eefc2f21cc2c2df0385
This commit is contained in:
parent
10ef73c80b
commit
fa62f6872a
180
savanna/plugins/vanilla/config_helper.py
Normal file
180
savanna/plugins/vanilla/config_helper.py
Normal file
@ -0,0 +1,180 @@
|
||||
# Copyright (c) 2013 Mirantis Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import pkg_resources as pkg
|
||||
import xml.dom.minidom as xml
|
||||
|
||||
import jinja2 as j2
|
||||
|
||||
from savanna.plugins import provisioning as p
|
||||
from savanna import version
|
||||
|
||||
|
||||
def _load_xml_default_configs(file_name):
|
||||
doc = xml.parse(
|
||||
pkg.resource_filename(version.version_info.package,
|
||||
'plugins/vanilla/resources/%s' % file_name)
|
||||
)
|
||||
|
||||
properties = doc.getElementsByTagName("name")
|
||||
return [prop.childNodes[0].data for prop in properties]
|
||||
|
||||
|
||||
CORE_DEFAULT = _load_xml_default_configs('core-default.xml')
|
||||
HDFS_DEFAULT = _load_xml_default_configs('hdfs-default.xml')
|
||||
MAPRED_DEFAULT = _load_xml_default_configs('mapred-default.xml')
|
||||
|
||||
XML_CONFS = {
|
||||
"HDFS": [CORE_DEFAULT, HDFS_DEFAULT],
|
||||
"MAPREDUCE": [MAPRED_DEFAULT]
|
||||
}
|
||||
|
||||
# TODO(aignatov): Environmental configs could be more complex
|
||||
ENV_CONFS = {
|
||||
"MAPREDUCE": {
|
||||
'job_tracker_heap_size': 'HADOOP_JOBTRACKER_OPTS=\\"-Xmx%sm\\"',
|
||||
'task_tracker_heap_size': 'HADOOP_TASKTRACKER_OPTS=\\"-Xmx%sm\\"'
|
||||
},
|
||||
"HDFS": {
|
||||
'name_node_heap_size': 'HADOOP_NAMENODE_OPTS=\\"-Xmx%sm\\"',
|
||||
'data_node_heap_size': 'HADOOP_DATANODE_OPTS=\\"-Xmx%sm\\"'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _initialise_configs():
|
||||
configs = []
|
||||
for service, config_lists in XML_CONFS.iteritems():
|
||||
for config_list in config_lists:
|
||||
for config_name in config_list:
|
||||
# TODO(aignatov): Need to add default values and types
|
||||
configs.append(
|
||||
p.Config(config_name, service, "node", is_optional=True))
|
||||
|
||||
for service, config_items in ENV_CONFS.iteritems():
|
||||
for name, param_format_str in config_items.iteritems():
|
||||
configs.append(p.Config(name, service, "node", default_value=1024))
|
||||
|
||||
return configs
|
||||
|
||||
# Initialise plugin Hadoop configurations
|
||||
PLUGIN_CONFIGS = _initialise_configs()
|
||||
|
||||
|
||||
def get_plugin_configs():
|
||||
return PLUGIN_CONFIGS
|
||||
|
||||
|
||||
def _create_xml(configs, global_conf):
|
||||
doc = xml.Document()
|
||||
|
||||
pi = doc.createProcessingInstruction('xml-stylesheet',
|
||||
'type="text/xsl" '
|
||||
'href="configuration.xsl"')
|
||||
doc.insertBefore(pi, doc.firstChild)
|
||||
|
||||
# Create the <configuration> base element
|
||||
configuration = doc.createElement("configuration")
|
||||
doc.appendChild(configuration)
|
||||
|
||||
for prop_name, prop_value in configs.items():
|
||||
if prop_name in global_conf:
|
||||
# Create the <property> element
|
||||
property = doc.createElement("property")
|
||||
configuration.appendChild(property)
|
||||
|
||||
# Create a <name> element in <property>
|
||||
name = doc.createElement("name")
|
||||
property.appendChild(name)
|
||||
|
||||
# Give the <name> element some hadoop config name
|
||||
name_text = doc.createTextNode(prop_name)
|
||||
name.appendChild(name_text)
|
||||
|
||||
# Create a <value> element in <property>
|
||||
value = doc.createElement("value")
|
||||
property.appendChild(value)
|
||||
|
||||
# Give the <value> element some hadoop config value
|
||||
value_text = doc.createTextNode(prop_value)
|
||||
value.appendChild(value_text)
|
||||
|
||||
# Return newly created XML
|
||||
return doc.toprettyxml(indent=" ")
|
||||
|
||||
|
||||
def generate_xml_configs(configs, nn_hostname, jt_hostname=None):
|
||||
# inserting common configs depends on provisioned VMs and HDFS placement
|
||||
# TODO(aignatov): should be moved to cluster context
|
||||
cfg = {
|
||||
'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
|
||||
'dfs.name.dir': '/mnt/lib/hadoop/hdfs/namenode',
|
||||
'dfs.data.dir': '/mnt/lib/hadoop/hdfs/datanode',
|
||||
}
|
||||
|
||||
if jt_hostname:
|
||||
mr_cfg = {
|
||||
'mapred.job.tracker': '%s:8021' % jt_hostname,
|
||||
'mapred.system.dir': '/mnt/mapred/mapredsystem',
|
||||
'mapred.local.dir': '/mnt/lib/hadoop/mapred'
|
||||
}
|
||||
cfg.update(mr_cfg)
|
||||
|
||||
# inserting user-defined configs
|
||||
for key, value in extract_xml_confs(configs):
|
||||
cfg[key] = value
|
||||
|
||||
# invoking applied configs to appropriate xml files
|
||||
xml_configs = {
|
||||
'core-site': _create_xml(cfg, CORE_DEFAULT),
|
||||
'mapred-site': _create_xml(cfg, MAPRED_DEFAULT),
|
||||
'hdfs-site': _create_xml(cfg, HDFS_DEFAULT)
|
||||
}
|
||||
|
||||
return xml_configs
|
||||
|
||||
|
||||
def extract_environment_confs(configs):
|
||||
"""Returns list of Hadoop parameters which should be passed via environment
|
||||
"""
|
||||
lst = []
|
||||
for service, srv_confs in configs.items():
|
||||
for param_name, param_value in srv_confs.items():
|
||||
for cfg_name, cfg_format_str in ENV_CONFS[service].items():
|
||||
if param_name == cfg_name and param_value is not None:
|
||||
lst.append(cfg_format_str % param_value)
|
||||
return lst
|
||||
|
||||
|
||||
def extract_xml_confs(configs):
|
||||
"""Returns list of Hadoop parameters which should be passed into general
|
||||
configs like core-site.xml
|
||||
"""
|
||||
lst = []
|
||||
for service, srv_confs in configs.items():
|
||||
for param_name, param_value in srv_confs.items():
|
||||
for cfg_list in XML_CONFS[service]:
|
||||
if param_name in cfg_list and param_value is not None:
|
||||
lst.append((param_name, param_value))
|
||||
return lst
|
||||
|
||||
|
||||
env = j2.Environment(loader=j2.PackageLoader('savanna',
|
||||
'plugins/vanilla/resources'))
|
||||
|
||||
|
||||
def render_template(template_name, **kwargs):
|
||||
template = env.get_template('%s.template' % template_name)
|
||||
return template.render(**kwargs)
|
36
savanna/plugins/vanilla/exceptions.py
Normal file
36
savanna/plugins/vanilla/exceptions.py
Normal file
@ -0,0 +1,36 @@
|
||||
# Copyright (c) 2013 Mirantis Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import savanna.exceptions as e
|
||||
|
||||
|
||||
class NotSingleNameNodeException(e.SavannaException):
|
||||
def __init__(self, nn_count):
|
||||
self.message = "Hadoop cluster should contain only 1 NameNode " \
|
||||
"instance. Actual NN count is %s" % nn_count
|
||||
self.code = "NOT_SINGLE_NAME_NODE"
|
||||
|
||||
|
||||
class NotSingleJobTrackerException(e.SavannaException):
|
||||
def __init__(self, jt_count):
|
||||
self.message = "Hadoop cluster should contain 0 or 1 JobTracker " \
|
||||
"instances. Actual JT count is %s" % jt_count
|
||||
self.code = "NOT_SINGLE_JOB_TRACKER"
|
||||
|
||||
|
||||
class TaskTrackersWithoutJobTracker(e.SavannaException):
|
||||
def __init__(self):
|
||||
self.message = "TaskTrackers cannot be configures without JobTracker"
|
||||
self.code = "TASK_TRACKERS_WITHOUT_JOB_TRACKER"
|
@ -13,10 +13,22 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from savanna.openstack.common import log as logging
|
||||
from savanna.plugins import provisioning as p
|
||||
from savanna.plugins.vanilla import config_helper as c_helper
|
||||
from savanna.plugins.vanilla import exceptions as ex
|
||||
from savanna.plugins.vanilla import utils
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VanillaProvider(p.ProvisioningPluginBase):
|
||||
def __init__(self):
|
||||
self.processes = {
|
||||
"HDFS": ["namenode", "datanode", "secondarynamenode"],
|
||||
"MAPREDUCE": ["tasktracker", "jobtracker"]
|
||||
}
|
||||
|
||||
def get_plugin_opts(self):
|
||||
return []
|
||||
|
||||
@ -32,28 +44,113 @@ class VanillaProvider(p.ProvisioningPluginBase):
|
||||
"cluster without any management consoles.")
|
||||
|
||||
def get_versions(self):
|
||||
return ['1.1.1']
|
||||
return ['1.1.2']
|
||||
|
||||
def get_configs(self, hadoop_version):
|
||||
return [
|
||||
p.Config('Task Tracker heap size', 'mapreduce', "node",
|
||||
default_value='1024M')
|
||||
]
|
||||
return c_helper.get_plugin_configs()
|
||||
|
||||
def get_node_processes(self, hadoop_version):
|
||||
return {
|
||||
'mapreduce': ['jobtracker', 'tasktracker'],
|
||||
'hdfs': ['namenode', 'datanode']
|
||||
}
|
||||
return self.processes
|
||||
|
||||
def validate(self, cluster):
|
||||
pass
|
||||
nn_count = sum([ng.count for ng
|
||||
in utils.get_node_groups(cluster, "namenode")])
|
||||
if nn_count is not 1:
|
||||
raise ex.NotSingleNameNodeException(nn_count)
|
||||
|
||||
jt_count = sum([ng.count for ng
|
||||
in utils.get_node_groups(cluster, "jobtracker")])
|
||||
|
||||
if jt_count not in [0, 1]:
|
||||
raise ex.NotSingleJobTrackerException(jt_count)
|
||||
|
||||
tt_count = sum([ng.count for ng
|
||||
in utils.get_node_groups(cluster, "tasktracker")])
|
||||
if jt_count is 0 and tt_count > 0:
|
||||
raise ex.TaskTrackersWithoutJobTracker()
|
||||
|
||||
def update_infra(self, cluster):
|
||||
pass
|
||||
|
||||
def configure_cluster(self, cluster):
|
||||
pass
|
||||
for ng in cluster.node_groups:
|
||||
for inst in ng.instances:
|
||||
inst.remote.execute_command(
|
||||
'sudo chown -R $USER:$USER /etc/hadoop'
|
||||
)
|
||||
|
||||
self._extract_configs(cluster)
|
||||
self._push_configs_to_nodes(cluster)
|
||||
|
||||
def start_cluster(self, cluster):
|
||||
pass
|
||||
nn_instance = utils.get_namenode(cluster)
|
||||
jt_instance = utils.get_jobtracker(cluster)
|
||||
|
||||
nn_instance.remote.execute_command(
|
||||
'sudo su -c /usr/sbin/start-dfs.sh hadoop >>'
|
||||
' /tmp/savanna-hadoop-start-dfs.log')
|
||||
|
||||
LOG.info("HDFS service at '%s' has been started", nn_instance.hostname)
|
||||
|
||||
if jt_instance:
|
||||
jt_instance.remote.execute_command(
|
||||
'sudo su -c /usr/sbin/start-mapred.sh hadoop >>'
|
||||
' /tmp/savanna-hadoop-start-mapred.log')
|
||||
LOG.info("MAPREDUCE service at '%s' has been started",
|
||||
jt_instance.hostname)
|
||||
|
||||
LOG.info('Cluster %s has been started successfully' % cluster.name)
|
||||
|
||||
def _extract_configs(self, cluster):
|
||||
nn = utils.get_namenode(cluster)
|
||||
jt = utils.get_jobtracker(cluster)
|
||||
for ng in cluster.node_groups:
|
||||
#TODO(aignatov): setup_script should be replaced with remote calls
|
||||
ng.extra = {
|
||||
'xml': c_helper.generate_xml_configs(ng.configuration,
|
||||
nn.hostname,
|
||||
jt.hostname
|
||||
if jt else None),
|
||||
'setup_script': c_helper.render_template(
|
||||
'setup-general.sh',
|
||||
args={
|
||||
'env_configs': c_helper.extract_environment_confs(
|
||||
ng.configuration)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
def _push_configs_to_nodes(self, cluster):
|
||||
for ng in cluster.node_groups:
|
||||
for inst in ng.instances:
|
||||
inst.remote.write_file_to('/etc/hadoop/core-site.xml',
|
||||
ng.extra['xml']['core-site'])
|
||||
inst.remote.write_file_to('/etc/hadoop/mapred-site.xml',
|
||||
ng.extra['xml']['mapred-site'])
|
||||
inst.remote.write_file_to('/etc/hadoop/hdfs-site.xml',
|
||||
ng.extra['xml']['hdfs-site'])
|
||||
inst.remote.write_file_to('/tmp/savanna-hadoop-init.sh',
|
||||
ng.extra['setup_script'])
|
||||
inst.remote.execute_command(
|
||||
'sudo chmod 0500 /tmp/savanna-hadoop-init.sh'
|
||||
)
|
||||
inst.remote.execute_command(
|
||||
'sudo /tmp/savanna-hadoop-init.sh '
|
||||
'>> /tmp/savanna-hadoop-init.log 2>&1')
|
||||
|
||||
nn = utils.get_namenode(cluster)
|
||||
jt = utils.get_jobtracker(cluster)
|
||||
|
||||
nn.remote.write_file_to('/etc/hadoop/slaves',
|
||||
utils.generate_host_names(
|
||||
utils.get_datanodes(cluster)))
|
||||
nn.remote.write_file_to('/etc/hadoop/masters',
|
||||
utils.generate_host_names(
|
||||
utils.get_secondarynamenodes(cluster)))
|
||||
nn.remote.execute_command(
|
||||
"sudo su -c 'hadoop namenode -format' hadoop")
|
||||
|
||||
if jt and nn.instance_id != jt.instance_id:
|
||||
jt.remote.write_file_to('/etc/hadoop/slaves',
|
||||
utils.generate_host_names(
|
||||
utils.get_tasktrackers(cluster)))
|
||||
|
20
savanna/plugins/vanilla/resources/README.rst
Normal file
20
savanna/plugins/vanilla/resources/README.rst
Normal file
@ -0,0 +1,20 @@
|
||||
Apache Hadoop Configurations for Savanna
|
||||
========================================
|
||||
|
||||
This directory contains default XML configuration files:
|
||||
|
||||
* core-default.xml,
|
||||
* hdfs-default.xml,
|
||||
* mapred-default.xml
|
||||
|
||||
These files are applied for Savanna's plugin of Apache Hadoop version 1.1.2.
|
||||
|
||||
Files were taken from here:
|
||||
https://github.com/apache/hadoop-common/blob/release-1.1.2/src/hdfs/hdfs-default.xml
|
||||
https://github.com/apache/hadoop-common/blob/release-1.1.2/src/mapred/mapred-default.xml
|
||||
https://github.com/apache/hadoop-common/blob/release-1.1.2/src/core/core-default.xml
|
||||
|
||||
XML configs are used to expose default Hadoop configurations to the users through
|
||||
the Savanna's REST API. It allows users to override some config values which will
|
||||
be pushed to the provisioned VMs running Hadoop services as part of appropriate
|
||||
xml config.
|
580
savanna/plugins/vanilla/resources/core-default.xml
Normal file
580
savanna/plugins/vanilla/resources/core-default.xml
Normal file
@ -0,0 +1,580 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
|
||||
<!-- Do not modify this file directly. Instead, copy entries that you -->
|
||||
<!-- wish to modify from this file into core-site.xml and change them -->
|
||||
<!-- there. If core-site.xml does not already exist, create it. -->
|
||||
|
||||
<configuration>
|
||||
|
||||
<!--- global properties -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.tmp.dir</name>
|
||||
<value>/tmp/hadoop-${user.name}</value>
|
||||
<description>A base for other temporary directories.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.native.lib</name>
|
||||
<value>true</value>
|
||||
<description>Should native hadoop libraries, if present, be used.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.filter.initializers</name>
|
||||
<value></value>
|
||||
<description>A comma separated list of class names. Each class in the list
|
||||
must extend org.apache.hadoop.http.FilterInitializer. The corresponding
|
||||
Filter will be initialized. Then, the Filter will be applied to all user
|
||||
facing jsp and servlet web pages. The ordering of the list defines the
|
||||
ordering of the filters.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.group.mapping</name>
|
||||
<value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
|
||||
<description>Class for user to group mapping (get groups for a given user)
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.authorization</name>
|
||||
<value>false</value>
|
||||
<description>Is service-level authorization enabled?</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.authentication</name>
|
||||
<value>simple</value>
|
||||
<description>Possible values are simple (no authentication), and kerberos
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.token.service.use_ip</name>
|
||||
<value>true</value>
|
||||
<description>Controls whether tokens always use IP addresses. DNS changes
|
||||
will not be detected if this option is enabled. Existing client connections
|
||||
that break will always reconnect to the IP of the original host. New clients
|
||||
will connect to the host's new IP but fail to locate a token. Disabling
|
||||
this option will allow existing and new clients to detect an IP change and
|
||||
continue to locate the new host's token.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.use-weak-http-crypto</name>
|
||||
<value>false</value>
|
||||
<description>If enabled, use KSSL to authenticate HTTP connections to the
|
||||
NameNode. Due to a bug in JDK6, using KSSL requires one to configure
|
||||
Kerberos tickets to use encryption types that are known to be
|
||||
cryptographically weak. If disabled, SPNEGO will be used for HTTP
|
||||
authentication, which supports stronger encryption types.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!--
|
||||
<property>
|
||||
<name>hadoop.security.service.user.name.key</name>
|
||||
<value></value>
|
||||
<description>Name of the kerberos principal of the user that owns
|
||||
a given service daemon
|
||||
</description>
|
||||
</property>
|
||||
-->
|
||||
|
||||
<!--- logging properties -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.logfile.size</name>
|
||||
<value>10000000</value>
|
||||
<description>The max size of each log file</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.logfile.count</name>
|
||||
<value>10</value>
|
||||
<description>The max number of log files</description>
|
||||
</property>
|
||||
|
||||
<!-- i/o properties -->
|
||||
<property>
|
||||
<name>io.file.buffer.size</name>
|
||||
<value>4096</value>
|
||||
<description>The size of buffer for use in sequence files.
|
||||
The size of this buffer should probably be a multiple of hardware
|
||||
page size (4096 on Intel x86), and it determines how much data is
|
||||
buffered during read and write operations.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.bytes.per.checksum</name>
|
||||
<value>512</value>
|
||||
<description>The number of bytes per checksum. Must not be larger than
|
||||
io.file.buffer.size.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.skip.checksum.errors</name>
|
||||
<value>false</value>
|
||||
<description>If true, when a checksum error is encountered while
|
||||
reading a sequence file, entries are skipped, instead of throwing an
|
||||
exception.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.compression.codecs</name>
|
||||
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec</value>
|
||||
<description>A list of the compression codec classes that can be used
|
||||
for compression/decompression.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.serializations</name>
|
||||
<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
|
||||
<description>A list of serialization classes that can be used for
|
||||
obtaining serializers and deserializers.</description>
|
||||
</property>
|
||||
|
||||
<!-- file system properties -->
|
||||
|
||||
<property>
|
||||
<name>fs.default.name</name>
|
||||
<value>file:///</value>
|
||||
<description>The name of the default file system. A URI whose
|
||||
scheme and authority determine the FileSystem implementation. The
|
||||
uri's scheme determines the config property (fs.SCHEME.impl) naming
|
||||
the FileSystem implementation class. The uri's authority is used to
|
||||
determine the host, port, etc. for a filesystem.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.trash.interval</name>
|
||||
<value>0</value>
|
||||
<description>Number of minutes between trash checkpoints.
|
||||
If zero, the trash feature is disabled.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.file.impl</name>
|
||||
<value>org.apache.hadoop.fs.LocalFileSystem</value>
|
||||
<description>The FileSystem for file: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.hdfs.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
|
||||
<description>The FileSystem for hdfs: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.impl</name>
|
||||
<value>org.apache.hadoop.fs.s3.S3FileSystem</value>
|
||||
<description>The FileSystem for s3: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3n.impl</name>
|
||||
<value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
|
||||
<description>The FileSystem for s3n: (Native S3) uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.kfs.impl</name>
|
||||
<value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
|
||||
<description>The FileSystem for kfs: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.hftp.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.HftpFileSystem</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.hsftp.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.HsftpFileSystem</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.webhdfs.impl</name>
|
||||
<value>org.apache.hadoop.hdfs.web.WebHdfsFileSystem</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.ftp.impl</name>
|
||||
<value>org.apache.hadoop.fs.ftp.FTPFileSystem</value>
|
||||
<description>The FileSystem for ftp: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.ramfs.impl</name>
|
||||
<value>org.apache.hadoop.fs.InMemoryFileSystem</value>
|
||||
<description>The FileSystem for ramfs: uris.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.har.impl</name>
|
||||
<value>org.apache.hadoop.fs.HarFileSystem</value>
|
||||
<description>The filesystem for Hadoop archives. </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.har.impl.disable.cache</name>
|
||||
<value>true</value>
|
||||
<description>Don't cache 'har' filesystem instances.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.dir</name>
|
||||
<value>${hadoop.tmp.dir}/dfs/namesecondary</value>
|
||||
<description>Determines where on the local filesystem the DFS secondary
|
||||
name node should store the temporary images to merge.
|
||||
If this is a comma-delimited list of directories then the image is
|
||||
replicated in all of the directories for redundancy.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.edits.dir</name>
|
||||
<value>${fs.checkpoint.dir}</value>
|
||||
<description>Determines where on the local filesystem the DFS secondary
|
||||
name node should store the temporary edits to merge.
|
||||
If this is a comma-delimited list of directoires then teh edits is
|
||||
replicated in all of the directoires for redundancy.
|
||||
Default value is same as fs.checkpoint.dir
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.period</name>
|
||||
<value>3600</value>
|
||||
<description>The number of seconds between two periodic checkpoints.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.checkpoint.size</name>
|
||||
<value>67108864</value>
|
||||
<description>The size of the current edit log (in bytes) that triggers
|
||||
a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
|
||||
<property>
|
||||
<name>fs.s3.block.size</name>
|
||||
<value>67108864</value>
|
||||
<description>Block size to use when writing files to S3.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.buffer.dir</name>
|
||||
<value>${hadoop.tmp.dir}/s3</value>
|
||||
<description>Determines where on the local filesystem the S3 filesystem
|
||||
should store files before sending them to S3
|
||||
(or after retrieving them from S3).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.maxRetries</name>
|
||||
<value>4</value>
|
||||
<description>The maximum number of retries for reading or writing files to S3,
|
||||
before we signal failure to the application.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3.sleepTimeSeconds</name>
|
||||
<value>10</value>
|
||||
<description>The number of seconds to sleep between each S3 retry.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>local.cache.size</name>
|
||||
<value>10737418240</value>
|
||||
<description>The limit on the size of cache you want to keep, set by default
|
||||
to 10GB. This will act as a soft limit on the cache directory for out of band data.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.seqfile.compress.blocksize</name>
|
||||
<value>1000000</value>
|
||||
<description>The minimum block size for compression in block compressed
|
||||
SequenceFiles.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.seqfile.lazydecompress</name>
|
||||
<value>true</value>
|
||||
<description>Should values of block-compressed SequenceFiles be decompressed
|
||||
only when necessary.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.seqfile.sorter.recordlimit</name>
|
||||
<value>1000000</value>
|
||||
<description>The limit on number of records to be kept in memory in a spill
|
||||
in SequenceFiles.Sorter
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.mapfile.bloom.size</name>
|
||||
<value>1048576</value>
|
||||
<description>The size of BloomFilter-s used in BloomMapFile. Each time this many
|
||||
keys is appended the next BloomFilter will be created (inside a DynamicBloomFilter).
|
||||
Larger values minimize the number of filters, which slightly increases the performance,
|
||||
but may waste too much space if the total number of keys is usually much smaller
|
||||
than this number.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>io.mapfile.bloom.error.rate</name>
|
||||
<value>0.005</value>
|
||||
<description>The rate of false positives in BloomFilter-s used in BloomMapFile.
|
||||
As this value decreases, the size of BloomFilter-s increases exponentially. This
|
||||
value is the probability of encountering false positives (default is 0.5%).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.util.hash.type</name>
|
||||
<value>murmur</value>
|
||||
<description>The default implementation of Hash. Currently this can take one of the
|
||||
two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<!-- ipc properties -->
|
||||
|
||||
<property>
|
||||
<name>ipc.client.idlethreshold</name>
|
||||
<value>4000</value>
|
||||
<description>Defines the threshold number of connections after which
|
||||
connections will be inspected for idleness.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.kill.max</name>
|
||||
<value>10</value>
|
||||
<description>Defines the maximum number of clients to disconnect in one go.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connection.maxidletime</name>
|
||||
<value>10000</value>
|
||||
<description>The maximum time in msec after which a client will bring down the
|
||||
connection to the server.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.connect.max.retries</name>
|
||||
<value>10</value>
|
||||
<description>Indicates the number of retries a client will make to establish
|
||||
a server connection.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.server.listen.queue.size</name>
|
||||
<value>128</value>
|
||||
<description>Indicates the length of the listen queue for servers accepting
|
||||
client connections.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.server.tcpnodelay</name>
|
||||
<value>false</value>
|
||||
<description>Turn on/off Nagle's algorithm for the TCP socket connection on
|
||||
the server. Setting to true disables the algorithm and may decrease latency
|
||||
with a cost of more/smaller packets.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>ipc.client.tcpnodelay</name>
|
||||
<value>false</value>
|
||||
<description>Turn on/off Nagle's algorithm for the TCP socket connection on
|
||||
the client. Setting to true disables the algorithm and may decrease latency
|
||||
with a cost of more/smaller packets.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<!-- Web Interface Configuration -->
|
||||
|
||||
<property>
|
||||
<name>webinterface.private.actions</name>
|
||||
<value>false</value>
|
||||
<description> If set to true, the web interfaces of JT and NN may contain
|
||||
actions, such as kill job, delete file, etc., that should
|
||||
not be exposed to public. Enable this option if the interfaces
|
||||
are only reachable by those who have the right authorization.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!-- Proxy Configuration -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.rpc.socket.factory.class.default</name>
|
||||
<value>org.apache.hadoop.net.StandardSocketFactory</value>
|
||||
<description> Default SocketFactory to use. This parameter is expected to be
|
||||
formatted as "package.FactoryClassName".
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
|
||||
<value></value>
|
||||
<description> SocketFactory to use to connect to a DFS. If null or empty, use
|
||||
hadoop.rpc.socket.class.default. This socket factory is also used by
|
||||
DFSClient to create sockets to DataNodes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
|
||||
<property>
|
||||
<name>hadoop.socks.server</name>
|
||||
<value></value>
|
||||
<description> Address (host:port) of the SOCKS server to be used by the
|
||||
SocksSocketFactory.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!-- Rack Configuration -->
|
||||
|
||||
<property>
|
||||
<name>topology.node.switch.mapping.impl</name>
|
||||
<value>org.apache.hadoop.net.ScriptBasedMapping</value>
|
||||
<description> The default implementation of the DNSToSwitchMapping. It
|
||||
invokes a script specified in topology.script.file.name to resolve
|
||||
node names. If the value for topology.script.file.name is not set, the
|
||||
default value of DEFAULT_RACK is returned for all node names.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>topology.script.file.name</name>
|
||||
<value></value>
|
||||
<description> The script name that should be invoked to resolve DNS names to
|
||||
NetworkTopology names. Example: the script would take host.foo.bar as an
|
||||
argument, and return /rack1 as the output.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>topology.script.number.args</name>
|
||||
<value>100</value>
|
||||
<description> The max number of args that the script configured with
|
||||
topology.script.file.name should be run with. Each arg is an
|
||||
IP address.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.security.uid.cache.secs</name>
|
||||
<value>14400</value>
|
||||
<description> NativeIO maintains a cache from UID to UserName. This is
|
||||
the timeout for an entry in that cache. </description>
|
||||
</property>
|
||||
|
||||
<!-- HTTP web-consoles Authentication -->
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.type</name>
|
||||
<value>simple</value>
|
||||
<description>
|
||||
Defines authentication used for Oozie HTTP endpoint.
|
||||
Supported values are: simple | kerberos | #AUTHENTICATION_HANDLER_CLASSNAME#
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.token.validity</name>
|
||||
<value>36000</value>
|
||||
<description>
|
||||
Indicates how long (in seconds) an authentication token is valid before it has
|
||||
to be renewed.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.signature.secret.file</name>
|
||||
<value>${user.home}/hadoop-http-auth-signature-secret</value>
|
||||
<description>
|
||||
The signature secret for signing the authentication tokens.
|
||||
If not set a random secret is generated at startup time.
|
||||
The same secret should be used for JT/NN/DN/TT configurations.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.cookie.domain</name>
|
||||
<value></value>
|
||||
<description>
|
||||
The domain to use for the HTTP cookie that stores the authentication token.
|
||||
In order to authentiation to work correctly across all Hadoop nodes web-consoles
|
||||
the domain must be correctly set.
|
||||
IMPORTANT: when using IP addresses, browsers ignore cookies with domain settings.
|
||||
For this setting to work properly all nodes in the cluster must be configured
|
||||
to generate URLs with hostname.domain names on it.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.simple.anonymous.allowed</name>
|
||||
<value>true</value>
|
||||
<description>
|
||||
Indicates if anonymous requests are allowed when using 'simple' authentication.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.kerberos.principal</name>
|
||||
<value>HTTP/localhost@LOCALHOST</value>
|
||||
<description>
|
||||
Indicates the Kerberos principal to be used for HTTP endpoint.
|
||||
The principal MUST start with 'HTTP/' as per Kerberos HTTP SPNEGO specification.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.http.authentication.kerberos.keytab</name>
|
||||
<value>${user.home}/hadoop.keytab</value>
|
||||
<description>
|
||||
Location of the keytab file with the credentials for the principal.
|
||||
Referring to the same keytab file Oozie uses for its Kerberos credentials for Hadoop.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hadoop.relaxed.worker.version.check</name>
|
||||
<value>false</value>
|
||||
<description>
|
||||
By default datanodes refuse to connect to namenodes if their build
|
||||
revision (svn revision) do not match, and tasktrackers refuse to
|
||||
connect to jobtrackers if their build version (version, revision,
|
||||
user, and source checksum) do not match. This option changes the
|
||||
behavior of hadoop workers to only check for a version match (eg
|
||||
"1.0.2") but ignore the other build fields (revision, user, and
|
||||
source checksum).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
</configuration>
|
547
savanna/plugins/vanilla/resources/hdfs-default.xml
Normal file
547
savanna/plugins/vanilla/resources/hdfs-default.xml
Normal file
@ -0,0 +1,547 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
|
||||
<!-- Do not modify this file directly. Instead, copy entries that you -->
|
||||
<!-- wish to modify from this file into hdfs-site.xml and change them -->
|
||||
<!-- there. If hdfs-site.xml does not already exist, create it. -->
|
||||
|
||||
<configuration>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.logging.level</name>
|
||||
<value>info</value>
|
||||
<description>The logging level for dfs namenode. Other values are "dir"(trac
|
||||
e namespace mutations), "block"(trace block under/over replications and block
|
||||
creations/deletions), or "all".</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.secondary.http.address</name>
|
||||
<value>0.0.0.0:50090</value>
|
||||
<description>
|
||||
The secondary namenode http server address and port.
|
||||
If the port is 0 then the server will start on a free port.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.address</name>
|
||||
<value>0.0.0.0:50010</value>
|
||||
<description>
|
||||
The datanode server address and port for data transfer.
|
||||
If the port is 0 then the server will start on a free port.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.http.address</name>
|
||||
<value>0.0.0.0:50075</value>
|
||||
<description>
|
||||
The datanode http server address and port.
|
||||
If the port is 0 then the server will start on a free port.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.ipc.address</name>
|
||||
<value>0.0.0.0:50020</value>
|
||||
<description>
|
||||
The datanode ipc server address and port.
|
||||
If the port is 0 then the server will start on a free port.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.handler.count</name>
|
||||
<value>3</value>
|
||||
<description>The number of server threads for the datanode.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.http.address</name>
|
||||
<value>0.0.0.0:50070</value>
|
||||
<description>
|
||||
The address and the base port where the dfs namenode web ui will listen on.
|
||||
If the port is 0 then the server will start on a free port.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.https.enable</name>
|
||||
<value>false</value>
|
||||
<description>Decide if HTTPS(SSL) is supported on HDFS
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.https.need.client.auth</name>
|
||||
<value>false</value>
|
||||
<description>Whether SSL client certificate authentication is required
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.https.server.keystore.resource</name>
|
||||
<value>ssl-server.xml</value>
|
||||
<description>Resource file from which ssl server keystore
|
||||
information will be extracted
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.https.client.keystore.resource</name>
|
||||
<value>ssl-client.xml</value>
|
||||
<description>Resource file from which ssl client keystore
|
||||
information will be extracted
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.https.address</name>
|
||||
<value>0.0.0.0:50475</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.https.address</name>
|
||||
<value>0.0.0.0:50470</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.dns.interface</name>
|
||||
<value>default</value>
|
||||
<description>The name of the Network Interface from which a data node should
|
||||
report its IP address.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.dns.nameserver</name>
|
||||
<value>default</value>
|
||||
<description>The host name or IP address of the name server (DNS)
|
||||
which a DataNode should use to determine the host name used by the
|
||||
NameNode for communication and display purposes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
|
||||
<property>
|
||||
<name>dfs.replication.considerLoad</name>
|
||||
<value>true</value>
|
||||
<description>Decide if chooseTarget considers the target's load or not
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.default.chunk.view.size</name>
|
||||
<value>32768</value>
|
||||
<description>The number of bytes to view for a file on the browser.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.du.reserved</name>
|
||||
<value>0</value>
|
||||
<description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.name.dir</name>
|
||||
<value>${hadoop.tmp.dir}/dfs/name</value>
|
||||
<description>Determines where on the local filesystem the DFS name node
|
||||
should store the name table(fsimage). If this is a comma-delimited list
|
||||
of directories then the name table is replicated in all of the
|
||||
directories, for redundancy. </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.name.edits.dir</name>
|
||||
<value>${dfs.name.dir}</value>
|
||||
<description>Determines where on the local filesystem the DFS name node
|
||||
should store the transaction (edits) file. If this is a comma-delimited list
|
||||
of directories then the transaction file is replicated in all of the
|
||||
directories, for redundancy. Default value is same as dfs.name.dir
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.web.ugi</name>
|
||||
<value>webuser,webgroup</value>
|
||||
<description>The user account used by the web interface.
|
||||
Syntax: USERNAME,GROUP1,GROUP2, ...
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.permissions</name>
|
||||
<value>true</value>
|
||||
<description>
|
||||
If "true", enable permission checking in HDFS.
|
||||
If "false", permission checking is turned off,
|
||||
but all other behavior is unchanged.
|
||||
Switching from one parameter value to the other does not change the mode,
|
||||
owner or group of files or directories.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.permissions.supergroup</name>
|
||||
<value>supergroup</value>
|
||||
<description>The name of the group of super-users.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.block.access.token.enable</name>
|
||||
<value>false</value>
|
||||
<description>
|
||||
If "true", access tokens are used as capabilities for accessing datanodes.
|
||||
If "false", no access tokens are checked on accessing datanodes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.block.access.key.update.interval</name>
|
||||
<value>600</value>
|
||||
<description>
|
||||
Interval in minutes at which namenode updates its access keys.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.block.access.token.lifetime</name>
|
||||
<value>600</value>
|
||||
<description>The lifetime of access tokens in minutes.</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>dfs.data.dir</name>
|
||||
<value>${hadoop.tmp.dir}/dfs/data</value>
|
||||
<description>Determines where on the local filesystem an DFS data node
|
||||
should store its blocks. If this is a comma-delimited
|
||||
list of directories, then data will be stored in all named
|
||||
directories, typically on different devices.
|
||||
Directories that do not exist are ignored.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.data.dir.perm</name>
|
||||
<value>755</value>
|
||||
<description>Permissions for the directories on on the local filesystem where
|
||||
the DFS data node store its blocks. The permissions can either be octal or
|
||||
symbolic.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.replication</name>
|
||||
<value>3</value>
|
||||
<description>Default block replication.
|
||||
The actual number of replications can be specified when the file is created.
|
||||
The default is used if replication is not specified in create time.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.replication.max</name>
|
||||
<value>512</value>
|
||||
<description>Maximal block replication.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.replication.min</name>
|
||||
<value>1</value>
|
||||
<description>Minimal block replication.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.block.size</name>
|
||||
<value>67108864</value>
|
||||
<description>The default block size for new files.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.df.interval</name>
|
||||
<value>60000</value>
|
||||
<description>Disk usage statistics refresh interval in msec.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.block.write.retries</name>
|
||||
<value>3</value>
|
||||
<description>The number of retries for writing blocks to the data nodes,
|
||||
before we signal failure to the application.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.blockreport.intervalMsec</name>
|
||||
<value>3600000</value>
|
||||
<description>Determines block reporting interval in milliseconds.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.blockreport.initialDelay</name> <value>0</value>
|
||||
<description>Delay for first block report in seconds.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.heartbeat.interval</name>
|
||||
<value>3</value>
|
||||
<description>Determines datanode heartbeat interval in seconds.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.handler.count</name>
|
||||
<value>10</value>
|
||||
<description>The number of server threads for the namenode.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.safemode.threshold.pct</name>
|
||||
<value>0.999f</value>
|
||||
<description>
|
||||
Specifies the percentage of blocks that should satisfy
|
||||
the minimal replication requirement defined by dfs.replication.min.
|
||||
Values less than or equal to 0 mean not to wait for any particular
|
||||
percentage of blocks before exiting safemode.
|
||||
Values greater than 1 will make safe mode permanent.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.safemode.min.datanodes</name>
|
||||
<value>0</value>
|
||||
<description>
|
||||
Specifies the number of datanodes that must be considered alive
|
||||
before the name node exits safemode.
|
||||
Values less than or equal to 0 mean not to take the number of live
|
||||
datanodes into account when deciding whether to remain in safe mode
|
||||
during startup.
|
||||
Values greater than the number of datanodes in the cluster
|
||||
will make safe mode permanent.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.safemode.extension</name>
|
||||
<value>30000</value>
|
||||
<description>
|
||||
Determines extension of safe mode in milliseconds
|
||||
after the threshold level is reached.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.balance.bandwidthPerSec</name>
|
||||
<value>1048576</value>
|
||||
<description>
|
||||
Specifies the maximum amount of bandwidth that each datanode
|
||||
can utilize for the balancing purpose in term of
|
||||
the number of bytes per second.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.hosts</name>
|
||||
<value></value>
|
||||
<description>Names a file that contains a list of hosts that are
|
||||
permitted to connect to the namenode. The full pathname of the file
|
||||
must be specified. If the value is empty, all hosts are
|
||||
permitted.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.hosts.exclude</name>
|
||||
<value></value>
|
||||
<description>Names a file that contains a list of hosts that are
|
||||
not permitted to connect to the namenode. The full pathname of the
|
||||
file must be specified. If the value is empty, no hosts are
|
||||
excluded.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.max.objects</name>
|
||||
<value>0</value>
|
||||
<description>The maximum number of files, directories and blocks
|
||||
dfs supports. A value of zero indicates no limit to the number
|
||||
of objects that dfs supports.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.decommission.interval</name>
|
||||
<value>30</value>
|
||||
<description>Namenode periodicity in seconds to check if decommission is
|
||||
complete.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.decommission.nodes.per.interval</name>
|
||||
<value>5</value>
|
||||
<description>The number of nodes namenode checks if decommission is complete
|
||||
in each dfs.namenode.decommission.interval.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.replication.interval</name>
|
||||
<value>3</value>
|
||||
<description>The periodicity in seconds with which the namenode computes
|
||||
repliaction work for datanodes. </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.access.time.precision</name>
|
||||
<value>3600000</value>
|
||||
<description>The access time for HDFS file is precise upto this value.
|
||||
The default value is 1 hour. Setting a value of 0 disables
|
||||
access times for HDFS.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.support.append</name>
|
||||
<description>
|
||||
This option is no longer supported. HBase no longer requires that
|
||||
this option be enabled as sync is now enabled by default. See
|
||||
HADOOP-8230 for additional information.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.delegation.key.update-interval</name>
|
||||
<value>86400000</value>
|
||||
<description>The update interval for master key for delegation tokens
|
||||
in the namenode in milliseconds.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.delegation.token.max-lifetime</name>
|
||||
<value>604800000</value>
|
||||
<description>The maximum lifetime in milliseconds for which a delegation
|
||||
token is valid.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.delegation.token.renew-interval</name>
|
||||
<value>86400000</value>
|
||||
<description>The renewal interval for delegation token in milliseconds.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.failed.volumes.tolerated</name>
|
||||
<value>0</value>
|
||||
<description>The number of volumes that are allowed to
|
||||
fail before a datanode stops offering service. By default
|
||||
any volume failure will cause a datanode to shutdown.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.max.xcievers</name>
|
||||
<value>4096</value>
|
||||
<description>Specifies the maximum number of threads to use for transferring data
|
||||
in and out of the DN.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.use.datanode.hostname</name>
|
||||
<value>false</value>
|
||||
<description>Whether clients should use datanode hostnames when
|
||||
connecting to datanodes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.use.datanode.hostname</name>
|
||||
<value>false</value>
|
||||
<description>Whether datanodes should use datanode hostnames when
|
||||
connecting to other datanodes for data transfer.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.client.local.interfaces</name>
|
||||
<value></value>
|
||||
<description>A comma separated list of network interface names to use
|
||||
for data transfer between the client and datanodes. When creating
|
||||
a connection to read from or write to a datanode, the client
|
||||
chooses one of the specified interfaces at random and binds its
|
||||
socket to the IP of that interface. Individual names may be
|
||||
specified as either an interface name (eg "eth0"), a subinterface
|
||||
name (eg "eth0:0"), or an IP address (which may be specified using
|
||||
CIDR notation to match a range of IPs).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.kerberos.internal.spnego.principal</name>
|
||||
<value>${dfs.web.authentication.kerberos.principal}</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
|
||||
<value>${dfs.web.authentication.kerberos.principal}</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.invalidate.work.pct.per.iteration</name>
|
||||
<value>0.32f</value>
|
||||
<description>
|
||||
*Note*: Advanced property. Change with caution.
|
||||
This determines the percentage amount of block
|
||||
invalidations (deletes) to do over a single DN heartbeat
|
||||
deletion command. The final deletion count is determined by applying this
|
||||
percentage to the number of live nodes in the system.
|
||||
The resultant number is the number of blocks from the deletion list
|
||||
chosen for proper invalidation over a single heartbeat of a single DN.
|
||||
Value should be a positive, non-zero percentage in float notation (X.Yf),
|
||||
with 1.0f meaning 100%.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.replication.work.multiplier.per.iteration</name>
|
||||
<value>2</value>
|
||||
<description>
|
||||
*Note*: Advanced property. Change with caution.
|
||||
This determines the total amount of block transfers to begin in
|
||||
parallel at a DN, for replication, when such a command list is being
|
||||
sent over a DN heartbeat by the NN. The actual number is obtained by
|
||||
multiplying this multiplier with the total number of live nodes in the
|
||||
cluster. The result number is the number of blocks to begin transfers
|
||||
immediately for, per DN heartbeat. This number can be any positive,
|
||||
non-zero integer.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.check.stale.datanode</name>
|
||||
<value>false</value>
|
||||
<description>
|
||||
Indicate whether or not to check "stale" datanodes whose
|
||||
heartbeat messages have not been received by the namenode
|
||||
for more than a specified time interval. If this configuration
|
||||
parameter is set as true, the stale datanodes will be moved to
|
||||
the end of the target node list for reading. The writing will
|
||||
also try to avoid stale nodes.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.namenode.stale.datanode.interval</name>
|
||||
<value>30000</value>
|
||||
<description>
|
||||
Default time interval for marking a datanode as "stale", i.e., if
|
||||
the namenode has not received heartbeat msg from a datanode for
|
||||
more than this time interval, the datanode will be marked and treated
|
||||
as "stale" by default.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
</configuration>
|
1282
savanna/plugins/vanilla/resources/mapred-default.xml
Normal file
1282
savanna/plugins/vanilla/resources/mapred-default.xml
Normal file
File diff suppressed because it is too large
Load Diff
20
savanna/plugins/vanilla/resources/setup-general.sh.template
Normal file
20
savanna/plugins/vanilla/resources/setup-general.sh.template
Normal file
@ -0,0 +1,20 @@
#!/bin/bash

echo "----- Setting up Hadoop environment config"

{% for envconf in env_configs -%}
echo "{{envconf}}" >> /tmp/hadoop-env.sh
{% endfor %}

cat /etc/hadoop/hadoop-env.sh >> /tmp/hadoop-env.sh
mv /tmp/hadoop-env.sh /etc/hadoop/hadoop-env.sh


echo "----- Setting directory ownership and permissions"

#TODO(aignatov): Need to pass /mnt via args in the future, when the HDFS placement feature is ready
chown -R hadoop:hadoop /mnt
chmod -R 755 /mnt

{% block master %}
{% endblock %}
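setup-general.sh.template above is a plain Jinja2 template: env_configs is expected to be a list of lines to append to hadoop-env.sh, and the empty master block leaves room for a master-specific child template. A minimal rendering sketch, where the inline template string and the example value are stand-ins, not the plugin's actual rendering code:

import jinja2

# Stand-in for the contents of setup-general.sh.template.
template_text = (
    '{% for envconf in env_configs -%}\n'
    'echo "{{envconf}}" >> /tmp/hadoop-env.sh\n'
    '{% endfor %}'
)

# Each entry in env_configs becomes one appended line in hadoop-env.sh.
script = jinja2.Template(template_text).render(
    env_configs=['HADOOP_HEAPSIZE=1024'])  # example value only
print(script)  # -> echo "HADOOP_HEAPSIZE=1024" >> /tmp/hadoop-env.sh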
51
savanna/plugins/vanilla/utils.py
Normal file
@ -0,0 +1,51 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def get_node_groups(cluster, proc_list=list()):
    proc_list = [proc_list] if type(proc_list) in [str, unicode] else proc_list
    return [ng for ng in cluster.node_groups
            if set(proc_list).issubset(ng.node_processes)]


def get_instances(cluster, proc_list=list()):
    nodes = get_node_groups(cluster, proc_list)
    return reduce(lambda a, b: a + b.instances, nodes, [])


def get_namenode(cluster):
    nn = get_instances(cluster, "namenode")
    return nn[0] if nn else None


def get_jobtracker(cluster):
    jt = get_instances(cluster, "jobtracker")
    return jt[0] if jt else None


def get_datanodes(cluster):
    return get_instances(cluster, 'datanode')


def get_tasktrackers(cluster):
    return get_instances(cluster, 'tasktracker')


def get_secondarynamenodes(cluster):
    return get_instances(cluster, 'secondarynamenode')


def generate_host_names(nodes):
    return "\n".join([n.hostname for n in nodes])
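These helpers are the bridge from a cluster model to concrete hosts. A hypothetical usage sketch; the cluster variable is assumed to be an already-populated savanna.db.models.Cluster, and the idea of a slaves file is only an example, not something this commit shows:

from savanna.plugins.vanilla import utils as u

# 'cluster' is assumed to already exist, with node groups whose
# node_processes include "namenode", "datanode", "tasktracker", etc.
namenode = u.get_namenode(cluster)                    # single instance, or None
datanodes = u.get_datanodes(cluster)                  # list of instances
slaves_content = u.generate_host_names(datanodes)     # "host1\nhost2\n..."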
0
savanna/tests/unit/plugins/__init__.py
Normal file
0
savanna/tests/unit/plugins/vanilla/__init__.py
Normal file
53
savanna/tests/unit/plugins/vanilla/test_plugin.py
Normal file
@ -0,0 +1,53 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest2

from savanna.db import models as m
from savanna.plugins.vanilla import exceptions as ex
from savanna.plugins.vanilla import plugin as p


class VanillaPluginTest(unittest2.TestCase):
    def setUp(self):
        self.pl = p.VanillaProvider()
        self.cl = m.Cluster("cluster1", "tenant1", "vanilla", "1.1.2")
        self.ng1 = m.NodeGroup("nn", "f1", ["namenode"], 1)
        self.ng2 = m.NodeGroup("jt", "f1", ["jobtracker"], 1)
        self.ng3 = m.NodeGroup("tt", "f1", ["tasktracker"], 10)

    def test_validate(self):
        self.cl.node_groups = [self.ng1]
        self.pl.validate(self.cl)
        with self.assertRaises(ex.NotSingleNameNodeException):
            self.ng1.count = 0
            self.pl.validate(self.cl)
        with self.assertRaises(ex.NotSingleNameNodeException):
            self.ng1.count = 2
            self.pl.validate(self.cl)
        self.ng1.count = 1

        self.cl.node_groups.append(self.ng2)
        self.pl.validate(self.cl)
        with self.assertRaises(ex.NotSingleJobTrackerException):
            self.ng2.count = 2
            self.pl.validate(self.cl)

        self.cl.node_groups.append(self.ng3)
        self.ng2.count = 1
        self.pl.validate(self.cl)
        with self.assertRaises(ex.TaskTrackersWithoutJobTracker):
            self.ng2.count = 0
            self.pl.validate(self.cl)
58
savanna/tests/unit/plugins/vanilla/test_utils.py
Normal file
@ -0,0 +1,58 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest2

from savanna.db import models as m
from savanna.plugins.vanilla import utils as u


class VanillaUtilsTest(unittest2.TestCase):
    def setUp(self):
        self.c1 = m.Cluster("cluster1", "tenant1", "vanilla", "1.1.2")
        self.ng1 = m.NodeGroup("master", "f1", ["jt", "nn"], 1)
        self.ng2 = m.NodeGroup("workers", "f1", ["tt", "dn"], 3)
        self.ng3 = m.NodeGroup("sn", "f1", ["dn"], 1)
        self.c1.node_groups = [self.ng1, self.ng2, self.ng3]

        self.ng1.instances = [m.Instance("ng1", "i1", "master")]
        self.ng2.instances = [m.Instance("ng2", "i2", "worker1"),
                              m.Instance("ng2", "i3", "worker2"),
                              m.Instance("ng2", "i4", "worker3")]
        self.ng3.instances = [m.Instance("ng3", "i5", "sn")]

    def test_get_node_groups(self):
        self.assertListEqual(u.get_node_groups(self.c1), self.c1.node_groups)
        self.assertListEqual(u.get_node_groups(self.c1, ["wrong-process"]), [])
        self.assertListEqual(u.get_node_groups(self.c1, ['dn', 'tt']),
                             [self.ng2])
        self.assertListEqual(u.get_node_groups(self.c1, 'dn'),
                             [self.ng2, self.ng3])
        self.assertListEqual(u.get_node_groups(self.c1, ['dn']),
                             [self.ng2, self.ng3])
        self.assertListEqual(u.get_node_groups(self.c1, ['jt', 'tt']), [])

    def test_get_instances(self):
        self.assertEquals(len(u.get_instances(self.c1)), 5)
        self.assertListEqual(u.get_instances(self.c1, 'wrong-process'), [])
        self.assertListEqual(u.get_instances(self.c1, 'nn'),
                             self.ng1.instances)
        self.assertListEqual(u.get_instances(self.c1, 'dn'),
                             self.ng2.instances + self.ng3.instances)

    def test_generate_lines_from_list(self):
        self.assertEquals(u.generate_host_names(self.ng2.instances),
                          "worker1\nworker2\nworker3")
        self.assertEquals(u.generate_host_names([]), "")
4
setup.py
@ -27,8 +27,8 @@ setuptools.setup(
     cmdclass=common_setup.get_cmdclass(),
     packages=setuptools.find_packages(exclude=['bin']),
     package_data={'savanna': [
-        'resources/*.template',
-        'resources/*.xml'
+        'plugins/vanilla/resources/*.template',
+        'plugins/vanilla/resources/*.xml'
     ]},
     install_requires=requires,
     dependency_links=depend_links,
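The package_data change is what ships the plugin's XML defaults and shell template with the installed package. A minimal lookup sketch, assuming pkg_resources (setuptools) is available; the file name is just one of the resources added in this commit:

import pkg_resources

# Resolves to the installed location of a resource declared in package_data.
path = pkg_resources.resource_filename(
    'savanna', 'plugins/vanilla/resources/setup-general.sh.template')
with open(path) as f:
    template_text = f.read()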