add impl of SparkWordCount example
Change-Id: Ie377e3313f68cde6d580b93be8e15e364fc56718
This commit is contained in:
parent
e25d9d7c74
commit
881409ed7b
Binary file not shown.
|
@ -0,0 +1,61 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
not use this file except in compliance with the License. You may obtain
|
||||
copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
License for the specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>sahara.edp.spark</groupId>
|
||||
<artifactId>sparkwordcount</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
<packaging>jar</packaging>
|
||||
<name>"Spark Word Count"</name>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.scala-tools</groupId>
|
||||
<artifactId>maven-scala-plugin</artifactId>
|
||||
<version>2.15.2</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.1</version>
|
||||
<configuration>
|
||||
<source>1.6</source>
|
||||
<target>1.6</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>scala-library</artifactId>
|
||||
<version>2.10.4</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.10 -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.10</artifactId>
|
||||
<version>1.6.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
|
@ -0,0 +1,38 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package sahara.edp.spark
|
||||
|
||||
import org.apache.spark.SparkContext
|
||||
import org.apache.spark.SparkContext._
|
||||
import org.apache.spark.SparkConf
|
||||
|
||||
object SparkWordCount {
|
||||
def main(args: Array[String]) {
|
||||
val sc = new SparkContext(new SparkConf().setAppName("Spark Count"))
|
||||
val dfsFileName = args(1)
|
||||
// split each document into words
|
||||
val tokenized = sc.textFile(args(0)).flatMap(_.split(" "))
|
||||
|
||||
// count the occurrence of each word
|
||||
val wordCounts = tokenized.map((_, 1)).reduceByKey(_ + _)
|
||||
|
||||
val fileRDD = sc.parallelize(wordCounts.collect())
|
||||
fileRDD.saveAsTextFile(dfsFileName)
|
||||
}
|
||||
}
|
|
@ -85,6 +85,7 @@ edp_jobs_flow:
|
|||
fs.swift.service.sahara.password: ${os_password}
|
||||
args:
|
||||
- '{input_datasource}'
|
||||
- '{output_datasource}'
|
||||
mapr_pig_job:
|
||||
- type: Pig
|
||||
input_datasource:
|
||||
|
|
Loading…
Reference in New Issue