Top Spark Streaming Interview Questions
Lab13: Spark-streaming
#How to start
sudo yum update
spark-shell
#steps:
#create a folder spark-streaming and go to the folder
mkdir spark-streaming
#go to spark-streaming folder
cd spark-streaming
git clone https://github.com/databricks/spark-training.git
#go to scala folder
cd spark-training/streaming/scala
#Twitter Setup
#open twitter setting page https://apps.twitter.com/
#This page lists the Twitter-based applications that you own and have already created consumer keys and access tokens for. This list will be empty if you have never created any applications. Create a new temporary application: to do this, click on the blue "Create a new application" button.
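#if you are not already in the scala folder, go back to it (adjust the path to wherever you created the spark-streaming folder)
cd /spark-streaming/spark-training/streaming/scala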
vi Tutorial.scala
#import org.apache.spark._
#import org.apache.spark.SparkContext._
#import org.apache.spark.streaming._
#import org.apache.spark.streaming.twitter._
#import org.apache.spark.streaming.StreamingContext._
#import TutorialHelper._
#object Tutorial {
#def main(args: Array[String]) {
# // Checkpoint directory
# val checkpointDir = TutorialHelper.getCheckpointDirectory()
# // Configure Twitter credentials
val apiKey = ""
val apiSecret = ""
val accessToken = ""
val accessTokenSecret = ""
#TutorialHelper.configureTwitterCredentials(apiKey, apiSecret, accessToken, accessTokenSecret)
#// Your code goes here
# val ssc = new StreamingContext(new SparkConf(), Seconds(1))
# val tweets = TwitterUtils.createStream(ssc, None)
#val statuses = tweets.map(status => status.getText())
#statuses.print()
#ssc.checkpoint(checkpointDir)
#ssc.start()
#ssc.awaitTermination()
# }
#}
#The ../../sbt/sbt assembly command compiles the Tutorial class and creates a JAR file in /streaming/scala/target/scala-2.10/. It will take some time to build.
../../sbt/sbt assembly
#we will use spark-submit to execute our program for this lab
spark-submit --class Tutorial ../../streaming/scala/target/scala-2.10/Tutorial-assembly-0.1-SNAPSHOT.jar