-
Notifications
You must be signed in to change notification settings - Fork 120
Installation on EC2 or linux machine
- Install Anaconda2 (the Python 2.7 distribution)
- Install Spark: `wget https://www.apache.org/dyn/closer.lua/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz`
- Install Java 8
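- Install Hadoop (the environment settings below expect it unpacked at `$HOME/hadoop-2.7.0`), then set the following environment variables: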
# Environment for running PySpark (Jupyter driver) against a Hadoop 2.7.0
# install under the ec2-user account.
# NOTE(review): presumably meant to be sourced (e.g. from ~/.bashrc) rather
# than executed, so no shebang or `set -e` is added here -- confirm.

# PATH is replaced outright (not appended to); the Java and Hadoop bin
# directories are added further down once their homes are defined.
export PATH="/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:/opt/aws/bin:/home/ec2-user/.local/bin:/home/ec2-user/bin:/home/ec2-user/anaconda2/bin"
export HOME='/home/ec2-user'

# Hadoop: every component home points at the same unpacked distribution, so
# derive them all from HADOOP_HOME to keep the version in one place.
export HADOOP_HOME="$HOME/hadoop-2.7.0"
export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
export HADOOP_MAPRED_HOME="$HADOOP_HOME"
export HADOOP_COMMON_HOME="$HADOOP_HOME"
export HADOOP_HDFS_HOME="$HADOOP_HOME"
export YARN_HOME="$HADOOP_HOME"

# Java 8 JRE. The Java 7 setting is kept for reference only; it must stay on
# its own line, otherwise the `#` comments out the live exports after it.
#export JAVA_HOME=/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.141.x86_64/jre
export JAVA_HOME='/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.151-1.b12.35.amzn1.x86_64/jre'
export PATH="$PATH:$JAVA_HOME/bin"
export PATH="$PATH:$HADOOP_HOME/bin"

# Make `pyspark` launch a Jupyter notebook server as its driver, listening on
# all interfaces (0.0.0.0).
export PYSPARK_DRIVER_PYTHON='jupyter'
export PYSPARK_DRIVER_PYTHON_OPTS='notebook --ip 0.0.0.0'