Oozie con applicazione Spark

comando di lancio da shell

oozie job -oozie http://sssss.risorse.xxxx:11000/oozie  -config job.properties -run

lancio con servizio rest

curl --header "Content-Type: application/xml;charset=UTF-8" \
 --request POST \
 --data '<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration><property><name>nameNode</name><value>hdfs://nameservice1</value></property><property><name>mode1</name><value>client</value></property><property><name>oozie.wf.application.path</name><value>${nameNode}/user/${user.name}/apps/spark</value></property><property><name>security_enabled</name><value>False</value></property><property><name>filewf</name><value>${nameNode}/user/${user.name}/apps/spark/latam.jar#latam.jar</value></property><property><name>oozie.use.system.libpath</name><value>True</value></property><property><name>confpath</name><value>${nameNode}/user/${user.name}/apps/spark/configuration.properties</value></property><property><name>latamjar</name><value>latam.jar</value></property><property><name>jobTracker</name><value>yarnRM</value></property><property><name>distributrice</name><value>argentina</value></property><property><name>master2</name><value>local[*]</value></property><property><name>user.name</name><value>ae100835</value></property><property><name>hivemetastoreuris</name><value>thrift://elbahidata05.risorse.enel:9083</value></property></configuration>' \
 http://elbahidata06.risorse.enel:11000/oozie/v2/jobs?action=start

file necessari:

  • job.properties : elenco delle properties da passare al workflow.xml
  • workflow.xml: il workflow
  • xxx.jar applicativo spark
  • job.xml: alternativa a job.properties per il lancio in POST

I file workflow.xml e xxx.jar vanno posizionati su hdfs (insieme a tutte le risorse necessarie es.: un file di configurazione)

hdfs dfs -rm /user/ae100835/apps/spark/latam.jar
hdfs dfs -put latam.jar /user/ae100835/apps/spark/
hdfs dfs -rm /user/ae100835/apps/spark/workflow.xml
hdfs dfs -put workflow.xml /user/ae100835/apps/spark/
hdfs dfs -chmod 777 /user/ae100835/apps/spark/*

job.properties

distributrice=argentina
nameNode=hdfs://nameservice1
jobTracker=yarnRM
master1=yarn
mode1=client
master2=local[*]
latamjar=latam.jar
confpath=${nameNode}/user/${user.name}/apps/spark/configuration.properties
filewf=${nameNode}/user/${user.name}/apps/spark/latam.jar#latam.jar
hivemetastoreuris=thrift://xxxx.risorse.ssss:9083
oozie.use.system.libpath=True
security_enabled=False
oozie.wf.application.path=${nameNode}/user/${user.name}/apps/spark

workflow.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Oozie workflow "Latam-wf": two chained Spark actions.
  1) csv-producer  - runs it.enel.latam.App on YARN (${master1}/${mode1})
  2) csv-to-db     - runs it.enel.latam.AppToDB on ${master2} (local[*] per job.properties)
  All ${...} placeholders are resolved from job.properties (or the job.xml POSTed to the REST API).
-->
<workflow-app xmlns="uri:oozie:workflow:0.5" name="Latam-wf">
    <start to="csv-producer"/>

    <action name="csv-producer">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>${master1}</master>
            <mode>${mode1}</mode>
            <name>Spark-csv-producer</name>
            <class>it.enel.latam.App</class>
            <jar>${latamjar}</jar>
            <spark-opts>--driver-memory 2G --executor-memory 4G --num-executors 8</spark-opts>
            <!-- Positional arguments consumed by the Spark application's main(). -->
            <arg>${distributrice}</arg>
            <arg>${confpath}</arg>
            <arg>${hivemetastoreuris}</arg>
            <!-- ${filewf} = hdfs path to latam.jar with a #latam.jar fragment, so the
                 jar is localized into the container working dir under that name. -->
            <file>${filewf}</file>
        </spark>
        <ok to="csv-to-db"/>
        <error to="fail"/>
    </action>

    <action name="csv-to-db">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- NOTE(review): no <mode> here; ${master2} is local[*] in job.properties,
                 where deploy mode is not applicable. -->
            <master>${master2}</master>
            <name>Spark-csv-to-db</name>
            <class>it.enel.latam.AppToDB</class>
            <jar>${latamjar}</jar>
            <spark-opts>--driver-memory 2G</spark-opts>
            <arg>${distributrice}</arg>
            <arg>${confpath}</arg>
            <file>${filewf}</file>
        </spark>
        <ok to="end"/>
        <error to="fail"/>
    </action>

    <kill name="fail">
        <!-- Message kept on one line: text content is data in XML, so pretty-printing
             it would embed the indentation whitespace in the reported error. -->
        <message>Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>