Launch command from the shell
oozie job -oozie http://sssss.risorse.xxxx:11000/oozie -config job.properties -run
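
On a successful submission the CLI prints the ID of the new workflow job; that ID (a placeholder below) can then be used to follow the run with the standard Oozie CLI, for example:

oozie job -oozie http://sssss.risorse.xxxx:11000/oozie -info <job-id>
oozie job -oozie http://sssss.risorse.xxxx:11000/oozie -log <job-id>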
Launch via the REST service
curl --header "Content-Type: application/xml;charset=UTF-8" \
  --request POST \
  --data '<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<configuration>
  <property><name>nameNode</name><value>hdfs://nameservice1</value></property>
  <property><name>mode1</name><value>client</value></property>
  <property><name>oozie.wf.application.path</name><value>${nameNode}/user/${user.name}/apps/spark</value></property>
  <property><name>security_enabled</name><value>False</value></property>
  <property><name>filewf</name><value>${nameNode}/user/${user.name}/apps/spark/latam.jar#latam.jar</value></property>
  <property><name>oozie.use.system.libpath</name><value>True</value></property>
  <property><name>confpath</name><value>${nameNode}/user/${user.name}/apps/spark/configuration.properties</value></property>
  <property><name>latamjar</name><value>latam.jar</value></property>
  <property><name>jobTracker</name><value>yarnRM</value></property>
  <property><name>distributrice</name><value>argentina</value></property>
  <property><name>master2</name><value>local[*]</value></property>
  <property><name>user.name</name><value>ae100835</value></property>
  <property><name>hivemetastoreuris</name><value>thrift://elbahidata05.risorse.enel:9083</value></property>
</configuration>' \
  http://elbahidata06.risorse.enel:11000/oozie/v2/jobs?action=start
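
The REST call answers with a small JSON body containing the id of the created workflow job; assuming that id (a placeholder below), the same WS API can be used to inspect the run, for example:

curl "http://elbahidata06.risorse.enel:11000/oozie/v2/job/<job-id>?show=info"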
Required files:
The workflow.xml file and the application jar (xxx.jar) must be placed on HDFS, together with any other resources they need (e.g. a configuration file):
hdfs dfs -rm /user/ae100835/apps/spark/latam.jar
hdfs dfs -put latam.jar /user/ae100835/apps/spark/
hdfs dfs -rm /user/ae100835/apps/spark/workflow.xml
hdfs dfs -put workflow.xml /user/ae100835/apps/spark/
hdfs dfs -chmod 777 /user/ae100835/apps/spark/*
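
As a quick check, the deployed files and their permissions can be listed with:

hdfs dfs -ls /user/ae100835/apps/spark/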
job.properties (passed with -config in the shell launch command):

distributrice=argentina
nameNode=hdfs://nameservice1
jobTracker=yarnRM
master1=yarn
mode1=client
master2=local[*]
latamjar=latam.jar
confpath=${nameNode}/user/${user.name}/apps/spark/configuration.properties
filewf=${nameNode}/user/${user.name}/apps/spark/latam.jar#latam.jar
hivemetastoreuris=thrift://xxxx.risorse.ssss:9083
oozie.use.system.libpath=True
security_enabled=False
oozie.wf.application.path=${nameNode}/user/${user.name}/apps/spark
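
Before a real run, the same properties file can be submitted in dry-run mode, which asks the Oozie server to validate the configuration and the deployed workflow without actually starting the job (same server URL as in the launch command above):

oozie job -oozie http://sssss.risorse.xxxx:11000/oozie -config job.properties -dryrun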
workflow.xml:

<workflow-app xmlns='uri:oozie:workflow:0.5' name='Latam-wf'>
  <start to='csv-producer' />

  <action name='csv-producer'>
    <spark xmlns="uri:oozie:spark-action:0.2">
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <master>${master1}</master>
      <mode>${mode1}</mode>
      <name>Spark-csv-producer</name>
      <class>it.enel.latam.App</class>
      <jar>${latamjar}</jar>
      <spark-opts>--driver-memory 2G --executor-memory 4G --num-executors 8</spark-opts>
      <arg>${distributrice}</arg>
      <arg>${confpath}</arg>
      <arg>${hivemetastoreuris}</arg>
      <file>${filewf}</file>
    </spark>
    <ok to="csv-to-db" />
    <error to="fail" />
  </action>

  <action name='csv-to-db'>
    <spark xmlns="uri:oozie:spark-action:0.2">
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <master>${master2}</master>
      <name>Spark-csv-to-db</name>
      <class>it.enel.latam.AppToDB</class>
      <jar>${latamjar}</jar>
      <spark-opts>--driver-memory 2G</spark-opts>
      <arg>${distributrice}</arg>
      <arg>${confpath}</arg>
      <file>${filewf}</file>
    </spark>
    <ok to="end" />
    <error to="fail" />
  </action>

  <kill name="fail">
    <message>Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
  </kill>

  <end name='end' />
</workflow-app>
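
Before uploading a modified workflow.xml it can be worth checking it locally: xmllint verifies that the file is well-formed XML, and (depending on the Oozie client version) oozie validate can check it against the workflow schema:

xmllint --noout workflow.xml
oozie validate workflow.xml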