-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.sh
executable file
·88 lines (67 loc) · 2.74 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/bin/sh
# Stage the input data in HDFS and build the application assembly with sbt.
#
# Environment (both optional):
#   CASCADING_FABRIC   forwarded to sbt as -DCASCADING_FABRIC (default: hadoop2-tez)
#   CASCADING_VERSION  forwarded to sbt as -DCASCADING_VERSION, only when non-empty

# Trace every command and abort on the first unhandled failure.
set -x
set -e

# Create the wcplus directory in HDFS unless listing it already succeeds.
hdfs dfs -ls wcplus || hdfs dfs -mkdir wcplus
# Upload the local books directory unless wcplus/books can already be listed.
hdfs dfs -ls wcplus/books || hdfs dfs -copyFromLocal books wcplus

SBT_OPTIONS="-DCASCADING_FABRIC=${CASCADING_FABRIC:-hadoop2-tez}"
# Quoted -n test: an unquoted `[ ! -z $VAR ]` only behaves for an empty value
# by accident of how `[` parses a lone operand, and breaks on whitespace.
if [ -n "${CASCADING_VERSION}" ]; then
  SBT_OPTIONS="$SBT_OPTIONS -DCASCADING_VERSION=${CASCADING_VERSION}"
fi
# SBT_OPTIONS is deliberately left unquoted: it carries several
# space-separated flags that must reach sbt as separate arguments.
sbt $SBT_OPTIONS assembly
# Heap size in megabytes; exported for Hadoop and reused for the -Xmx flag.
heap_mb=1024
export HADOOP_HEAPSIZE=$heap_mb
#export HADOOP_HEAPSIZE=6000

# Directory that all cascading.planner.*.path properties point into.
plan_dir=/tmp/plan-wcplus

# Assemble the JVM flags: heap limit first, then one -D per planner path
# property, each entry written as <property suffix>=<file name>.
FLAGS="-Xmx${HADOOP_HEAPSIZE}m"
for entry in \
  plan.path=plan.lst \
  stats.path=stats.lst \
  plan.transforms.path=xforms.lst
do
  FLAGS="${FLAGS} -Dcascading.planner.${entry%%=*}=${plan_dir}/${entry#*=}"
done

# Optional switches, disabled by default; uncomment a line to enable it.
#FLAGS="$FLAGS -Dcascading.cascade.maxconcurrentflows=1"
#FLAGS="$FLAGS -Dtest.profile.node=E4DB7E97D232413E91842D31D53B5F19 -Dtest.profile.path=/tmp/jfr/"
#FLAGS="$FLAGS -Dcascading.management.document.service.archive.dir=/tmp/plan-wcplus/docserv.archive"
#FLAGS="$FLAGS -Dcascading.stats.complete_child_details.block.duration=6000000"
#FLAGS="$FLAGS -Dorg.slf4j.simpleLogger.log.org.apache.tez.runtime.library.common.writers=DEBUG"
#FLAGS="$FLAGS -Dorg.slf4j.simpleLogger.defaultLogLevel=DEBUG -Dsun.io.serialization.extendedDebugInfo=true "
#FLAGS="$FLAGS -Dorg.slf4j.simpleLogger.log.cascading.flow.stream.graph.StreamGraph=DEBUG"

# It is unclear where log4j would otherwise pick up its properties file, so
# point it explicitly at the copy under the current working directory.
log4j_config="file:${PWD}/src/main/resources/log4j.properties"
FLAGS="${FLAGS} -Dlog4j.configuration=${log4j_config}"
# Most recently modified application jar from the build output directory;
# left empty when no jar matches.
JAR=$(ls -t target/scala-2.11/wcplus-*.jar | head -1)

# Most recently modified Driven plug-in jar in the current directory. When
# none exists, a placeholder name is used so that the -f check below fails.
# The braces matter: `|` binds tighter than `||`, so without grouping
# `head -1` would only trim the fallback echo and several matching jars
# would all end up (newline-separated) in DRIVEN.
DRIVEN=./$( { ls -t driven-plugin-*.jar || echo driven-plugin-__fetchme__.jar; } | head -1 )

if [ ! -f "$DRIVEN" ]; then
  # Only a warning: the script pauses and then carries on without the plug-in.
  echo "Driven plug-in not found here. Suggestion: run "
  echo " wget -i http://eap.concurrentinc.com/driven/1.3/driven-plugin/latest-jar.txt "
  echo "(check it out http://www.cascading.org/driven/ )"
  echo "and start again. Script will continue in 10 seconds."
  sleep 10
fi

# Prepend the plug-in to the Hadoop classpath. Quoted because some sh
# implementations apply word splitting to the arguments of `export`.
export HADOOP_CLASSPATH="$DRIVEN:$HADOOP_CLASSPATH"
# Partition count later passed to the job via --tez-partitions.
TEZ_PARTITIONS=35

# Map the requested CASCADING_FABRIC onto the command-line switch handed to
# the job. An unset, empty or unrecognized value (as well as an explicit
# "hadoop2-tez") selects the tez switch.
case "${CASCADING_FABRIC}" in
  local)       FABRIC=--local ;;
  hadoop)      FABRIC=--hadoop1 ;;
  hadoop2-mr1) FABRIC=--hadoop2-mr1 ;;
  *)           FABRIC=--hadoop2-tez ;;
esac
# Launch the job through `hadoop jar`, then copy the HDFS wcplus directory
# into target/wcplus.
#
# Optional environment overrides for the job arguments (with defaults):
#   FILTER (true), NGRAMS (5), FAKEMEDIAN (false), CRASH (true)

# Export the assembled JVM flags through both Hadoop option variables.
export HADOOP_OPTS="${FLAGS}"
export HADOOP_CLIENT_OPTS="${FLAGS}"

# Without a jar the unquoted argument used to vanish, making `hadoop jar`
# treat the class name as the jar path; stop with a clear message instead.
if [ -z "$JAR" ]; then
  echo "No wcplus jar found under target/scala-2.11; cannot launch the job." >&2
  exit 1
fi

time hadoop jar "$JAR" \
  com.twitter.scalding.Tool com.transparencyrights.demo.wcplus.ComputeApp \
  "$FABRIC" \
  --filter "${FILTER:-true}" --manygrams "${NGRAMS:-5}" --fakeMedian "${FAKEMEDIAN:-false}" --crash "${CRASH:-true}" \
  --root wcplus \
  --tez-partitions "$TEZ_PARTITIONS" \
  --tez.lib.uris hdfs://tpcy-par/apps/tez-0.6/tez-0.6.2-SNAPSHOT-guavafix.tar.gz \
  --queue prod

# Remove any previous local copy first; a failure to remove it is ignored.
rm -rf target/wcplus ||true
hdfs dfs -copyToLocal wcplus target/wcplus