From 5f32d4a981245c0c299e1c4daa0597d0fa560582 Mon Sep 17 00:00:00 2001 From: nkalband Date: Wed, 10 May 2017 12:14:17 +0530 Subject: [PATCH 1/2] changes to include tag details changes to include tag details --- get_spark_params.sh | 8 ++++---- tpcdsExtractJSONtoMongoDB.py | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/get_spark_params.sh b/get_spark_params.sh index 6b25e3d..fc0420f 100755 --- a/get_spark_params.sh +++ b/get_spark_params.sh @@ -8,10 +8,10 @@ executor_memory=`grep executorMemory $logfile | awk {'print $2'}` driver_memory=`grep driverMemory $logfile | awk {'print $2'}` driver_cores=`grep driverCores $logfile | awk {'print $2'}` total_executor_cores=`grep totalExecutorCores $logfile | awk {'print $2'}` -shuffle_partitions=`grep "^spark.sql.shuffle.partitions" $logfile | awk {'print $3'}` -gc_threads=`grep "^spark.executor.extraJavaOptions" $logfile | cut -f3 -d"=" | awk {'print $1'}` -exec_memoryOverhead=`grep "^spark.yarn.executor.memoryOverhead" $logfile | awk {'print $3'}` -driver_memoryOverhead=`grep "^spark.yarn.driver.memoryOverhead" $logfile | awk {'print $3'}` +shuffle_partitions=`grep "spark.sql.shuffle.partitions" $logfile | head -1 |awk -F"," {'print $2'} | sed 's/)//'` +gc_threads=`grep "spark.executor.extraJavaOptions" $logfile | head -1 |cut -f3 -d"=" | awk {'print $1'}` +exec_memoryOverhead=`grep "spark.yarn.executor.memoryOverhead" $logfile | head -1 |awk -F"," {'print $2'} | sed 's/)//'` +driver_memoryOverhead=`grep "spark.yarn.driver.memoryOverhead" $logfile | head -1 |awk -F"," {'print $2'} | sed 's/)//'` echo "{" >${WORKDIR}/spark_params echo "\"num_executors\":\"$num_executors\"," >>${WORKDIR}/spark_params diff --git a/tpcdsExtractJSONtoMongoDB.py b/tpcdsExtractJSONtoMongoDB.py index 554816e..c42c4bf 100755 --- a/tpcdsExtractJSONtoMongoDB.py +++ b/tpcdsExtractJSONtoMongoDB.py @@ -18,7 +18,8 @@ #date=sys.argv[4] #print(logFile) master=sys.argv[4] -git_branch=sys.argv[5] +git_branch="" +git_tag=sys.argv[5] 
cluster_file=sys.argv[6] spark_param_file=sys.argv[7] tfile=open(logFile, 'rt') @@ -52,6 +53,7 @@ dict_stats["date"]=date_str dict_stats["master"]=master dict_stats["branch"]=git_branch +dict_stats["tag"]=git_tag workloads=[] dict_temp={} metrics_temp=[] From b18412eb78ad36477d082fbe952e14cf9863ced1 Mon Sep 17 00:00:00 2001 From: nkalband Date: Fri, 12 May 2017 17:40:36 +0530 Subject: [PATCH 2/2] to retrieve spark params for different format log files to retrieve spark params for different format log files --- get_spark_params.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/get_spark_params.sh b/get_spark_params.sh index fc0420f..96efa9d 100755 --- a/get_spark_params.sh +++ b/get_spark_params.sh @@ -8,10 +8,11 @@ executor_memory=`grep executorMemory $logfile | awk {'print $2'}` driver_memory=`grep driverMemory $logfile | awk {'print $2'}` driver_cores=`grep driverCores $logfile | awk {'print $2'}` total_executor_cores=`grep totalExecutorCores $logfile | awk {'print $2'}` -shuffle_partitions=`grep "spark.sql.shuffle.partitions" $logfile | head -1 |awk -F"," {'print $2'} | sed 's/)//'` +shuffle_partitions=`grep "spark.sql.shuffle.partitions" $logfile | head -1 | sed 's/->//' | sed 's/,/ /' | sed 's/)//' | sed 's/(//' |awk {'print $2'}` gc_threads=`grep "spark.executor.extraJavaOptions" $logfile | head -1 |cut -f3 -d"=" | awk {'print $1'}` -exec_memoryOverhead=`grep "spark.yarn.executor.memoryOverhead" $logfile | head -1 |awk -F"," {'print $2'} | sed 's/)//'` -driver_memoryOverhead=`grep "spark.yarn.driver.memoryOverhead" $logfile | head -1 |awk -F"," {'print $2'} | sed 's/)//'` +exec_memoryOverhead=`grep "spark.yarn.executor.memoryOverhead" $logfile | head -1 | sed 's/->//' | sed 's/,/ /' | sed 's/)//' | sed 's/(//' |awk {'print $2'}` +driver_memoryOverhead=`grep "spark.yarn.driver.memoryOverhead" $logfile | head -1 | sed 's/->//' | sed 's/,/ /' | sed 's/)//' | sed 's/(//' |awk {'print $2'}` + echo "{" >${WORKDIR}/spark_params echo 
"\"num_executors\":\"$num_executors\"," >>${WORKDIR}/spark_params @@ -26,4 +27,3 @@ echo "\"exec_memoryOverhead\":\"$exec_memoryOverhead\"," >>${WORKDIR}/spark_para echo "\"driver_memoryOverhead\":\"$driver_memoryOverhead\"" >>${WORKDIR}/spark_params echo "}" >>${WORKDIR}/spark_params -