From 9d9a92c912cbac5f2081c095c7d3d01d7cd046b8 Mon Sep 17 00:00:00 2001
From: jdestin <jeremy.destin@inra.fr>
Date: Wed, 27 May 2020 14:58:24 +0200
Subject: [PATCH 1/3] fix: Create a folder to store the logs

---
 scripts/harvest.sh | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/scripts/harvest.sh b/scripts/harvest.sh
index 07054373..b774b717 100755
--- a/scripts/harvest.sh
+++ b/scripts/harvest.sh
@@ -119,6 +119,22 @@ for FILE in $(find ${DATA_DIR} -name "*.json"); do
 	gzip $FILE
 done
 
+LOG_DIR=${DATA_DIR}"log/"
+export TMP_LOG_FILE
+
+if [[ -d ${LOG_DIR} ]]
+then
+    rm -r "${LOG_DIR}"
+    mkdir "${LOG_DIR}"
+else
+    mkdir "${LOG_DIR}"
+fi
+
+
+basename_dir() { "$1$(basename "$(dirname "$2")")/$3" ; }
+
+export -f basename_dir
+
 for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 	echo && echo -e "${BOLD}Manage ${DOCUMENT_TYPE} documents...${NC}"
 	INDEX_PATTERN=$(echo "faidare_${DOCUMENT_TYPE}_${ENV}" | sed -E "s/([a-z])([A-Z])/\1-\2/" | tr '[:upper:]' '[:lower:]')
@@ -141,6 +157,12 @@ for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 	echo -e "* Index documents into ${ES_HOST}:${ES_PORT}/${INDEX_NAME} indice..."
 	{
 		parallel -j 2 --bar "
+
+		echo test1
+		    #echo '{= s:.*/[^_]*_:sub/:; =}'
+		    echo {=s:/*::;=}
+            echo test2
+
 			curl -s -H 'Content-Type: application/x-ndjson' -H 'Content-Encoding: gzip' -H 'Accept-Encoding: gzip' -XPOST ${ES_HOST}:${ES_PORT}/${INDEX_NAME}/_bulk --data-binary '@{}' > {.}.log.gz" \
 		::: $(find ${DATA_DIR} -name "${DOCUMENT_TYPE}-*.json.gz")
 	} || {
@@ -173,6 +195,7 @@ for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 		echo -e "${ORANGE}Expected ${COUNT_EXTRACTED_DOCS} documents but got ${COUNT_INDEXED_DOCS} indexed documents.${NC}"
 		exit 1;
 	fi
+	sleep 5
 
 	# Add aliases
 	ALIAS_PATTERN="${INDEX_PATTERN}-group*"
-- 
GitLab


From 612f9997c20a86e03a03a8041692304aa465e480 Mon Sep 17 00:00:00 2001
From: jdestin <jeremy.destin@inra.fr>
Date: Thu, 30 Jul 2020 16:19:09 +0200
Subject: [PATCH 2/3] Rebase on master. Create the indexing logs in a dedicated
 directory. GNP-5861

---
 scripts/harvest.sh | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/scripts/harvest.sh b/scripts/harvest.sh
index b774b717..bda1f2e6 100755
--- a/scripts/harvest.sh
+++ b/scripts/harvest.sh
@@ -119,21 +119,29 @@ for FILE in $(find ${DATA_DIR} -name "*.json"); do
 	gzip $FILE
 done
 
-LOG_DIR=${DATA_DIR}"log/"
-export TMP_LOG_FILE
+LOG_DIR="${DATA_DIR%/}/log"
 
 if [[ -d ${LOG_DIR} ]]
 then
     rm -r "${LOG_DIR}"
-    mkdir "${LOG_DIR}"
-else
-    mkdir "${LOG_DIR}"
 fi
 
+export ES_HOST ES_PORT INDEX_NAME LOG_DIR
 
-basename_dir() { "$1$(basename "$(dirname "$2")")/$3" ; }
+# Bulk-index the gzipped file $1 into ${INDEX_NAME}; the response is saved as
+# ${LOG_DIR}/<parent dir name of $1>/<name of $1 minus last extension>.log.gz.
+process_file() {
+    local file logfile source
 
-export -f basename_dir
+    file=$(basename "$1")
+    logfile="${file%.*}.log.gz"
+    source=$(basename "$(dirname "$1")")
+    mkdir -p "${LOG_DIR}/$source"  # -p: no error if the directory already exists
+
+    curl -s -H 'Content-Type: application/x-ndjson' -H 'Content-Encoding: gzip' -H 'Accept-Encoding: gzip' -XPOST "${ES_HOST}:${ES_PORT}/${INDEX_NAME}/_bulk" --data-binary @"$1" > "${LOG_DIR}/$source/$logfile"
+}
+
+export -f process_file
 
 for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 	echo && echo -e "${BOLD}Manage ${DOCUMENT_TYPE} documents...${NC}"
@@ -156,15 +164,7 @@ for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 	INDEX_NAME="${INDEX_PATTERN}-d"$(date +%s)
 	echo -e "* Index documents into ${ES_HOST}:${ES_PORT}/${INDEX_NAME} indice..."
 	{
-		parallel -j 2 --bar "
-
-		echo test1
-		    #echo '{= s:.*/[^_]*_:sub/:; =}'
-		    echo {=s:/*::;=}
-            echo test2
-
-			curl -s -H 'Content-Type: application/x-ndjson' -H 'Content-Encoding: gzip' -H 'Accept-Encoding: gzip' -XPOST ${ES_HOST}:${ES_PORT}/${INDEX_NAME}/_bulk --data-binary '@{}' > {.}.log.gz" \
-		::: $(find ${DATA_DIR} -name "${DOCUMENT_TYPE}-*.json.gz")
+		parallel -j 2 --bar process_file ::: $(find "${DATA_DIR}" -name "${DOCUMENT_TYPE}-*.json.gz")
 	} || {
 		code=$?
 		echo -e "${RED}ERROR: a problem occurred when trying to index data with parallel program.${NC}"
-- 
GitLab


From bcc5c1d9b29fb58c6431cfaf0ad44e1710247d18 Mon Sep 17 00:00:00 2001
From: jdestin <jeremy.destin@inra.fr>
Date: Thu, 30 Jul 2020 16:52:03 +0200
Subject: [PATCH 3/3] Rename the indexing log directory. GNP-5861

---
 scripts/harvest.sh | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/scripts/harvest.sh b/scripts/harvest.sh
index bda1f2e6..86379b8c 100755
--- a/scripts/harvest.sh
+++ b/scripts/harvest.sh
@@ -52,7 +52,7 @@ check_acknowledgment() {
 	local MSG=$2
 	echo $LOG | jq '.acknowledged? == true' | grep 'true' >/dev/null || {
 		echo -e "${RED}ERROR: a problem occurred when trying to ${MSG}.${NC}"
-		echo -e "${ORANGE}$(echo ${LOG})${NC}"
+		echo -e "${ORANGE}$(echo "${LOG}")${NC}"
 		exit 1;
 	}
 }
@@ -61,7 +61,7 @@ check_acknowledgment() {
 MISSING_COUNT=0
 PROGRAMS="gzip parallel jq"
 for PROGRAM in ${PROGRAMS}; do
-	command -v ${PROGRAM} >/dev/null || {
+	command -v "${PROGRAM}" >/dev/null || {
 		echo -e "${ORANGE}Program ${PROGRAM} is missing, cannot continue...${NC}"
 		((MISSING_COUNT += 1))
 	}
@@ -102,24 +102,24 @@ if [ ! -d "${DATA_DIR}" ]; then
 	echo -e "${RED}ERROR: Mandatory parameter 'jsonDir' is missing!${NC}"
 	echo && help
 fi
-if [ $(find ${DATA_DIR} -name "*.json" | wc -l) -le 0 ] && [ $(find ${DATA_DIR} -name "*.json.gz" | wc -l) -le 0 ]; then
+if [ $(find "${DATA_DIR}" -name "*.json" | wc -l) -le 0 ] && [ $(find "${DATA_DIR}" -name "*.json.gz" | wc -l) -le 0 ]; then
 	echo -e "${RED}ERROR: The JSON directory ${DATA_DIR} contains no JSON files!${NC}"
 	echo && help
 fi
 [ "${DOCUMENT_TYPES}" == "all" ] && DOCUMENT_TYPES="${ALL_DOCUMENT_TYPES}"
 for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
-	if [ $(find ${DATA_DIR} -name "${DOCUMENT_TYPE}*.json" | wc -l) -le 0 ] && [ $(find ${DATA_DIR} -name "${DOCUMENT_TYPE}*.json.gz" | wc -l) -le 0 ]; then
+	if [ $(find "${DATA_DIR}" -name "${DOCUMENT_TYPE}*.json" | wc -l) -le 0 ] && [ $(find "${DATA_DIR}" -name "${DOCUMENT_TYPE}*.json.gz" | wc -l) -le 0 ]; then
 		echo -e "${ORANGE}WARNING: The JSON directory ${DATA_DIR} contains no ${DOCUMENT_TYPE} document. Type will be ignored!${NC}"
 		DOCUMENT_TYPES=$(echo "${DOCUMENT_TYPES}" | sed "s/ *${DOCUMENT_TYPE} */ /g")
 	fi
 done
 
 # Compress JSON files
-for FILE in $(find ${DATA_DIR} -name "*.json"); do
-	gzip $FILE
+for FILE in $(find "${DATA_DIR}" -name "*.json"); do
+	gzip "$FILE"
 done
 
-LOG_DIR="${DATA_DIR%/}/log"
+LOG_DIR="${DATA_DIR%/}/indexing-log"
 
 if [[ -d ${LOG_DIR} ]]
 then
@@ -155,8 +155,8 @@ for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 	\"index_patterns\": [\"${INDEX_PATTERN}-*\"],
 	\"order\": 101,
 	\"mappings\":
-		$(cat ${BASEDIR}/../backend/src/test/resources/fr/inra/urgi/faidare/repository/es/setup/index/${DOCUMENT_TYPE}_mapping.json),
-	\"settings\": $(cat ${BASEDIR}/../backend/src/test/resources/fr/inra/urgi/faidare/repository/es/setup/index/settings.json)
+		$(cat "${BASEDIR}"/../backend/src/test/resources/fr/inra/urgi/faidare/repository/es/setup/index/${DOCUMENT_TYPE}_mapping.json),
+	\"settings\": $(cat "${BASEDIR}"/../backend/src/test/resources/fr/inra/urgi/faidare/repository/es/setup/index/settings.json)
 }")
 	check_acknowledgment "${LOG}" "create template"
 
@@ -170,7 +170,7 @@ for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 		echo -e "${RED}ERROR: a problem occurred when trying to index data with parallel program.${NC}"
 		exit $code
 	}
-	parallel "gunzip -c {} | jq '.errors' | grep -q true && echo -e '${ORANGE}ERROR found in {}${NC}' >> ${TMP_FILE} ;" ::: $(find ${DATA_DIR} -name "${DOCUMENT_TYPE}-*.log.gz")
+	parallel "gunzip -c {} | jq '.errors' | grep -q true && echo -e '${ORANGE}ERROR found in {}${NC}' >> ${TMP_FILE} ;" ::: $(find "${DATA_DIR}" -name "${DOCUMENT_TYPE}-*.log.gz")
 	if [ -f "${TMP_FILE}" ] && [ -s "${TMP_FILE}" ]; then
 		echo -e "${RED}ERROR: a problem occurred when trying to index data into ${ES_HOST}:${ES_PORT}/${INDEX_NAME} indice.${NC}"
 		echo -e "${ORANGE}$(cat ${TMP_FILE})${NC}"
-- 
GitLab