From 87f6a9b51659933037e7b59fc5c5977ea4d28c20 Mon Sep 17 00:00:00 2001
From: Erik Kimmel <erik.kimmel@inra.fr>
Date: Thu, 2 Jul 2020 11:07:59 +0200
Subject: [PATCH] fix: force the number of CPUs used for the indexing process.
 GNP-5670.

---
 scripts/harvest.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/harvest.sh b/scripts/harvest.sh
index 3d5504d1..07054373 100755
--- a/scripts/harvest.sh
+++ b/scripts/harvest.sh
@@ -122,7 +122,7 @@ done
 for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 	echo && echo -e "${BOLD}Manage ${DOCUMENT_TYPE} documents...${NC}"
 	INDEX_PATTERN=$(echo "faidare_${DOCUMENT_TYPE}_${ENV}" | sed -E "s/([a-z])([A-Z])/\1-\2/" | tr '[:upper:]' '[:lower:]')
-	
+
 	# Create template
 	TEMPLATE_NAME="${INDEX_PATTERN}_template"
 	echo -e "* Create setting/mapping template ${TEMPLATE_NAME}..."
@@ -140,7 +140,7 @@ for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 	INDEX_NAME="${INDEX_PATTERN}-d"$(date +%s)
 	echo -e "* Index documents into ${ES_HOST}:${ES_PORT}/${INDEX_NAME} indice..."
 	{
-		parallel --bar "
+		parallel -j 2 --bar "
 			curl -s -H 'Content-Type: application/x-ndjson' -H 'Content-Encoding: gzip' -H 'Accept-Encoding: gzip' -XPOST ${ES_HOST}:${ES_PORT}/${INDEX_NAME}/_bulk --data-binary '@{}' > {.}.log.gz" \
 		::: $(find ${DATA_DIR} -name "${DOCUMENT_TYPE}-*.json.gz")
 	} || {
@@ -173,7 +173,7 @@ for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 		echo -e "${ORANGE}Expected ${COUNT_EXTRACTED_DOCS} documents but got ${COUNT_INDEXED_DOCS} indexed documents.${NC}"
 		exit 1;
 	fi
-	
+
 	# Add aliases
 	ALIAS_PATTERN="${INDEX_PATTERN}-group*"
 	ALIAS_EXIST=$(curl -s -XGET "${ES_HOST}:${ES_PORT}/_alias/${ALIAS_PATTERN}" | jq '.status' | grep -q "404" && echo "false" || echo "true")
@@ -182,7 +182,7 @@ for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 		LOG=$(curl -s -XDELETE "${ES_HOST}:${ES_PORT}/*/_aliases/${ALIAS_PATTERN}")
 		check_acknowledgment "${LOG}" "delete aliases"
 	fi
-	
+
 	echo -e "* List groupId from ${INDEX_NAME} (to create filtered aliases)..."
 	GROUP_IDS=$(curl -s -H 'Content-Type: application/json' -XGET "${ES_HOST}:${ES_PORT}/${INDEX_NAME}/_search" -d'
 {
@@ -224,7 +224,7 @@ for DOCUMENT_TYPE in ${DOCUMENT_TYPES}; do
 }")
 		check_acknowledgment "${LOG}" "create aliase"
 	done
-	
+
 	# Delete all but last created indices (thanks to the timestamp suffix)
 	echo -e "* Delete old indices ${INDEX_PATTERN} (to avoid accumulation over time):"
 	OLD_INDICES=$(curl -sf -XGET "${ES_HOST}:${ES_PORT}/_cat/indices/${INDEX_PATTERN}*?h=index" | sort | head -n -1)
-- 
GitLab