Dump the current input JSON
Dump the current input JSON
This example shows how to write the input JSON that a pre-push extension script receives to a file, which helps you develop and debug your scripts.
The script creates an Extensions folder under the COVEO_LOGS_ROOT folder (if it doesn’t already exist) and a subfolder named after the source ID.
It writes the JSON input item (the body argument) to a .log file in that folder.
Note: Apply this extension only to a duplicate of a production source with a name that clearly indicates it’s for testing. In this test source, crawl only a small content subset for faster debugging iterations and to limit log file size.
After applying this extension to your test source and running a source rebuild, open the .log file to review the JSON for each crawled item.
# Import required Python libraries. Note: Add any library that isn't part of the Python standard library to the requirements.txt file.
import sys
import os
import json
import logging
from logging.handlers import TimedRotatingFileHandler
# Initialize a logger that writes to
# <COVEO_LOGS_ROOT>/Extensions/<SOURCE_ID>/<OPERATION_TYPE>_<OPERATION_ID>.log
# NOTE: COVEO_LOGS_ROOT has no fallback here; if it is unset, os.getenv returns
# None and os.path.join raises a TypeError.
log_folder = os.path.join(
    os.getenv('COVEO_LOGS_ROOT'),
    'Extensions',
    os.getenv('SOURCE_ID', 'unknown'),
)
# Create the folder (and any missing parents) if it doesn't already exist.
os.makedirs(log_folder, exist_ok=True)
fname = f"{os.getenv('OPERATION_TYPE','unknown')}_{os.getenv('OPERATION_ID','unknown')}.log"
fpath = os.path.join(log_folder, fname)
# Open the log file explicitly as UTF-8: the item JSON is serialized with
# ensure_ascii=False, so it can contain characters that the platform's default
# encoding can't represent.
handler = TimedRotatingFileHandler(fpath, when='midnight', encoding='utf-8')
# Rotated files get a date suffix such as ".2024-01-31".
handler.suffix = "%Y-%m-%d"
formatter = logging.Formatter(
    fmt='%(asctime)s.%(msecs)03d %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
handler.setFormatter(formatter)
# Route root-logger records at INFO level and above to the file handler.
logging.basicConfig(level=logging.INFO, handlers=[handler])
# -----------------------------------------------------------------
# Extension entry point. The do_extension function must be defined.
# -----------------------------------------------------------------
def do_extension(body):
    """Log the input item as pretty-printed JSON and return it unchanged.

    Args:
        body: The JSON input item; read with ``body.get`` and serialized
            with ``json.dumps``, so it is expected to be a JSON-serializable
            dict.

    Returns:
        The same ``body`` object that was passed in, unmodified.
    """
    # Log basic item info
    logging.info('The extension will be executed on the item: %s',
                 body.get('documentId', '<missing>'))
    # Pretty-print the input JSON as a multi-line string; ensure_ascii=False
    # keeps non-ASCII characters readable instead of \uXXXX escapes.
    json_text = json.dumps(body, indent=2, ensure_ascii=False)
    # Log the entire JSON as a single log entry with embedded newlines
    logging.info("Input JSON:\n%s", json_text)
    return body