const https = require('https')
const fs = require('fs')
const EventEmitter = require('events')
const XLSX = require('xlsx')
const Schema = require('../schema')
// Configuration settings
const regionSchema = require('../../lib/schemas/regionSchema')
const defaultRegionsConfig = require('../../../config/regions.json')
const { capitalizeText } = require('../../lib/utils')
/**
* Load, process and parse an Excel File containing a list of PH municipalities.
* The Excel File should contain a column with string pattern:
* "municipalityName (ProvinceName)"
*/
class ExcelFile {
/**
* Remote download URL of an excel file
* @type {string | null}
*/
#url = null
/**
* Full file path to excel file on local storage
* @type {string | null}
*/
#pathToFile = null
/**
* Region information from the `/app/config/regions.json` or other JSON config file.
* @type {Object | null}
*/
#settings = null
/**
* 10-day Excel file information
* @type {Object.<string, string | null>}
*/
#metadata = {
/**
* Weather forecast date
* @type {string | null}
*/
forecastDate: null
}
/**
* Other class settings and configurations
* @type {Object.<string, string | number>}
*/
#options = {
/**
* SheetJS array index number translated from the Excel headers row count
* before elements containing "municipalityName (provinceName)" data
* @type {number}
*/
dataRowStart: 0,
/** Internal excel file column name read by sheetjs.
* This column contains strings following the pattern
* "municipalityName (provinceName)"
* @type {string}
*/
SHEETJS_COL: process.env.SHEETJS_COLUMN || '__EMPTY'
}
/**
* Excel workbook object parsed by sheetjs
* @type {Object[] | null}
*/
#workbook = null
/**
* Excel sheet names parsed by sheetjs.
* @type {string[] | null}
*/
#sheets = null
/**
* Objects[] Array corresponding to excel rows extracted from the excel sheet by sheetjs.
* @type {Object[] | null}
*/
#data = null
/**
* Object[] Array of processed string corresponding to the column in the excel file
* that contains the list of municipalities following the pattern:
* "municipalityName (provinceName)"
* Content: [{ municipality, province }, ... ]
* @type {Object[] | null}
*/
#datalist = []
/**
* Node event emitter for listening to custom events.
* @type {Function}
*/
events = new EventEmitter()
/**
* List of EventEmitter events.
* @type {Object.<string, string>}
*/
EVENTS = {
LOADED: 'loaded'
}
/**
* Initialize an ExcelFile object
* @param {Object} params - Constructor parameter Object
* @param {string} [params.url] - (Optional) Remote download URL of an excel file
* @param {string} params.pathToFile
* - Full local file path of an existing Excel file, **required** if `params.url` is not provided
* - Full local file path to an existing or non-existent Excel file on which to download/save the remote Excel file from `params.url`,
* if the `params.url` parameter is provided
* @param {Object} [params.settings] - (Optional) Region settings configuration object following the format of the `/app/config/regions.json` file. Defaults to the mentioned file if not provided.
* @param {boolean} [params.fastload] - (Optional) Start loading and parsing the local excel file on class initialization if the "url" param is not provided.
* - If `false` or not provided, call the `.init()` method later on a more convenient time.
*/
constructor ({ url, pathToFile, fastload = true, settings = null, options = null } = {}) {
if (url === '' || pathToFile === '') {
throw new Error('Missing remote file url or local file path.')
}
if (pathToFile === undefined) {
throw new Error('Missing pathToFile.')
}
if (!pathToFile.includes('.xlsx')) {
throw new Error('pathToFile should contain an excel file name ending in .xlsx')
}
this.setOptions(options)
// Set the local Excel file path
this.#pathToFile = pathToFile
// Set the regions settings
this.#settings = new Schema(
settings || defaultRegionsConfig,
regionSchema
).get()
if (url) {
// Set the remote excel file download URL
this.#url = url
} else {
if (fastload) {
this.init()
}
}
}
/**
* Loads an existing excel file contents to a JSON object.
* Downloads a remote excel file if a remote this.#url is provided on the constructor
*/
async init () {
if (this.#url !== null && this.#pathToFile !== null) {
try {
// Download from remote URL
await this.download()
this.events.emit(this.EVENTS.LOADED)
} catch (errMsg) {
throw new Error(errMsg)
}
}
if (this.#url === null && this.#pathToFile !== null) {
try {
// Read from file
this.load()
// Add a slight delay before emmiting the loaded event
setTimeout(() => {
this.events.emit(this.EVENTS.LOADED)
}, 300)
} catch (err) {
throw new Error(err.message)
}
}
}
/**
* Loads an excel file from a local directory using sheetjs.
* Store excel file data as JSON in this.#data
*/
load () {
try {
this.#workbook = XLSX.readFile(this.#pathToFile)
this.#sheets = this.#workbook.SheetNames
// Set data excel row data as Objects
this.#data = XLSX.utils.sheet_to_json(this.#workbook.Sheets[this.#sheets[0]])
// Extract the municipality and province names
this.#datalist = this.#data.reduce((acc, row, index) => {
if (row[this.#options.SHEETJS_COL] !== undefined && this.followsStringPattern(row[this.#options.SHEETJS_COL])) {
const municipality = this.getMunicipalityName(row[this.#options.SHEETJS_COL])
const province = this.getProvinceName(row[this.#options.SHEETJS_COL])
if (province !== null) {
acc.push({
municipality: municipality.trim(),
province
})
}
} else {
// Find the SheetJS array index of rows containing data
// Note: this relies on the structure of the default Excel file in /app/data/day1.xlsx or similar
if (row[this.#options.SHEETJS_COL] === 'Project Areas') {
const OFFSET_FROM_FLAG = 2
this.#options.dataRowStart = index + OFFSET_FROM_FLAG
}
if (this.#metadata.forecastDate === null) {
const contentAsKeys = Object.keys(row ?? '')
const content = contentAsKeys.filter(item => item.includes('FORECAST DATE'))
this.#metadata.forecastDate = content.length > 0
? capitalizeText(content[0])
: 'Forecast Date: n/a'
}
}
return acc
}, [])
console.log(`Loaded ${this.#data.length} rows, ${this.#datalist.length} with data`)
if (this.#datalist.length === 0) {
throw new Error('Failed to load data. Please check the SHEETJS_COLUMN name or the excel file contents.')
}
} catch (err) {
throw new Error(err.message)
}
}
/**
* Downloads a remote excel file to this.#pathToFile
* and loads sheetjs parsed-content
*/
download () {
try {
const file = fs.createWriteStream(this.#pathToFile)
return new Promise((resolve, reject) => {
https.get(this.#url, (res) => {
res.pipe(file)
file.on('finish', () => {
file.close(() => {
try {
resolve(this.load())
} catch (err) {
reject(err.message)
}
})
})
})
})
} catch (err) {
throw new Error(err.message)
}
}
/**
* Checks if a string follows the pattern:
* "municipalityName (provinceName)"
* @param {string} str - String to check
* @returns {boolean} true | false
*/
followsStringPattern (str) {
return /[a-zA-Z,.] *\([^)]*\) *$/.test(str)
}
/**
* Sets the local this.#options settings
* @param {Object} options - Miscellaneous app settings defined in this.#options
* @returns {boolean}
*/
setOptions (options) {
if (!options) return false
for (const key in this.#options) {
if (options[key] !== undefined) {
this.#options[key] = options[key]
}
}
}
/**
* Checks if a string contains special characters
* @param {string} str - String to check
* @returns {boolean}
*/
static hasSpecialChars (str) {
/* eslint-disable no-control-regex */
const regex = /[^\x00-\x7F]/g
return regex.test(str)
}
/**
* Cleans/removes default-known special characters and garbled text defined in config from string.
* @param {string} str - String to clean
* @returns {string} - Clean string
*/
static removeGarbledText (str) {
// Known garbled special text
let charMap = {
'├â┬▒': 'ñ', // Replace "├â┬▒" with "ñ"
â: '' // Remove "â"
}
// Other special characters from config
const specialChars = (process.env.SPECIAL_CHARACTERS?.split(',') ?? [])
.reduce((list, item) => {
const [key, value] = item.split(':')
return {
...list,
...((key || value) && { [key]: value ?? '' })
}
}, {})
charMap = {
...charMap,
...specialChars
}
for (const [key, value] of Object.entries(charMap)) {
str = str.replace(new RegExp(key, 'g'), value)
}
return str
}
/**
* Extracts the municipality name from a string following the pattern:
* "municipalityName (provinceName)"
* @param {string} str
* @returns {string} municipality name
*/
getMunicipalityName (str) {
const municipalityName = str.replace(/ *\([^)]*\) */g, '')
const cleanText = ExcelFile.hasSpecialChars(municipalityName)
? ExcelFile.removeGarbledText(municipalityName)
: municipalityName
return cleanText
}
/**
* Extracts the province name from a string following the pattern:
* "municipalityName (provinceName)"
* @param {string} str
* @returns {string} province name
* @returns {null} Returns null if "provinceName" is not found
*/
getProvinceName (str) {
if (!str) return null
const match = str.match(/\(([^)]+)\)/)
return (match !== null)
? match[1]
: match
}
// Returns the processed Object array (masterlist) of municipality and province names
get datalist () {
return this.#datalist
}
// Sets the private data list contents
set datalist (data) {
this.#datalist = data
}
// Returns the raw Excel JSON data
get data () {
return this.#data
}
// Returns the region data settings object
get settings () {
return this.#settings
}
// Returns the local options object
get options () {
return this.#options
}
// Returns the loaded Excel file's metadata
get metadata () {
return this.#metadata
}
// Returns the full path to the 10-day weather forecast Excel file
get pathToFile () {
return this.#pathToFile
}
get url () {
return this.#url
}
/**
* Get the requested data with other misc data
* @param {String[]} provinces - List of provinces
* @returns {Object} Formatted raw data with misc. metadata
*/
shapeJsonData (provinces) {
const url = (this.#url) ? this.#url : `local datasource cache from ${process.env.DEFAULT_EXCEL_FILE_URL}`
return {
metadata: {
source: url || '',
title: 'List of PH Municipalities By Province and Region',
description: 'This dataset generated with reference to the excel file contents from the source URL.',
date_created: new Date().toDateString()
},
data: this.listMunicipalities({ provinces })
}
}
/**
* List the municipalities of given province(s)
* @param {String[]} provinces - Array of case-sensitive province names. Starts with an upper case.
* @returns {Object} Returns an object with the format:
* [
* { province1: ['municipality1', 'municipality2', .... ] },
* { province2: ['municipality1', 'municipality2', .... ] },
* ...
* ]
*/
listMunicipalities ({ provinces }) {
if (this.#datalist.length === 0) {
throw new Error('No data to parse.')
}
if (provinces === undefined) {
throw new Error('Missing the provinces parameter.')
}
return this.#datalist
.filter(item => provinces.includes(item.province.trim()))
.reduce((acc, item) => {
if (acc[item.province] === undefined) {
acc[item.province] = []
}
const cleanText = ExcelFile.hasSpecialChars(item.municipality)
? ExcelFile.removeGarbledText(item.municipality)
: item.municipality
acc[item.province].push(cleanText)
// Sort municipality names alphabetically
if (process.env.SORT_ALPHABETICAL === '1') {
acc[item.province].sort()
}
return { ...acc }
}, {})
}
/**
* Writes queried municipalities data to a JSON file.
* Lists municipalities by by provinces.
* @param {String[]} provinces - Array of case-sensitive province names. Starts with an upper case.
* @param {string} fielName - Full file path to a JSON file
* @param {boolean} prettify - Write the JSON content with proper spacings and newlines
* @returns {Object} Formatted raw data with misc. metadata
*/
writeMunicipalities ({ provinces, fileName, prettify = false }) {
if (!fileName) {
throw new Error('Please enter a filename ending in .json')
}
if (!/\.(json)$/i.test(fileName)) {
throw new Error('Please enter a filename ending in .json')
}
try {
const str = this.shapeJsonData(provinces)
const json = (prettify)
? JSON.stringify(str, null, 2)
: JSON.stringify(str)
// Write results to a JSON file
fs.writeFileSync(fileName, json, 'utf-8')
return str
} catch (err) {
throw new Error(err.message)
}
}
/**
* Lists the province names of a region defined in the settings file
* @param {string} regionName - Region name that matches with the `/app/config/regions.json` file's `data[N].name`
* @returns {String[]} List provinces under the `regionName`.
*/
listProvinces (regionName) {
return this.#settings.data
.find(region => region.name === regionName)?.provinces ?? []
}
/**
* Lists the province names of a region defined in the settings (PAGASA seasonal config) file or from the parsed Excel file
* @param {string} region - Region name that matches with the `/app/config/regions.json` file's `data[N].name`
* @param {boolean} fromExcel - Flag to return the province names from the parsed 10-day Excel file. Defaults to `false`.
* - Note: Province names from a "remote" Excel file may change without notice.
* - It may differ from the contents of the "default" settings (PAGASA seasonal config) file.
* - If the province names from the "remote" Excel file and "default" settings (PAGASA seasonal config) file vary,
* consider initializing an `ExcelFile` or `ExcelFactory` class with a custom settings config file following
* the format of the default settings file in `/app/config/regions.json`
* @returns {String[]} List of all provinces from a 10-day Excel file.
*/
listAllProvinces (fromExcel = false) {
if (fromExcel) {
// Return unique province names from the parsed Excel file
return this.#datalist
.map(item => item.province)
.filter((x, i, a) => a.indexOf(x) === i)
} else {
// Return province names from the PAGASA seasonal config file
return this.listRegions().reduce((list, region) => {
const provinces = this.listProvinces(region)
return [...list, ...provinces]
}, [])
}
}
/**
* Lists the region names defined in the settings file
* @param {Object} key - Key name of the region data definition key.
* - Valid values are: `name`, `abbrev`, `region_num`, and `region_name`
* - See the `/app/config/regions.json` file -> `data[]` item keys for more information.
* @returns {String[]} A list of province information by key
*/
listRegions (key = null) {
if (!key) {
return this.#settings.data.map(region => region.name)
} else {
const keys = [...Object.keys(this.#settings.data[0])]
if (
!keys.includes(key) || typeof key !== 'string'
) {
return []
}
return this.#settings.data
.map(region => region[key])
}
}
}
module.exports = ExcelFile