/**
 * Reads all CSV files in the subdirectories of the 'input' directory and converts
 * them into a single JSON file named 'bike-sharing-trip-data.json'.
 *
 * Make sure the folder structure inside the input directory looks like:
 *
 * ├── 2015TripData
 * │   ├── 1a.JourneyDataExtract04Jan15-17Jan15.csv
 * │   ├── ...
 * ├── 2016TripData
 * │   ├── 01aJourneyDataExtract10Jan16-23Jan16.csv
 * │   ├── ...
 *
 */

// main script is wrapped in async function in order to use 'await' syntax
(async () => {
  const csv = require('csvtojson');
  const fs = require('fs');
  const { once } = require('events');

  const inputDirectory = 'input';
  const outputFile = 'bike-sharing-trip-data.json';

  // Property names assigned to the CSV columns (passed to csvtojson as `headers`).
  const tripHeaders = [
    '_id',
    'duration',
    'bikeId',
    'endDate',
    'endStationId',
    'endStationName',
    'startDate',
    'startStationId',
    'startStationName',
  ];

  // Only visible sub-directories are data folders; a stray plain file directly
  // under 'input' would otherwise make the nested readdirSync throw ENOTDIR.
  const dirs = fs
    .readdirSync(inputDirectory)
    .filter(name => !name.startsWith('.'))
    .filter(name => fs.statSync(`${inputDirectory}/${name}`).isDirectory());

  const fileStream = fs.createWriteStream(outputFile);

  // Writes a chunk and, when the stream's internal buffer is full, waits for
  // 'drain' so the whole data set is never buffered in memory at once.
  const writeChunk = async chunk => {
    if (!fileStream.write(chunk)) {
      await once(fileStream, 'drain');
    }
  };

  // True until the first trip has been written; that trip gets no leading comma.
  let isFirstTrip = true;

  await writeChunk('[');

  await asyncForEach(dirs, async dirName => {
    // Skip hidden files (e.g. .DS_Store) and anything that is not a CSV file.
    const files = fs
      .readdirSync(`${inputDirectory}/${dirName}`)
      .filter(fileName => !fileName.startsWith('.') && fileName.toLowerCase().endsWith('.csv'));

    await asyncForEach(files, async fileName => {
      const trips = await csv({ headers: tripHeaders }).fromFile(
        `${inputDirectory}/${dirName}/${fileName}`
      );

      for (const trip of trips) {
        // store unix timestamps instead of date-strings and a numeric duration
        trip.startDate = toUnixTimestamp(trip.startDate);
        trip.endDate = toUnixTimestamp(trip.endDate);
        trip.duration = Number(trip.duration);

        // The separator is chosen per trip, so a file that yields no trips never
        // emits a bogus 'undefined' entry or consumes the "first trip" flag.
        const separator = isFirstTrip ? '\n' : ',\n';
        isFirstTrip = false;

        await writeChunk(separator + JSON.stringify(trip));
      }
    });
  });

  fileStream.end('\n]');
  // Wait until everything is flushed; a write error rejects here and is reported below.
  await once(fileStream, 'finish');
})().catch(error => {
  // Report the failure and signal it through the exit code instead of leaving
  // the rejected promise unhandled.
  console.error(error);
  process.exitCode = 1;
});

/**
 * Converts a date string of the form 'dd/MM/yyyy HH:mm' to a unix timestamp.
 *
 * The date components are interpreted as UTC (via Date.UTC). Any parts after the
 * minute are ignored. If a component cannot be parsed as an integer, the result
 * is NaN.
 *
 * @param {string} dateTimeString - date string, e.g. '04/01/2015 13:37'
 * @returns {number} seconds since 1970-01-01T00:00:00Z
 */
function toUnixTimestamp(dateTimeString) {
  // split string by slash, whitespace and colon
  const [day, month, year, hour, minute] = dateTimeString
    .split(/[/\s:]/)
    .map(part => Number.parseInt(part, 10));

  // month needs -1 offset because January = 0 in Javascript Date
  return Date.UTC(year, month - 1, day, hour, minute) / 1000;
}

/**
 * Sequential, promise-aware counterpart of Array.prototype.forEach: the callback
 * for the next element is only invoked after the previous one has settled.
 *
 * @param {Array} array - elements to iterate over
 * @param {function(*, number, Array): (Promise<*>|*)} callback - invoked with
 *   (element, index, array) and awaited before moving on
 * @returns {Promise<void>} resolves once every callback has completed
 */
async function asyncForEach(array, callback) {
  for (let index = 0; index < array.length; index++) {
    await callback(array[index], index, array);
  }
}