convert-csv-to-json.js 2.55 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/**
 * Reads all CSV files in the subdirectories of the 'input' directory and converts them into a single JSON file named 'bike-sharing-trip-data.json'.
 *
 * Make sure the folder structure inside the 'input' directory looks like:
 *
 * ├── 2015TripData
 * │         ├── 1a.JourneyDataExtract04Jan15-17Jan15.csv
 * │         ├── ...
 * ├── 2016TripData
 * │         ├── 01aJourneyDataExtract10Jan16-23Jan16.csv
 * │         ├── ...
 *
 */

// Main script: merges the trips of every CSV file found in the subdirectories of
// 'input' into one JSON array that is streamed to 'bike-sharing-trip-data.json'.
// It is wrapped in an async function in order to use 'await' syntax.
(async () => {
    const csv = require('csvtojson')
    const fs = require('fs')

    const fileStream = fs.createWriteStream('bike-sharing-trip-data.json')
    const inputDirectory = 'input'

    // hidden entries (e.g. '.DS_Store') are skipped, for directories as well as for the files inside them
    const isVisible = entryName => !entryName.startsWith('.')
    const dirs = fs.readdirSync(inputDirectory).filter(isVisible)

    // true until the first trip has been written; decides whether a trip is preceded by a comma
    let isFirstTrip = true

    fileStream.write('[')

    // directories and files are processed one after another so the trips keep a stable order
    await asyncForEach(dirs, async dirName => {
        const files = fs.readdirSync(`${inputDirectory}/${dirName}`).filter(isVisible)

        await asyncForEach(files, async fileName => {
            // 'headers' supplies the property names used for the columns of every row
            const trips = await csv({
                headers: ['_id', 'duration', 'bikeId', 'endDate', 'endStationId', 'endStationName', 'startDate', 'startStationId', 'startStationName']
            }).fromFile(`${inputDirectory}/${dirName}/${fileName}`)

            trips.forEach(trip => {
                // replace date-strings with unix timestamps and the duration string with a number
                trip.startDate = toUnixTimestamp(trip.startDate)
                trip.endDate = toUnixTimestamp(trip.endDate)
                trip.duration = Number(trip.duration)

                // The separator is chosen per trip instead of shifting the first element off the
                // first file: a file without rows then writes nothing (instead of 'undefined')
                // and the output stays valid JSON.
                const separator = isFirstTrip ? '\n' : ',\n'
                fileStream.write(separator + JSON.stringify(trip))
                isFirstTrip = false
            })
        })
    })

    fileStream.write('\n]')
    fileStream.end()
})().catch(error => {
    // report the failure instead of leaving the promise rejection unhandled;
    // the output file is incomplete in this case
    console.error(error)
    process.exitCode = 1
})

/**
 * Converts a date string of the form 'dd/MM/yyyy HH:mm' to a unix timestamp.
 *
 * The date and time fields are handed to Date.UTC, so the string is read as UTC.
 * NOTE(review): if the CSV data holds local times, the result is shifted by the
 * UTC offset - confirm against the data source.
 *
 * Declared as a function (hoisted) so it is available to the main script above
 * regardless of when that script first calls it.
 *
 * @param {string} dateTimeString - date string, e.g. '04/01/2015 10:30'
 * @returns {number} seconds since 1970-01-01T00:00:00Z, or NaN when one of the
 *   five fields is missing or not numeric
 */
function toUnixTimestamp(dateTimeString) {
    // split string by slash, whitespace and colon; the radix is explicit so every part is read as decimal
    const [day, month, year, hour, minute] = dateTimeString
        .split(/[/\s:]/)
        .map(part => Number.parseInt(part, 10))

    // month needs -1 offset because January = 0 in Javascript Date;
    // Date.UTC returns milliseconds, hence the division by 1000
    return Date.UTC(year, month - 1, day, hour, minute) / 1000
}

/**
 * Sequential counterpart of Array.prototype.forEach for async callbacks:
 * the callback for one element is awaited before the next element is visited.
 *
 * @param {Array} array - elements to visit, in index order
 * @param {Function} callback - invoked as callback(element, index, array); its result is awaited
 * @returns {Promise<void>} resolves once the callback has completed for every element
 */
async function asyncForEach(array, callback) {
    let position = 0;
    while (position < array.length) {
        const element = array[position];
        await callback(element, position, array);
        position += 1;
    }
}