Commit e570f4b3 authored by Samuel Mergenthaler's avatar Samuel Mergenthaler
Browse files

add entry for bike sharing dashboard

parent 99cc083c
Pipeline #5868 passed with stages
in 15 seconds
# Visualization of bike sharing data in London
![](./imgs/architecture-overview.png)
## Project structure:
| path | explanation |
| --------------- | ----------- |
| `app/backend` | A Node.js server that stores bike trip and bike point documents and provides dynamic statistics about them at several endpoints |
| `app/frontend` | A React.js web frontend that visualizes the statistics provided by the backend in diagrams and a map view |
| `data` | Information on where to download the source data, plus the preprocessing that prepares the data for storage in MongoDB |
## Prerequisites
- node and npm must be installed
- docker and docker-compose must be installed
## Run
1. Go into `app/backend/database` and run `docker-compose up -d` to start the database (available on port 27017)
2. Go into `app/backend` and run `npm start` to start the backend (available on port 8081)
3. Go into `app/frontend` and run `npm start` to start the frontend (available on port 3000)
Please note: At first startup, the server will store bike trip documents from a sample json file into the database. This might take a few seconds.
## More bike trip data
The sample bike trips (`app/backend/src/shared/data/bike-sharing-trip-data-4-january-28-february-reduced.json`) range from 4 January to 28 February, with many trips removed to reduce the file size. If you want to use more bike trips covering a larger time range, follow these steps:
1. Download the bike trip CSV-files [for 2015](https://cycling.data.tfl.gov.uk/usage-stats/2015TripDatazip.zip) and [for 2016](https://cycling.data.tfl.gov.uk/usage-stats/2016TripDataZip.zip). More csv files are available [here](https://cycling.data.tfl.gov.uk/), in the folder 'usage-stats'.
2. Go into `data/trips/preprocessing` and follow the instructions in the README there. This will produce a single JSON file.
3. Copy the json of step 2 into `app/backend/src/shared/data`
4. Open the file `app/backend/src/Server.ts` and adjust the path in 'createReadStream(...)' to match your newly created json file.
5. If you've used the server in the past, delete the folder `app/backend/database/db`.
6. Start the database and then the server. The server will load all documents from your new JSON file into the database. On subsequent runs it will not do this again unless you clear the database.
## Bike point data
We don't use live data of bike points in our visualizations, so we just downloaded all bike point documents once and added them to the repository (`app/backend/src/shared/data/bike-point-data.json`), to be used by the server.
If you want, you can download the most up-to-date bike point documents [here](https://api.tfl.gov.uk/BikePoint/) and use those instead.
node_modules
dist
jet-logger.log
database/db
src/shared/data/bike-sharing-trip-data.json
\ No newline at end of file
# Server providing data for dashboard frontend
## Start server locally
Before you start the backend itself, go into the database directory and execute `docker-compose up -d`. On first startup of the app, data will be written to this database.
Run `npm install` to download all dependencies.
Run `npm start` to start the server locally. It will listen on port 8081.
Please note: The script `start:prod` in the package.json is intended for the production environment only. It runs the compiled server from `dist`, which also serves the frontend build; create both beforehand with `npm run build`.
## Endpoints
The endpoints of our api are defined in the folder `src/routes`.
Please note: All `from` and `to` query parameters are interpreted as Unix timestamps.
Endpoint to get all bike points:
- `/api/bike-points/all`
Endpoint for landing page:
- `/api/bike-trip-durations`
- query params: from, to, classSize (in seconds)
Endpoint for bike point details page:
- `/api/bike-point-details/:bikePointId`
- query params: from, to, selectedDay (0=Monday to 6=Sunday)
Endpoint for map page:
- `/api/bike-points-activity`
- query params: from, to
\ No newline at end of file
/**
* Remove the old build, build the frontend and move its output into dist,
* copy the production env file into dist, then compile the backend into dist.
*/
import fs from 'fs-extra';
import Logger from 'jet-logger';
import childProcess from 'child_process';
// Setup logger
const logger = new Logger();
logger.timestamp = false;
/**
 * Build pipeline, executed once when this script is run.
 *
 * Steps, in order:
 *  1. delete the previous `./dist/` output,
 *  2. build the frontend and move its build folder to `./dist/frontend-build`,
 *  3. copy the production env file into `./dist`,
 *  4. compile the backend with `tsc` using `tsconfig.prod.json`.
 *
 * Any failing step aborts the remaining steps, is logged, and marks the
 * process as failed so that `npm run build` / CI does not report success
 * for a broken build.
 */
(async () => {
    try {
        // Remove current build
        await remove('./dist/')
        // Build frontend and move the build files to the dist folder in order to serve them from there
        await exec('npm run build', './../frontend/')
        await move('./../frontend/build', './dist/frontend-build')
        // Copy production env file
        await copy('./src/pre-start/env/production.env', './dist/pre-start/env/production.env')
        // Compile back-end files
        await exec('tsc --build tsconfig.prod.json', './')
    } catch (err) {
        logger.err(err)
        // Without a non-zero exit code a failed build would look successful to the caller
        process.exitCode = 1
    }
})()
/**
 * Promise wrapper around the callback form of `fs.remove`.
 *
 * @param loc path to delete
 * @returns a promise that resolves once the callback reports no error and
 *          rejects with the reported error otherwise
 */
function remove(loc: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        fs.remove(loc, (error) => {
            if (error) {
                reject(error)
                return
            }
            resolve()
        })
    })
}
/**
 * Promise wrapper around the callback form of `fs.copy`.
 *
 * @param src  path to copy from
 * @param dest path to copy to
 * @returns a promise that resolves once the callback reports no error and
 *          rejects with the reported error otherwise
 */
function copy(src: string, dest: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        fs.copy(src, dest, (error) => {
            if (error) {
                reject(error)
                return
            }
            resolve()
        })
    })
}
/**
 * Promise wrapper around the callback form of `fs.move`.
 *
 * @param src  path to move from
 * @param dest path to move to
 * @returns a promise that resolves once the callback reports no error and
 *          rejects with the reported error otherwise
 */
function move(src: string, dest: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        fs.move(src, dest, (error) => {
            if (error) {
                reject(error)
                return
            }
            resolve()
        })
    })
}
/**
 * Runs a shell command via `childProcess.exec` and forwards its output to the logger.
 *
 * @param cmd command line to execute
 * @param loc working directory the command is executed in
 * @returns a promise that resolves when the command finished without an
 *          error and rejects with the error reported by `childProcess.exec`
 */
function exec(cmd: string, loc: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        childProcess.exec(cmd, { cwd: loc }, (error, stdout, stderr) => {
            // Forward whatever the command printed before settling the promise
            if (stdout) {
                logger.info(stdout)
            }
            if (stderr) {
                logger.warn(stderr)
            }
            if (error) {
                reject(error)
                return
            }
            resolve()
        })
    })
}
version: '3'
services:
mongodb:
image: mongo:4.4.3
hostname: mongodb
container_name: bikesharing-data-mongodb
ports:
- "27017:27017"
volumes:
- ./db:/data/db
\ No newline at end of file
{
"name": "bike-sharing-data-server",
"version": "1.0.0",
"scripts": {
"build": "ts-node build.ts",
"lint": "eslint . --ext .ts",
"start:prod": "node -r module-alias/register ./dist --env=production",
"start": "nodemon",
"test": "nodemon --config ./spec/nodemon.json"
},
"nodemonConfig": {
"watch": [
"src"
],
"ext": "ts, html",
"ignore": [
"src/public"
],
"exec": "ts-node -r tsconfig-paths/register ./src"
},
"_moduleAliases": {
"@daos": "dist/daos",
"@entities": "dist/entities",
"@shared": "dist/shared",
"@server": "dist/Server"
},
"eslintConfig": {
"parser": "@typescript-eslint/parser",
"plugins": [
"@typescript-eslint"
],
"extends": [
"eslint:recommended",
"plugin:@typescript-eslint/recommended",
"plugin:@typescript-eslint/recommended-requiring-type-checking"
],
"parserOptions": {
"project": "./tsconfig.json"
},
"rules": {
"max-len": [
"error",
{
"code": 500
}
],
"no-console": 1,
"no-extra-boolean-cast": 0,
"@typescript-eslint/restrict-plus-operands": 0,
"@typescript-eslint/explicit-module-boundary-types": 0,
"@typescript-eslint/no-explicit-any": 0,
"@typescript-eslint/no-floating-promises": 0,
"@typescript-eslint/no-unsafe-member-access": 0,
"@typescript-eslint/no-unsafe-assignment": 0
}
},
"eslintIgnore": [
"src/public/",
"build.ts"
],
"dependencies": {
"@types/mongodb": "^3.6.3",
"command-line-args": "^5.1.1",
"cookie-parser": "^1.4.5",
"cors": "^2.8.5",
"dotenv": "^8.2.0",
"express": "^4.17.1",
"express-async-errors": "^3.1.1",
"helmet": "^4.3.1",
"http-status-codes": "^2.1.4",
"jet-logger": "^1.0.4",
"jsonfile": "^6.1.0",
"module-alias": "^2.2.2",
"mongodb": "^3.6.3",
"morgan": "^1.10.0",
"stream-chain": "^2.2.4",
"stream-json": "^1.7.1"
},
"devDependencies": {
"@types/command-line-args": "^5.0.0",
"@types/cookie-parser": "^1.4.2",
"@types/cors": "^2.8.9",
"@types/express": "^4.17.9",
"@types/find": "^0.2.1",
"@types/fs-extra": "^9.0.6",
"@types/jsonfile": "^6.0.0",
"@types/morgan": "^1.9.2",
"@types/node": "^14.14.17",
"@types/stream-chain": "^2.0.0",
"@types/stream-json": "^1.5.1",
"@typescript-eslint/eslint-plugin": "^4.11.1",
"@typescript-eslint/parser": "^4.11.1",
"eslint": "^7.16.0",
"find": "^0.3.0",
"fs-extra": "^9.0.1",
"nodemon": "^2.0.6",
"ts-node": "^9.1.1",
"tsconfig-paths": "^3.9.0",
"typescript": "^4.1.3"
}
}
import cookieParser from 'cookie-parser'
import morgan from 'morgan'
import path from 'path'
import helmet from 'helmet'
import cors from 'cors'
import express, { NextFunction, Request, Response } from 'express'
import StatusCodes from 'http-status-codes'
import 'express-async-errors'
import BaseRouter from './routes'
import logger from '@shared/Logger'
import { MongoClient } from 'mongodb'
import { chain } from 'stream-chain'
import * as fs from 'fs'
import { parser } from 'stream-json'
import { streamArray } from 'stream-json/streamers/StreamArray'
import { IBikeTrip } from '@entities/BikeTrip'
/************************************************************************************
* Fill database and open connection
***********************************************************************************/
export const dbName = 'bikesharing'
const url = `mongodb://localhost:27017/${dbName}`
export const dbClient = new MongoClient(url, { useNewUrlParser: true, useUnifiedTopology: true })
export const bikeTripsCollectionName = 'biketrips'
// Number of bike trip documents buffered in memory before they are written with one insertMany call
const insertBatchSize = 50000;

/**
 * Connects to MongoDB and, if the bike trip collection is empty, fills it
 * from the sample JSON file.
 *
 * The JSON array is streamed and written in batches of `insertBatchSize`
 * documents. While a batch is being written the whole pipeline is paused, so
 * no further documents are delivered. The buffer is swapped for a fresh array
 * before the insert starts, so every parsed document ends up in exactly one
 * batch. Stream and insert failures reject the import promise and are
 * reported by the surrounding try/catch.
 */
(async () => {
    try {
        console.log("Attempting to connect to MongoDB server.")
        await dbClient.connect()
        console.log("Connected to MongoDB server.")
        const db = dbClient.db(dbName)
        const bikeTripCollection = db.collection(bikeTripsCollectionName)
        const stats = await bikeTripCollection.stats()
        if (stats.count !== 0) {
            console.log(`Found ${stats.count} bike trip documents in database`)
            return
        }

        // Create ascending indexes on startDate and endDate; awaited so that failures are not lost
        await bikeTripCollection.createIndex({ startDate: 1 })
        await bikeTripCollection.createIndex({ endDate: 1 })

        const dataStreamFromFile = fs.createReadStream('src/shared/data/bike-sharing-trip-data-4-january-28-february-reduced.json')
        const pipeline = chain([
            dataStreamFromFile,
            parser(),
            streamArray(),
        ])
        const startTime = Date.now()
        console.log('Inserting bike trips into database, please wait...')

        await new Promise<void>((resolve, reject) => {
            let bikeTripsTemp: IBikeTrip[] = []

            // Writes the buffered documents; a no-op for an empty buffer because insertMany rejects empty arrays
            const flush = async (): Promise<void> => {
                if (bikeTripsTemp.length === 0) {
                    return
                }
                // Swap the buffer before awaiting so that later documents can never land in the batch being written
                const batch = bikeTripsTemp
                bikeTripsTemp = []
                await bikeTripCollection.insertMany(batch)
            }

            // Stops reading the file and reports the failure to the awaiting caller
            const fail = (err: unknown): void => {
                dataStreamFromFile.destroy()
                reject(err)
            }

            pipeline.on('data', (bikeTripsChunk: { value: IBikeTrip }) => {
                bikeTripsTemp.push(bikeTripsChunk.value)
                if (bikeTripsTemp.length < insertBatchSize) {
                    return
                }
                // Pause the pipeline itself (not only the file stream) so no 'data' events fire during the insert
                pipeline.pause()
                flush()
                    .then(() => bikeTripCollection.stats())
                    .then(batchStats => {
                        console.log(`...${batchStats.count} documents in database...`)
                        pipeline.resume()
                    })
                    .catch(fail)
            })
            pipeline.on('end', () => {
                // Write the remaining documents (fewer than one full batch, possibly none)
                flush().then(resolve, fail)
            })
            pipeline.on('error', fail)
        })

        const bikeTripCollectionStats = await bikeTripCollection.stats()
        console.log(`Database inserts done! Added ${bikeTripCollectionStats.count} bike trip documents to database, in ${(Date.now() - startTime)/1000} seconds.`)
    } catch (err) {
        console.log(err instanceof Error ? err.stack : err)
    }
})()
/************************************************************************************
* Set basic express settings
***********************************************************************************/
// The express application instance that is configured below and exported as this module's default export
const app = express()
// Status code used by the API error handler further down
const { BAD_REQUEST } = StatusCodes
// Body parsing (JSON and URL-encoded forms) and cookie parsing for all requests
app.use(express.json())
app.use(express.urlencoded({extended: true}))
app.use(cookieParser())
// Show routes called in console during development
if (process.env.NODE_ENV === 'development') {
app.use(morgan('dev'))
}
// Allow cors (called without options)
// eslint-disable-next-line @typescript-eslint/no-unsafe-call
app.use(cors())
// Security: helmet is only enabled when NODE_ENV is 'production'
if (process.env.NODE_ENV === 'production') {
app.use(helmet())
}
// Add APIs: everything exported by ./routes is mounted below the '/api' prefix
app.use('/api', BaseRouter)
// Print API errors: log the error and answer with status BAD_REQUEST and the error message as JSON.
// NOTE(review): this handler is registered before the static/catch-all handlers below —
// confirm that errors raised there are meant to bypass it.
// eslint-disable-next-line @typescript-eslint/no-unused-vars
app.use((err: Error, req: Request, res: Response, next: NextFunction) => {
logger.err(err, true)
return res.status(BAD_REQUEST).json({
error: err.message,
})
})
/************************************************************************************
* Serve front-end content
***********************************************************************************/
// Folder 'frontend-build' next to this file; build.ts moves the frontend build to dist/frontend-build
const dir = path.join(__dirname, 'frontend-build')
app.set('views', dir)
// middleware is needed to make express serve static CSS and Javascript files
app.use(express.static(dir))
// Any other GET request is answered with the frontend's index.html
app.get('/*', (req: Request, res: Response) => {
res.sendFile('index.html', {root: dir})
})
// Export express instance
export default app
import bikePointsData from '../../shared/data/bike-point-data.json'
import { IBikePoint } from '@entities/BikePoint'
// Bike point documents loaded from the bundled JSON file; the cast is unchecked, the JSON is not validated here
const bikePoints = bikePointsData as IBikePoint[]
/**
 * Read access to bike point documents.
 */
export interface IBikePointDao {
    /**
     * Looks up a single bike point.
     * @param bikePointId id of the bike point without the 'BikePoints_' prefix
     * @returns the matching bike point, or null if there is none
     */
    getById: (bikePointId: string) => Promise<IBikePoint | null>
    /**
     * Returns all bike points.
     * @param bikePointId not used by the implementation in this file (which takes no
     *        arguments); kept as an optional parameter only for backward compatibility
     */
    getAll: (bikePointId?: string) => Promise<IBikePoint[]>
}
/**
 * Data access object serving bike points from the in-memory `bikePoints` list.
 * Both methods return already-resolved promises.
 */
class BikePointDao implements IBikePointDao {
    /**
     * Resolves with the bike point whose id matches, or null if none does.
     * @param bikePointId (without Prefix 'BikePoints_', only the numbers behind)
     */
    public getById(bikePointId: string): Promise<IBikePoint | null> {
        const match = getBikePointById(bikePoints, bikePointId)
        return Promise.resolve(match)
    }

    /**
     * Resolves with all bike points, each with the 'BikePoints_' prefix removed from its id.
     */
    public getAll(): Promise<IBikePoint[]> {
        const withoutPrefix = bikePoints.map(entry => removeBikePointIdPrefix(entry))
        return Promise.resolve(withoutPrefix)
    }
}
/**
 * Finds the first bike point whose id, with the 'BikePoints_' prefix removed,
 * equals the given id.
 *
 * @param bikePoints  bike points to search
 * @param bikePointId id without the 'BikePoints_' prefix
 * @returns a copy of the match with the prefix removed from its id, or null if nothing matches
 */
export const getBikePointById = (bikePoints: IBikePoint[], bikePointId: string) => {
    const match = bikePoints.find(candidate => candidate.id.replace('BikePoints_', '') === bikePointId)
    if (match === undefined) {
        return null
    }
    return removeBikePointIdPrefix(match)
}
/**
 * Returns a shallow copy of the bike point whose id has the first
 * occurrence of 'BikePoints_' removed; the input object is not modified.
 */
const removeBikePointIdPrefix = (bikePoint: IBikePoint) => {
    const id = bikePoint.id.replace('BikePoints_', '')
    return { ...bikePoint, id }
}
export default BikePointDao
// all dates are unix timestamps
/**
 * Shape of a bike point document as stored in the bundled bike point JSON file.
 */
export interface IBikePoint {
    /** Identifier; in the stored documents it carries the prefix 'BikePoints_', which the DAO strips. */
    id: string;
    url: string;
    commonName: string;
    placeType: string;
    /** Further key/value properties attached to the bike point. */
    additionalProperties: BikePointProperty[];
    children: any[];
    childrenUrls: string[];
    /** Latitude of the bike point. */
    lat: number;
    /** Longitude of the bike point. */
    lon: number;
}
/**
 * One entry of a bike point's `additionalProperties` list; all fields are strings.
 */
export interface BikePointProperty {
    category: string;
    key: string;
    sourceSystemKey: string;
    value: string;
    /** Kept as a string; presumably a modification date — confirm the format against the data. */
    modified: string;
}
// all dates are unix timestamps
/**
 * Detail data of a single bike point.
 */
export interface IBikePointDetails {
    id: string;
    commonName: string;
    /** Activity values keyed by hour of day. */
    diagrammData: IBikePointActivityMap;
    /** Unix timestamp. */
    installDate: number;
    /** Presumably the number of docks at the bike point — confirm against the producer of this object. */
    nbDocks: number;
}
/**
 * Activity values of a bike point for one hour of the day.
 * Judging by the names these are averages of rentals, returns and their total —
 * confirm against the code that computes them.
 */
export interface IBikePointActivityAtHourOfDay {
    avgNbRentals: number;
    avgNbReturns: number;
    avgNbTotal: number;
}
/** Maps an hour of the day (numeric key) to the activity values for that hour. */
export type IBikePointActivityMap = Record<number, IBikePointActivityAtHourOfDay>
\ No newline at end of file
// all dates are unix timestamps
/**
 * Shape of a bike trip document as inserted into the bike trip collection.
 */
export interface IBikeTrip {
    rentalId: string;
    /** Presumably in seconds — confirm against the preprocessing of the source data. */
    duration: number;
    bikeId: string;
    /** Unix timestamp. */
    endDate: number;
    endStationId: string;
    endStationName: string;
    /** Unix timestamp. */
    startDate: number;
    startStationId: string;
    startStationName: string;
}
/**
 * Number of bike trips falling into one duration class.
 */
export interface IBikeTripDurationCount {
    /** Label of the duration class. */
    classLabel: string;
    /** Count associated with the class. */
    count: number;
}
import './pre-start' // Must be the first import
import app from '@server'
import logger from '@shared/Logger'
// Start the server on the port given by the PORT environment variable, or on 3000 if it is not set
const fallbackPort = 3000
const port = Number(process.env.PORT || fallbackPort)
const announceStart = (): void => {
    logger.info('Bike sharing data server started on port: ' + port)
}
app.listen(port, announceStart)
## Environment ##
NODE_ENV=development
## Server ##
PORT=8081
HOST=localhost
## Setup jet-logger ##
JET_LOGGER_MODE=CONSOLE
JET_LOGGER_FILEPATH=jet-logger.log
JET_LOGGER_TIMESTAMP=TRUE
JET_LOGGER_FORMAT=LINE
## Environment ##
NODE_ENV=production
## Server ##
PORT=8081
HOST=localhost
## Setup jet-logger ##
JET_LOGGER_MODE=FILE
JET_LOGGER_FILEPATH=jet-logger.log
JET_LOGGER_TIMESTAMP=TRUE
JET_LOGGER_FORMAT=LINE
/**
* Pre-start is where we want to place things that must run BEFORE the express server is started.
* This is useful for environment variables, command-line arguments, and cron-jobs.
*/
import path from 'path'
import dotenv from 'dotenv'
import commandLineArgs from 'command-line-args'
(() => {
// Setup command line options
const options = commandLineArgs([
{
name: 'env',
alias: 'e',
defaultValue: 'development',