import pandas as pd
import math
import numpy as np
from datetime import datetime as dt
pd.options.mode.chained_assignment = None
# Function to calculate distance (http://stackoverflow.com/questions/15736995/how-can-i-quickly-estimate-the-distance-between-two-latitude-longitude-points/15737218#15737218)
from math import radians, cos, sin, asin, sqrt
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Returns the distance in metres (earth radius taken as 6367 km).
    """
    # work in radians from here on
    rlon1, rlat1, rlon2, rlat2 = (radians(v) for v in (lon1, lat1, lon2, lat2))
    # haversine formula, using the half-angle differences directly
    half_dlat = (rlat2 - rlat1) / 2
    half_dlon = (rlon2 - rlon1) / 2
    a = sin(half_dlat) ** 2 + cos(rlat1) * cos(rlat2) * sin(half_dlon) ** 2
    c = 2 * asin(sqrt(a))
    # central angle -> km -> metres
    return 1000 * (6367 * c)
# Load locations of traffic lights
# (Available here: http://maps.amsterdam.nl/open_geodata/excel.php?KAARTLAAG=VERKEERSLICHTEN&THEMA=verkeerslichten)
vl = pd.read_csv('../data_verkeerslichten/VERKEERSLICHTEN.csv', sep = ';', encoding = 'utf-8')
# Keep only traffic lights that are still in use ('In bedrijf').
# Filter first, then copy: copying the whole frame before filtering wastes memory,
# and .copy() makes the later column assignments safe.
vl = vl[vl.Status == 'In bedrijf'].copy()
# Coordinates use a decimal comma; normalise to a decimal point
# (values remain strings here; they are cast to float where used)
vl['LNG'] = vl.LNG.map(lambda x: x.replace(',', '.'))
vl['LAT'] = vl.LAT.map(lambda x: x.replace(',', '.'))
# Only these three columns are needed downstream
# (the original also "renamed" them to the same names — a no-op, removed)
vl = vl[['OBJECTNUMMER', 'LAT', 'LNG']]
# Load Fietstelweek link data
# Shapefile obtained from Fietstelweek; clipped to include only Amsterdam, length added, nodes extracted, converted to WGS84 and exported to csv with Qgis
ftw = pd.read_csv('../data_fietstelweek/netwerk 2016/Amsterdam network plus nodes.csv', sep = ',', encoding = 'utf-8')
# This is a csv file with multiple lines per link: one for each node extracted.
# The first and last line of each link are used to get the start and end coordinates.
# Collapse the per-node rows into one row per link, adding the first node's
# coordinates as (x0, y0) and the last node's as (x1, y1).
# This takes a while.
columns = list(ftw.columns)
columns.extend(['x0', 'y0', 'x1', 'y1'])
collapsed = []       # one list per link; appending to a DataFrame row-by-row is O(n^2)
current_link = None  # LINKNUMMER of the link currently being scanned
nr = None            # row under construction for the current link
x0 = y0 = None       # coordinates of the current link's first node
for i, row in ftw.iterrows():
    if i % 5000 == 0:
        print(i, '/', len(ftw), dt.now())
    if row.LINKNUMMER != current_link:
        # A new link starts: flush the finished one and record the new start node.
        if nr is not None:
            collapsed.append(nr)
        x0, y0 = row.X, row.Y
        current_link = row.LINKNUMMER
    # Every row — including the first row of a link — updates the end coordinates
    # and the pending output row. (The original skipped this on link boundaries,
    # so single-node links and links followed immediately by another link kept
    # stale data.)
    x1, y1 = row.X, row.Y
    nr = [row[cn] for cn in ftw.columns.values]
    nr.extend([x0, y0, x1, y1])
# Flush the last link — the original dropped it because it only flushed on a
# link-number change inside the loop.
if nr is not None:
    collapsed.append(nr)
ftw = pd.DataFrame(collapsed, columns = columns)
# Apparently, some segments have a speed of zero. They have to be removed or calculations won't work
# (SPEED is a divisor in the delay formula below).
zero_speed = ftw.SPEED == 0
print('Number of links with speed 0:', len(ftw[zero_speed]))
ftw = ftw[~zero_speed]
# Calculate total delay for each segment
# (INTENSITEI scaled by 0.001, per hour: 60 * 60; delay grows with segment
# length and with how far SNELHEID_R falls below 1)
ftw['total_delay'] = ftw.INTENSITEI * 0.001 * 60 * 60 * (1 - ftw.SNELHEID_R) * ftw.qgis_lngth / ftw.SPEED
# Calculate distance from each segment's start and end coordinates to nearest traffic light to determine the shortest distance from a traffic light
# This takes a while
ftw['shortest_distance'] = 1000000.0  # sentinel: larger than any real distance in the city
ftw['nearest_vl'] = ''
for i0, row0 in vl.iterrows():
    print(i0, dt.now())
    x0, y0, vl_id = float(row0.LNG), float(row0.LAT), row0.OBJECTNUMMER
    for i1, row1 in ftw.iterrows():
        # Distance from this traffic light to both endpoints of the segment;
        # the closer endpoint counts.
        distance_1 = haversine(x0, y0, float(row1.x0), float(row1.y0))
        distance_2 = haversine(x0, y0, float(row1.x1), float(row1.y1))
        distance = min(distance_1, distance_2)
        if distance < ftw.at[i1, 'shortest_distance']:
            # Use .at for scalar access: the original ftw['col'][i1] = ... is
            # chained assignment (warning suppressed at the top of the file),
            # which may write to a temporary copy and silently lose the update.
            ftw.at[i1, 'shortest_distance'] = distance
            ftw.at[i1, 'nearest_vl'] = vl_id
# For a range of thresholds, define segments near traffic lights and:
# - Calculate average speed of segments near traffic lights
# - Estimate delay at segments near traffic lights as percentage of total delay
# - Estimate net delay (using 2 different methods) at segments near traffic lights as percentage of total delay

def _capped_ratio(series, denominator):
    """Return series / denominator, capped at 1.

    A value above 1 would mean 'better than normal'; it is clipped so the
    corresponding net delay cannot go negative. Replaces the two per-iteration
    get_net_1/get_net_2 closures of the original, which duplicated this logic.
    """
    return (series / denominator).clip(upper = 1)

tmp = pd.DataFrame(columns = ['Threshold', 'Average speed', 'Share', 'Net share 1', 'Net share 2'])
# Loop-invariant: total delay over all segments never changes inside the loop,
# so compute it once instead of once per threshold.
total_delay = sum(ftw.total_delay)
step = 5
for threshold in np.arange(step, 1000, step):
    # Determine 'normal' speed at segments not near a traffic light
    subset1 = ftw[ftw.shortest_distance > threshold]
    normal_speed = np.median(subset1.SPEED)
    normal_relative_speed = np.median(subset1.SNELHEID_R)
    # Estimate 'net relative speed' (2 methods) i.e. relative speed that takes
    # into account how much delay would normally occur without traffic lights nearby
    ftw['net_relative_speed_1'] = _capped_ratio(ftw.SNELHEID_R, normal_relative_speed)
    ftw['net_delay_1'] = ftw.INTENSITEI * 0.001 * 60 * 60 * (1 - ftw.net_relative_speed_1) * ftw.qgis_lngth / ftw.SPEED
    ftw['net_relative_speed_2'] = _capped_ratio(ftw.SPEED, normal_speed)
    ftw['net_delay_2'] = ftw.INTENSITEI * 0.001 * 60 * 60 * (1 - ftw.net_relative_speed_2) * ftw.qgis_lngth / ftw.SPEED
    # Create a subset of segments near traffic lights, using the current threshold
    subset2 = ftw[ftw.shortest_distance <= threshold]
    # Calculate average speed for segments near traffic lights
    average_speed = np.mean(subset2.SPEED)
    # Delays at segments near traffic lights as percentage of total delay.
    # NOTE: as in the original, all three shares use total_delay as the
    # denominator (the original also summed ftw.net_delay_1/2 but never used
    # those totals — removed).
    share_total = float(100 * sum(subset2.total_delay) / total_delay)
    share_net_1 = float(100 * sum(subset2.net_delay_1) / total_delay)
    share_net_2 = float(100 * sum(subset2.net_delay_2) / total_delay)
    # Add estimates for current threshold to temporary dataframe
    tmp.loc[len(tmp)] = [threshold, average_speed, share_total, share_net_1, share_net_2]
tmp.to_csv('../time_lost/chart_data.csv', index = False, encoding = 'utf-8')
ftw.to_csv('../time_lost/processed.csv', index = False, encoding = 'utf-8')