# Import libraries
#! pip install gurobipy
#! pip install geopy
import gurobipy as gp
from gurobipy import *
import numpy as np
import pandas as pd
import geopy
from geopy.geocoders import Nominatim
import geopy.distance
geolocator = Nominatim(user_agent="DECISION ANALYTICS")
import plotly.express as px
import plotly.graph_objects as go
import re
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
# DataFrame with most features requires
# Candidate Sites
df_sites = pd.read_csv('Data/data.csv', sep =',')
print(df_sites.shape)
df_sites.head(2)
# Demand Cities
df_cities = pd.DataFrame()
cities = ['Los Angeles, CA',
'Phoenix, AZ',
'Houston, TX', 'Chicago, IL','New York City, NY' ]
cities_abv = [ 'Los Angeles', 'Phoenix', 'Houston', 'Chicago','New York City' ]
df_cities['City'] = cities
df_cities['CityAbv'] = cities_abv
(28, 31)
# Latitude, Longitude and Coordinates
def get_coords(df, colname = 'City'):
# We get latitude and longitude using Geolocator methods
df['LATITUDE_X'] = df[colname].apply(lambda x: geolocator.geocode(x).latitude)
df['LONGITUDE_Y'] = df[colname].apply(lambda x: geolocator.geocode(x).longitude)
colnames = df.columns.to_series().reset_index(drop=True)
latitude_column_idx = colnames[colnames == 'LATITUDE_X'].index[0]
longitude_column_idx = colnames[colnames == 'LONGITUDE_Y'].index[0]
# We save the coordinates as tuples into a single columns
df['COORDINATES'] = df.apply(lambda x: (x[latitude_column_idx], x[longitude_column_idx]), axis=1)
return df
df_sites = get_coords(df_sites)
df_cities = get_coords(df_cities)
colnames = df_sites.columns.to_series().reset_index(drop=True)
coords_ix = colnames[colnames == 'COORDINATES'].index[0]
labels = []
# one city at a time, we calculate the distances of all the candidates to that city, using Geopy's distance module.
for row in df_cities.to_dict(orient="records"):
coords_city = row['COORDINATES']
label = f"Dist_KM_to_{row['City']}"
labels.append(label)
# Vincenty distance is the standard algorithm used to calculate long distances between cities (considers Earth's spherical shape)
df_sites[label] = df_sites.apply(lambda x: geopy.distance.geodesic(x[coords_ix], coords_city).km, axis=1)
#df_sites['TOTAL_DIST_KM'] = df_sites[labels].sum(axis=1)
years = 50
df_sites['EXTREME_WEATH_EVENTS_YEARLY'] = df_sites.Cnt_All_Incidents/years
Households (US Census Bureau, 2019)
Connectivity to Internet Services (US Census Bureau, 2019)
Bandwidth GB Consumption per Household (WSJ, 2020)
FINAL CALCULATION
$ (Nr. \: of\: Hh) * (\% of\: Hh\: with\: Internet) * (Avg.\: GB_{Monthly} \: consumption) $
mkt_share = 0.2# service 20% of Households
NR_HOUSEHOLDS = np.array([1_383_869, 565_832, 858_374,1_066_829, 3_167_034])*mkt_share
PCT_INTNET = np.array([0.83, 0.83, 0.80, 0.79, 0.82]) # LA, PHX, HOU, CHI, NYC
AVG_GB = 250 # Monthly
# Final Demand calculation: NR_HOUSEHOLDS * PCT_INTNET * AVG_GB
DEMAND = NR_HOUSEHOLDS * PCT_INTNET * AVG_GB # Monthly
df_cities['DEMAND'] = DEMAND
df_cities['NR_HOUSEHOLDS'] = NR_HOUSEHOLDS
DEMAND
array([5.74305635e+07, 2.34820280e+07, 3.43349600e+07, 4.21397455e+07, 1.29848394e+08])
The objective is to select the minimum amount of datacenter sites to service the 5 big cities that the company wants to service with exclusive fiber cable connections. The program will attempt to minimize the costs (construction, fiber, downtime costs) while accounting for weather, demand and supply constraints. See figure below for possible candidates (in blue) and cities to service (green)
fig = go.Figure()
fig = px.scatter_mapbox(
df_cities,
lat="LATITUDE_X",
lon="LONGITUDE_Y",
hover_name="City",
color_discrete_sequence=["green"],
zoom=3,
height=400,
size='NR_HOUSEHOLDS'
)
fig3 = px.scatter_mapbox(
df_sites,
lat="LATITUDE_X",
lon="LONGITUDE_Y",
hover_name="City",
hover_data = ['Composite_Cost', 'Mean_Temp_F', 'EXTREME_WEATH_EVENTS_YEARLY'],
color_discrete_sequence=["blue"],
zoom=3,
height=400
)
fig.add_trace(fig3.data[0]) # adds the line trace to the first figure
fig.update_layout(
mapbox_style="open-street-map",
title='Candidate datacenters (in blue) to service cities (in green)',
)
fig.show()
model = gp.Model('Data Center Site Selection')
Restricted license - for non-production use only - expires 2022-01-13
Data Sources
Cost of fiber: CTC Technology & Energy
Construction Cost Index: Statista
Cost of Construction: JLL
Extreme weather events FEMA 1964-2013 Disaster Declarations Summarized by County
Cost of downtime: Ponemon Institute© Research Report, Emerson Network Power
Avg Temperature F: NCDC.NOAA.gov
# Set vectors and matrices
DISTANCES = df_sites[labels].values
COST_OF_FIBER = 500 * 1.6
COST_INDEX = np.array(df_sites.Composite_Cost/100)
COST_OF_CONSTRUCTION = 16_000_000
PROB_EXTREME_WEATHER = np.array(df_sites['EXTREME_WEATH_EVENTS_YEARLY'])
COST_OF_DOWNTIME = 17_000
# We want to satisfy 1.2x the demand to build redundancy
DEMAND_REDUNDANT = np.array(DEMAND * 1.2)
AVG_TEMP = np.array(df_sites.Mean_Temp_F)
db_sites = df_sites['City'].tolist()
print('Sites:', db_sites)
cities = df_cities['City'].tolist()
print('Cities:', cities)
Sites: ['Phoenix', 'Los Angeles', 'San Diego', 'San Francisco', 'Denver', 'Miami', 'Atlanta', 'Chicago', 'Indianapolis', 'Baltimore', 'Boston', 'Detroit', 'Kansas City', 'St. Louis', 'Buffalo', 'New York', 'Cincinnati', 'Cleveland', 'Columbus', 'Philadelphia', 'Pittsburgh', 'Memphis', 'Nashville', 'Dallas', 'Houston', 'San Antonio', 'Seattle', 'Milwaukee'] Cities: ['Los Angeles, CA', 'Phoenix, AZ', 'Houston, TX', 'Chicago, IL', 'New York City, NY']
# Decision Variables
# Rows: Candidate Sites (28)
# Columns: Cities (5)
I = len(db_sites)
J = len(cities)
# GB Trafic between sites and cities
names = [f'GB from site {site} to city {city}' for site in db_sites for city in cities]
X = model.addVars(I, J, vtype=GRB.CONTINUOUS, lb=0, name=names)
# Fiber Linkage between sites and cities
names = [f'Fiber linkage between {site} and city {city}' for site in db_sites for city in cities]
Z = model.addVars(I, J, vtype=GRB.BINARY, lb=0, ub=1, name=names)
# Decision to build site in candidate location
names = [f'Decision to build site in {site}' for site in db_sites]
Y = model.addVars(I, vtype=GRB.BINARY, lb=0, ub=1, name=names)
$ \sum_{i}^{28} [Y_i* (c_i*k_1 + w_i * k_2 ) ] + \sum_{i}^{28}\sum_{j}^{5} [Z_{i,j} * (d_{i,j} * k_3)] $
Where
$Y_i$: Decision to build datacenter in site "i"
$c_i$: Index of construction costs in site "i"
$k_1 = 16,000,000$; Cost of building datacenter (constant)
$w_i$: Number of extreme weather events in a year at site "i"
$k_2 = 17,000$; Cost of downtime (constant)
$Z_{i,j}$: Decision to build fiber link between site "i" and city "j"
$d_{i,j}$: Distance in Km between site "i" and city "j"
$k_3 = 800$; Cost of long-haul fiber cable per Km (constant)
# Decision Function
exp = sum( Y[i]*
( COST_INDEX[i] * COST_OF_CONSTRUCTION
+ PROB_EXTREME_WEATHER[i] * COST_OF_DOWNTIME )
for i in range(I)) + \
sum( Z[i,j]*DISTANCES[i,j] * COST_OF_FIBER for i in range(I) for j in range(J))
model.setObjective(exp, GRB.MINIMIZE)
Demand Constraint
$ \sum_{i}^{28} [ X_{i,j} ] \geq D_j$
for $j$ = {Los Angeles, ..., New York} (5)
where:
$X_{i,j}$: Gigabytes (GB) traffic between site "i" and city "j"
$D_j$: Demand (in monthly GB) from city "j", accounting for redundancy policy
# Demand
for city in range(J):
print(f'Demand (with redundancy) for {cities[city]}:', DEMAND_REDUNDANT[city])
lhs = sum(X[site, city] for site in range(len(db_sites)))
model.addConstr(lhs >= DEMAND_REDUNDANT[city], name = f'Demand (with redundancy) for {cities[city]}:')
Demand (with redundancy) for Los Angeles, CA: 68916676.2 Demand (with redundancy) for Phoenix, AZ: 28178433.600000005 Demand (with redundancy) for Houston, TX: 41201952.00000001 Demand (with redundancy) for Chicago, IL: 50567694.60000001 Demand (with redundancy) for New York City, NY: 155818072.79999998
Supply Constraints
$ \sum_{j}^{5} [ X_{i,j} ] \leq S_i $
for $i$ = {Phoenix, ..., Milwaukee} (28)
where:
$X_{i,j}$: Gigabytes (GB) traffic between site "i" and city "j"
$S_i$: Maximum capacity (in monthly GB) from site "i"
supply_cap_PB = 250 #
supply_cap = supply_cap_PB * 10**6
# Supply
for site in range(I):
lhs = sum(X[site, city] for city in range(len(cities)))
model.addConstr(lhs <= supply_cap, name='Supply for site'+db_sites[site])
Temperature Constraints
$ Y_i*F_i \leq 64 $
for $i$ = {Phoenix, ..., Milwaukee} (28)
where: $F_i $: Average temperature of site "i" in Farenheit.
# 75th percentile of average temperature
q3_mean_temp = df_sites.Mean_Temp_F.describe()['75%']
print(f'The 75th percentile of average temperature is {round(q3_mean_temp)} F')
for site in range(I):
lhs = Y[site]*AVG_TEMP[site]
model.addConstr(lhs <= q3_mean_temp, name = 'Temperature'+db_sites[site])
The 75th percentile of average temperature is 62 F
Extreme Weather Constraints
$ \sum_i^{28}[Y_i*w_i] \leq \sum{Y}*0.3 $
# Number of extreme weather events less than 0.3 (75th percentile in the dataset)
lhs = sum( Y[i]*PROB_EXTREME_WEATHER[i] for i in range(I) )
model.addConstr(lhs <= 0.3*sum(Y) , name = 'Weather Event') # average of 0.3 is 75% percentile
<gurobi.Constr *Awaiting Model Update*>
Supporting Logical Constraints
# Supporting Constraints
M = 10**10 # Big M for constraint formulation
for site in range(len(db_sites)):
site_name = db_sites[site]
# Decision constraint: site has to be build for it to service cities
lhs = sum(X[site, city] for city in range(len(cities)))
name = f'Decision constraint {site_name}'
model.addConstr(lhs <= Y[site]*M, name=name)
# Site can only have links with cities if site has been built
name = f'Only link {site_name} with cities if built'
model.addConstr( sum(Z[site, city] for city in range(len(cities)) ) <= Y[site]*5, name=name)
# Site can only traffic data if site has been built
for city in range(len(cities)):
model.addConstr( X[site, city] <= Z[site, city]*M)
model.update()
#model.params.NonConvex = 2
model.optimize()
Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (win64) Thread count: 4 physical cores, 8 logical processors, using up to 8 threads Optimize a model with 258 rows, 308 columns and 951 nonzeros Model fingerprint: 0x4213b4dd Variable types: 140 continuous, 168 integer (168 binary) Coefficient statistics: Matrix range [8e-02, 1e+10] Objective range [1e+05, 2e+07] Bounds range [1e+00, 1e+00] RHS range [6e+01, 3e+08] Warning: Model contains large matrix coefficients Consider reformulating model or setting NumericFocus parameter to avoid numerical issues. Found heuristic solution: objective 8.392917e+07 Presolve removed 85 rows and 77 columns Presolve time: 0.01s Presolved: 173 rows, 231 columns, 693 nonzeros Variable types: 105 continuous, 126 integer (126 binary) Root relaxation: objective 2.211356e+07, 227 iterations, 0.01 seconds Nodes | Current Node | Objective Bounds | Work Expl Unexpl | Obj Depth IntInf | Incumbent BestBd Gap | It/Node Time 0 0 2.2114e+07 0 8 8.3929e+07 2.2114e+07 73.7% - 0s H 0 0 6.646889e+07 2.2114e+07 66.7% - 0s H 0 0 4.981657e+07 2.2114e+07 55.6% - 0s H 0 0 4.864410e+07 2.2114e+07 54.5% - 0s H 0 0 3.623800e+07 2.2114e+07 39.0% - 0s H 0 0 3.461098e+07 2.2114e+07 36.1% - 0s 0 0 2.3452e+07 0 8 3.4611e+07 2.3452e+07 32.2% - 0s H 0 0 3.271936e+07 2.3452e+07 28.3% - 0s 0 0 2.4527e+07 0 17 3.2719e+07 2.4527e+07 25.0% - 0s 0 0 2.4699e+07 0 16 3.2719e+07 2.4699e+07 24.5% - 0s 0 0 2.4733e+07 0 19 3.2719e+07 2.4733e+07 24.4% - 0s 0 0 2.5032e+07 0 18 3.2719e+07 2.5032e+07 23.5% - 0s 0 0 2.5075e+07 0 11 3.2719e+07 2.5075e+07 23.4% - 0s 0 0 2.5267e+07 0 22 3.2719e+07 2.5267e+07 22.8% - 0s 0 0 2.5313e+07 0 23 3.2719e+07 2.5313e+07 22.6% - 0s 0 0 2.5321e+07 0 24 3.2719e+07 2.5321e+07 22.6% - 0s 0 0 2.5355e+07 0 12 3.2719e+07 2.5355e+07 22.5% - 0s 0 0 2.5355e+07 0 12 3.2719e+07 2.5355e+07 22.5% - 0s 0 2 2.5355e+07 0 12 3.2719e+07 2.5355e+07 22.5% - 0s Cutting planes: Cover: 15 Implied bound: 29 Clique: 3 MIR: 1 Flow cover: 12 RLT: 1 Relax-and-lift: 6 Explored 465 nodes (5845 simplex iterations) in 0.19 seconds Thread count was 8 (of 8 available processors) Solution count 7: 3.27194e+07 3.4611e+07 3.6238e+07 ... 8.39292e+07 Optimal solution found (tolerance 1.00e-04) Best objective 3.271936322556e+07, best bound 3.271936322556e+07, gap 0.0000%
# print tidy results
print(f'Model has {model.NumVars} variables and {model.NumConstrs} constraints.')
print(f'Current minimal cost found is: ${round(model.objVal, 2)}')
# print()
V = model.getVars()
selected = []
# we create a list of selected cities
for v in V:
if v.x >=1:
if v.VarName[:3] == 'Dec':
selected.append( v.VarName)
# print only if variable is non-0 to avoid printing all the model variables (impossible to read)
print(v.VarName, ' ', round(v.X, 0))
for site in range(I):
sumGB = 0 # To store sum of GB in each site
for city in range(J):
if X[site, city].x > 1:
sumGB += X[site, city].x
if sumGB >= 1:
print(f"Sum of GB in {Y[site].VarName[26:]}: {round(sumGB,0)}")
Model has 308 variables and 258 constraints. Current minimal cost found is: $32719363.23 GB from site Denver to city Los Angeles, CA 68916676.0 GB from site Denver to city Phoenix, AZ 28178434.0 GB from site Memphis to city Houston, TX 41201952.0 GB from site Memphis to city Chicago, IL 50567695.0 GB from site Memphis to city New York City, NY 155818073.0 Fiber linkage between Denver and city Los Angeles, CA 1.0 Fiber linkage between Denver and city Phoenix, AZ 1.0 Fiber linkage between Memphis and city Houston, TX 1.0 Fiber linkage between Memphis and city Chicago, IL 1.0 Fiber linkage between Memphis and city New York City, NY 1.0 Decision to build site in Denver 1.0 Decision to build site in Memphis 1.0 Sum of GB in Denver: 68916676.0 Sum of GB in Denver: 97095110.0 Sum of GB in Denver: 97095110.0 Sum of GB in Denver: 97095110.0 Sum of GB in Denver: 97095110.0 Sum of GB in Memphis: 41201952.0 Sum of GB in Memphis: 91769647.0 Sum of GB in Memphis: 247587719.0
# quick visualization to understand how sites are meeting the cities' demands.
traffic = np.zeros( (I,J))
for i in range(I):
for j in range(J):
if X[i,j].X != 0.0:
traffic[i,j] = X[i,j].X
traffic = pd.DataFrame( traffic.astype(int) )
traffic.index=db_sites
traffic.columns = cities
traffic = traffic[traffic.sum(axis=1) != 0]
sns.heatmap(traffic)
plt.yticks(rotation=1)
plt.xticks(rotation=15)
(array([0.5, 1.5, 2.5, 3.5, 4.5]), [Text(0.5, 0, 'Los Angeles, CA'), Text(1.5, 0, 'Phoenix, AZ'), Text(2.5, 0, 'Houston, TX'), Text(3.5, 0, 'Chicago, IL'), Text(4.5, 0, 'New York City, NY')])
selected_sites = [ i[26:] for i in selected ]
cols = ['City',
'LATITUDE_X',
'LONGITUDE_Y',
'Composite_Cost',
'Mean_Temp_F',
'EXTREME_WEATH_EVENTS_YEARLY']
df_selected = df_sites[df_sites.City.isin(selected_sites)][cols]
df_selected = df_selected.reset_index(drop=True)
cols = ['Start','Start_lat','Start_long','End','End_lat','End_long','TRAFFIC']
df_linkage = pd.DataFrame(columns=cols)
V = model.getVars()
link_start = []
link_end = []
for v in V:
if (v.VarName[:3] == 'Fib') & (v.X > 0):
#print(v.VarName, ' ', round(v.X, 0))
link_start.append(re.search("between (.*) and", v.VarName).group(1))
link_end.append(re.search("city (.*),", v.VarName).group(1))
df_linkage['Start'] = link_start
df_linkage['End'] = link_end
#Getting the longitude and latitude for Start and End cities
df_linkage['Start_lat'] = df_linkage.Start.map(dict(zip(df_sites.City, df_sites.LATITUDE_X)))
df_linkage['Start_long'] = df_linkage.Start.map(dict(zip(df_sites.City, df_sites.LONGITUDE_Y)))
df_linkage['End_lat'] = df_linkage.End.map(dict(zip(df_cities.CityAbv, df_cities.LATITUDE_X)))
df_linkage['End_long'] = df_linkage.End.map(dict(zip(df_cities.CityAbv, df_cities.LONGITUDE_Y)))
df_linkage['TRAFFIC'] = [0, 68916676, 28178433, 41201952, 50567694, 155818072]
for i in traffic.index:
for c in traffic.columns:
df_linkage.loc[(df_linkage.Start == i) &
(df_linkage.End == c[:-4]), 'TRAFFIC']== traffic.loc[i,c]
fig = go.Figure()
height = 600
width = 1200
fig = px.scatter_mapbox(df_cities, lat="LATITUDE_X", lon="LONGITUDE_Y", hover_name="City",
color_discrete_sequence=["green"], zoom=3, height=height, width=width, size='NR_HOUSEHOLDS')
fig2 = px.scatter_mapbox(df_selected, lat="LATITUDE_X", lon="LONGITUDE_Y", hover_name="City",
hover_data = ['Composite_Cost', 'Mean_Temp_F', 'EXTREME_WEATH_EVENTS_YEARLY'],
color_discrete_sequence=["red"], zoom=3, height=height, width=width, size=[1]*traffic.shape[0] )
fig3 = px.scatter_mapbox(df_sites, lat="LATITUDE_X", lon="LONGITUDE_Y", hover_name="City",
hover_data = ['Composite_Cost', 'Mean_Temp_F', 'EXTREME_WEATH_EVENTS_YEARLY'],
color_discrete_sequence=["blue"], zoom=3, height=height, width=width)
fig.add_trace(fig3.data[0]) # adds the line trace to the first figure
fig.add_trace(fig2.data[0]) # adds the line trace to the first figure
for i in range(df_linkage.shape[0]):
trace = go.Scattermapbox({
'lat': [df_linkage['Start_lat'][i], df_linkage['End_lat'][i]] ,
'lon' : [df_linkage['Start_long'][i], df_linkage['End_long'][i]],
'line': {'color': 'black', 'width': 3},
'mode': 'lines',
'subplot':'mapbox',
'opacity': max(0.3, df_linkage['TRAFFIC'][i]/df_linkage.TRAFFIC.max()),
'text' : 'a',
})
fig.add_trace(trace)
fig.update_layout(
mapbox_style="open-street-map",
title='Selection of datacenters (in red) to service cities (in green)',
)
#fig.update_layout(margin={"r":0,"t":10,"l":0,"b":0})
fig.show()
#List of sites that are not selected
not_selected = list(set(df_sites.City.tolist()) - set(df_selected.City.tolist()))
#Getting dataframes with all the values for selected and not_selected cities
df_notselected_all = df_sites[df_sites['City'].isin(not_selected)]
df_selected_all = df_sites[df_sites['City'].isin(df_selected.City)]
#Adding a column with binary 1 if selected and 0 if not selected to the main sites dataframe
df_sites['is_selected'] = 0
df_sites.loc[df_sites['City'].isin(df_selected.City), 'is_selected'] = 1
plt.style.use('ggplot')
#Getting graph of cost index of selected cities and not selected cities
fig, ax = plt.subplots(figsize = (10, 7))
sns.barplot(x = df_sites.Composite_Cost, y = df_sites.City, data = df_sites,
order=df_sites.sort_values('Composite_Cost', ascending=False).City,
hue="is_selected")
plt.show()
#Getting graph of mean temperature
fig, ax = plt.subplots(figsize = (10,7))
sns.barplot(x = df_sites.Mean_Temp_F, y = df_sites.City, data = df_sites,
order=df_sites.sort_values('Mean_Temp_F', ascending=False).City,
hue="is_selected")
plt.axvline(x=65, color="red", linewidth=1)
plt.show()
#Getting graph of count of all incidents
fig, ax = plt.subplots(figsize = (7,7))
sns.barplot(x = df_sites.Cnt_All_Incidents, y = df_sites.City, data = df_sites,
order=df_sites.sort_values('Cnt_All_Incidents', ascending=False).City,
hue="is_selected")
plt.show()
#Getting graph of cents per kwh
fig, ax = plt.subplots(figsize = (7,7))
sns.barplot(x = df_sites.cents_per_kwh,y = df_sites.City, data = df_sites,
order=df_sites.sort_values('cents_per_kwh', ascending=False).City,
hue="is_selected")
plt.show()
#Dataframe with distances from site to city and total distance
df_distances = df_sites.iloc[ : , [0, 34,35,36,37,38] ]
df_distances['Total_Distance'] = df_distances.iloc[:,1:].sum(axis=1)
df_distances = df_distances.sort_values(by='Total_Distance')
#Stacked barplot to see total distance and
df_distances.iloc[:,0:6].set_index('City').plot(kind='bar', stacked=True,
color=['darkblue', 'lightblue', 'darkgreen','seagreen','orange'], figsize=(16,6),
sort_columns=True)
plt.title('Distance from Site to City', fontsize=16)
plt.xlabel('City')
plt.ylabel('Distance')
plt.gca().get_xticklabels()[4].set_color("red") #Set tick Memphis to red
plt.gca().get_xticklabels()[5].set_color("red") #Set tick Denver to red
<ipython-input-61-8fb9561bf443>:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
#Creating a new model for multiple scenarios
model_mult = gp.Model('Data Center Site: Multiple Scenarios')
# Decision Variables
# Rows: Candidate Sites (28)
# Columns: Cities (5)
I = len(db_sites)
J = len(cities)
#GB Trafic between sites and cities
names = [f'GB from site {site} to city {city}' for site in db_sites for city in cities]
X = model_mult.addVars(I, J, vtype=GRB.CONTINUOUS, lb=0, name=names)
#Fiber Linkage between sites and cities
names = [f'Fiber linkage between {site} and city {city}' for site in db_sites for city in cities]
Z = model_mult.addVars(I, J, vtype=GRB.BINARY, lb=0, ub=1, name=names)
#Decision to build site in candidate location
names = [f'Decision to build site in {site}' for site in db_sites]
Y = model_mult.addVars(I, vtype=GRB.BINARY, lb=0, ub=1, name=names)
$ \sum_{i}^{28} [Y_i* (c_i*k_1 + w_i * k_2 ) ] + \sum_{i}^{28}\sum_{j}^{5} [Z_{i,j} * (d_{i,j} * k_3)] $
Where
$Y_i$: Decision to build datacenter in site "i"
$c_i$: Index of construction costs in site "i"
$k_1 = 16,000,000$; Cost of building datacenter (constant)
$w_i$: Number of extreme weather events in a year at site "i"
$k_2 = 17,000$; Cost of downtime (constant)
$Z_{i,j}$: Decision to build fiber link between site "i" and city "j"
$d_{i,j}$: Distance in Km between site "i" and city "j"
$k_3 = 800$; Cost of long-haul fiber cable per Km (constant)
# Decision Function
exp = sum( Y[i]*
( COST_INDEX[i] * COST_OF_CONSTRUCTION
+ PROB_EXTREME_WEATHER[i] * COST_OF_DOWNTIME )
for i in range(I)) + \
sum( Z[i,j]*DISTANCES[i,j] * COST_OF_FIBER for i in range(I) for j in range(J))
model_mult.setObjective(exp, GRB.MINIMIZE)
Demand Constraint
$ \sum_{i}^{28} [ X_{i,j} ] > D_j$
for $j$ = {Los Angeles, ..., New York} (5)
where:
$X_{i,j}$: Gigabytes (GB) traffic between site "i" and city "j"
$D_j$: Demand (in monthly GB) from city "j", with no redundancy multiplier
#Demand Constraint
#Gurobi's multipler scenario feature requires the constraints to be formulated using add.Constrs and stored in a variable if you want to change the RHS of the constraint
demandConstr = model_mult.addConstrs(
(sum(X[site, city] for site in range(I)) >= DEMAND[city] for city in range(J) ), name=['Demand Multiplier'])
Supply Constraints
$ \sum_{i}^{5} [ X_{i,j} ] < S_i $
for $i$ = {Phoenix, ..., Milwaukee} (28)
where:
$X_{i,j}$: Gigabytes (GB) traffic between site "i" and city "j"
$S_i$: Maximum capacity (in monthly GB) from site "i"
supply_cap_PB = 250 # cap in petabytes 150
supply_cap = supply_cap_PB * 10**6
# Supply
#Since we are not changing the RHS of this constraint, this formulation works
for site in range(I):
lhs = sum(X[site, city] for city in range(len(cities)))
model_mult.addConstr(lhs <= supply_cap, name='Supply for site'+db_sites[site])
Temperature Constraints
$ Y_i*F_i < 64 $
for $i$ = {Phoenix, ..., Milwaukee} (28)
where: $F_i $: Average temperature of site "i" in Farenheit.
#Temperature constraint
tempConstr = model_mult.addConstrs((Y[site]*AVG_TEMP[site] <= 64 for site in range(I)), name = 'Temperature'+db_sites[site] )
Extreme Weather Constraints
$ \sum_i^{28}[Y_i*w_i] < \sum{Y}*0.3 $
# Number of extreme weather events less than 0.3 (75th percentile in the dataset)
lhs = sum( Y[i]*PROB_EXTREME_WEATHER[i] for i in range(I) )
model_mult.addConstr( lhs <= 0.3*sum(Y) , name = 'Weather Event') # average of 0.3 is 75% percentile
<gurobi.Constr *Awaiting Model Update*>
Supporting Logical Constraints
# Supporting Constraints
M = 10**10 # Big M for constraint formulation
for site in range(len(db_sites)):
site_name = db_sites[site]
# Decision constraint: site has to be build for it to service cities
lhs = sum(X[site, city] for city in range(len(cities)))
name = f'Decision constraint {site_name}'
model_mult.addConstr(lhs <= Y[site]*M, name=name)
# Site can only have links with cities if site has been built
name = f'Only link {site_name} with cities if built'
model_mult.addConstr( sum(Z[site, city] for city in range(len(cities)) ) <= Y[site]*5, name=name)
# Site can only traffic data if site has beel built
for city in range(len(cities)):
model_mult.addConstr( X[site, city] <= Z[site, city]*M)
#Setting the number of scenarios: for now 3, change the temperature constraint from 64 (base) to 70 to 74
model_mult.NumScenarios = 9
#Scenario 0: Base model, nothing to do except run standard, temp 64
model_mult.Params.ScenarioNumber = 0
model_mult.ScenNName = 'Base Model'
#Scenario 1: Change RHS of temperature to 70
model_mult.Params.ScenarioNumber = 1
model_mult.ScenNName = 'Model 1: Relax temperature constraint to 70 F'
for site in range(I):
tempConstr[site].ScenNRhs = 70
#Scenario 2: Change RHS of temperature to 60
model_mult.Params.ScenarioNumber = 2
model_mult.ScenNName = 'Model 2: Tighten temperature constraint to 60 F'
for site in range(I):
tempConstr[site].ScenNRhs = 60
#Scenario 3: Change demand rhs from 1x multiplier to 1.2x (same as DEMAND_REDUNDANT)
model_mult.Params.ScenarioNumber = 3
model_mult.ScenNName = 'Model 3: Demand * 1.2'
for city in range(J):
demandConstr[city].ScenNRhs = DEMAND[city] * 1.2
#Scenario 4: Change demand rhs from 1x multiplier to 1.5x (higher than DEMAND_REDUNDANT)
model_mult.Params.ScenarioNumber = 4
model_mult.ScenNName = 'Model 4: Demand * 1.5'
for city in range(J):
demandConstr[city].ScenNRhs = DEMAND[city] * 1.5
#Scenario 5: Change demand rhs from 1x multiplier to 2x (higher than DEMAND_REDUNDANT)
model_mult.Params.ScenarioNumber = 5
model_mult.ScenNName = 'Model 5: Demand * 2'
for city in range(J):
demandConstr[city].ScenNRhs = DEMAND[city] * 2
#Scenario 6: Change cost of fiber to $1000 per mile/ 1600 per km
model_mult.Params.ScenarioNumber = 6
model_mult.ScenNName = 'Model 6: Cost of fiber $1600'
for site in range(I):
for city in range(J):
Z[site, city].ScenNObj = DISTANCES[site,city] * COST_OF_FIBER * 2
#Scenario 7: Change cost of fiber to $100 per km
model_mult.Params.ScenarioNumber = 7
model_mult.ScenNName = 'Model 7: Cost of fiber $100'
for site in range(I):
for city in range(J):
Z[site, city].ScenNObj = DISTANCES[site,city] * COST_OF_FIBER * 0.125
#Scenario 8: Change cost of fiber to $2400 per km
model_mult.Params.ScenarioNumber = 8
model_mult.ScenNName = 'Model 8: Cost of fiber $2400'
for site in range(I):
for city in range(J):
Z[site, city].ScenNObj = DISTANCES[site,city] * COST_OF_FIBER * 3
model_mult.update()
model_mult.optimize()
Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (win64) Thread count: 4 physical cores, 8 logical processors, using up to 8 threads Optimize a model with 258 rows, 308 columns and 951 nonzeros Model fingerprint: 0x67782014 Variable types: 140 continuous, 168 integer (168 binary) Coefficient statistics: Matrix range [8e-02, 1e+10] Objective range [1e+04, 2e+07] Bounds range [1e+00, 1e+00] RHS range [6e+01, 3e+08] Warning: Model contains large matrix coefficients Consider reformulating model or setting NumericFocus parameter to avoid numerical issues. Solving a multi-scenario model with 9 scenarios... Presolve removed 64 rows and 33 columns Presolve time: 0.00s Presolved: 561 rows, 650 columns, 1867 nonzeros Variable types: 125 continuous, 525 integer (525 binary) Found heuristic solution: objective 6.299757e+07 Root relaxation: objective 7.493603e+06, 368 iterations, 0.00 seconds Nodes | Current Node | Objective Bounds | Work Expl Unexpl | Obj Depth IntInf | Incumbent BestBd Gap | It/Node Time 0 0 7493602.57 0 22 6.2998e+07 7493602.57 88.1% - 0s H 0 0 2.891712e+07 7493602.57 74.1% - 0s 0 0 1.4548e+07 0 125 2.8917e+07 1.4548e+07 49.7% - 0s 0 0 1.4555e+07 0 27 2.8917e+07 1.4555e+07 49.7% - 0s 0 0 1.4604e+07 0 22 2.8917e+07 1.4604e+07 49.5% - 0s 0 0 1.4640e+07 0 25 2.8917e+07 1.4640e+07 49.4% - 0s H 0 0 2.874968e+07 1.4640e+07 49.1% - 0s 0 0 1.4643e+07 0 25 2.8750e+07 1.4643e+07 49.1% - 0s 0 0 1.4643e+07 0 25 2.8750e+07 1.4643e+07 49.1% - 0s 0 2 1.4643e+07 0 25 2.8750e+07 1.4643e+07 49.1% - 0s Scenario 2 has been solved. 8/9 scenarios left. Scenario 4 has been solved. 7/9 scenarios left. Scenario 7 has been solved. 6/9 scenarios left. Scenario 0 has been solved. 5/9 scenarios left. Scenario 1 has been solved. 4/9 scenarios left. Scenario 3 has been solved. 3/9 scenarios left. Scenario 5 has been solved. 2/9 scenarios left. Scenario 6 has been solved. 1/9 scenarios left. Scenario 8 has been solved. 0/9 scenarios left. Cutting planes: Cover: 33 Projected implied bound: 6 Flow cover: 30 Inf proof: 1 Zero half: 2 Relax-and-lift: 1 Explored 3091 nodes (89708 simplex iterations) in 1.84 seconds Thread count was 8 (of 8 available processors) Solution count 10: 2.87497e+07 2.87783e+07 2.88097e+07 ... 2.91921e+07 Optimal solution found (tolerance 1.00e-04) Best objective 2.874968404043e+07, best bound 2.874968404043e+07, gap 0.0000%
#Check if solution is feasible
Sol_status = 'Optimal' if GRB.OPTIMAL == 2 else 'Not optimal (others)'
print("Solution Status:", Sol_status)
#Print information related to each solution
for s in range(model_mult.NumScenarios):
model_mult.Params.ScenarioNumber = s #loop through the scenarios
print(f"\nFor Scenario {s} ({model_mult.ScenNName})")
print("Minimal Cost: ${}".format(round(model_mult.ScenNObjVal),2))
print("Solution is:")
for site in range(I):
if Y[site].ScenNX == 1:
print("Site {} is chosen".format(df_sites.City[site]) )
sumGB = 0 #keeps track of total GB
for site in range(I):
for city in range(J):
if Z[site, city].ScenNX == 1:
#Print the link between site and city and the GB served by that link
print("Link between site {} and city {} is made -> {}GB".format(df_sites.City[site], df_cities.City[city], round( X[site, city].ScenNX, 2 ) ))
sumGB += X[site, city].ScenNX #Prints total GB served
print(f"Total GB served with this solution {sumGB}")
Solution Status: Optimal For Scenario 0 (Base Model) Minimal Cost: $32719350 Solution is: Total GB served with this solution 287235691.0 For Scenario 1 (Model 1: Relax temperature constraint to 70 F) Minimal Cost: $31724854 Solution is: Site Cincinnati is chosen Site San Antonio is chosen Link between site Cincinnati and city Chicago, IL is made -> 42139745.5GB Link between site Cincinnati and city New York City, NY is made -> 129848394.0GB Link between site San Antonio and city Los Angeles, CA is made -> 57430563.5GB Link between site San Antonio and city Phoenix, AZ is made -> 23482028.0GB Link between site San Antonio and city Houston, TX is made -> 34334960.0GB Total GB served with this solution 287235691.00000006 For Scenario 2 (Model 2: Tighten temperature constraint to 60 F) Minimal Cost: $33040263 Solution is: Site Denver is chosen Site Cincinnati is chosen Link between site Denver and city Los Angeles, CA is made -> 57430563.5GB Link between site Denver and city Phoenix, AZ is made -> 23482028.0GB Link between site Denver and city Houston, TX is made -> 34334960.0GB Link between site Cincinnati and city Chicago, IL is made -> 42139745.5GB Link between site Cincinnati and city New York City, NY is made -> 129848394.0GB Total GB served with this solution 287235691.0000001 For Scenario 3 (Model 3: Demand * 1.2) Minimal Cost: $32719358 Solution is: Total GB served with this solution 344682829.20000005 For Scenario 4 (Model 4: Demand * 1.5) Minimal Cost: $33283593 Solution is: Site Denver is chosen Site Memphis is chosen Link between site Denver and city Los Angeles, CA is made -> 86145845.25GB Link between site Denver and city Phoenix, AZ is made -> 35223042.0GB Link between site Denver and city Chicago, IL is made -> 63209618.25GB Link between site Memphis and city Houston, TX is made -> 51502440.0GB Link between site Memphis and city New York City, NY is made -> 194772591.0GB Total GB served with this solution 430853536.5 For Scenario 5 (Model 5: Demand * 2) Minimal Cost: $47543063 Solution is: Site Denver is chosen Site Cincinnati is chosen Site Memphis is chosen Link between site Denver and city Los Angeles, CA is made -> 114861127.0GB Link between site Denver and city Phoenix, AZ is made -> 46964056.0GB Link between site Cincinnati and city Chicago, IL is made -> 84279491.0GB Link between site Cincinnati and city New York City, NY is made -> 165720509.0GB Link between site Memphis and city Houston, TX is made -> 68669920.0GB Link between site Memphis and city New York City, NY is made -> 93976279.0GB Total GB served with this solution 574471382.0 For Scenario 6 (Model 6: Cost of fiber $1600) Minimal Cost: $37016946 Solution is: Site Denver is chosen Site Memphis is chosen Link between site Denver and city Los Angeles, CA is made -> 57430563.5GB Link between site Denver and city Phoenix, AZ is made -> 23482028.0GB Link between site Memphis and city Houston, TX is made -> 34334960.0GB Link between site Memphis and city Chicago, IL is made -> 42139745.5GB Link between site Memphis and city New York City, NY is made -> 129848394.0GB Total GB served with this solution 287235691.0 For Scenario 7 (Model 7: Cost of fiber $100) Minimal Cost: $28749684 Solution is: Site Memphis is chosen Site Nashville is chosen Link between site Memphis and city Los Angeles, CA is made -> 57430563.5GB Link between site Memphis and city Phoenix, AZ is made -> 23482028.0GB Link between site Memphis and city Houston, TX is made -> 34334960.0GB Link between site Nashville and city Chicago, IL is made -> 42139745.5GB Link between site Nashville and city New York City, NY is made -> 129848394.0GB Total GB served with this solution 287235691.0 For Scenario 8 (Model 8: Cost of fiber $2400) Minimal Cost: $40103000 Solution is: Site San Diego is chosen Site Cincinnati is chosen Link between site San Diego and city Los Angeles, CA is made -> 57430563.5GB Link between site San Diego and city Phoenix, AZ is made -> 23482028.0GB Link between site Cincinnati and city Houston, TX is made -> 34334960.0GB Link between site Cincinnati and city Chicago, IL is made -> 42139745.5GB Link between site Cincinnati and city New York City, NY is made -> 129848394.0GB Total GB served with this solution 287235690.9999998
#Function to get a traffic dataframe for each scenario, used in both CreateDataFrameLinkage() and MapPlotLinks()
def CreateDataFrameTraffic (scen_num):
model_mult.Params.ScenarioNumber = scen_num
#Get the traffic
traffic =np.zeros( (I,J))
for i in range(I):
for j in range(J):
if X[i,j].ScenNX != 0.0:
traffic[i,j] = X[i,j].ScenNX
traffic = pd.DataFrame( traffic.astype(int) )
traffic.index=db_sites
traffic.columns = cities
traffic = traffic[traffic.sum(axis=1) != 0]
return traffic
#Function to create a df_linkage for each scenario, used for mapping
def CreateDataFrameLinkage (scen_num, traffic) :
model_mult.Params.ScenarioNumber = scen_num
selected = [] #initialize list for storing the selected sites for each scenario
#Get a list of selected sites for each scenario
for site in range(I):
if Y[site].ScenNX ==1:
selected.append(df_sites.City[site])
#Make df_selected, used for the map
cols = ['City', 'LATITUDE_X', 'LONGITUDE_Y', 'Composite_Cost', 'Mean_Temp_F', 'EXTREME_WEATH_EVENTS_YEARLY']
df_selected = df_sites[df_sites.City.isin(selected)][cols]
df_selected = df_selected.reset_index(drop=True)
#Initialize empty dataframe for linkage
cols = ['Start','Start_lat','Start_long','End','End_lat','End_long','Distance', 'TRAFFIC']
df_linkage = pd.DataFrame(columns=cols)
link_start = []
link_end = []
for site in range(I):
for city in range(J):
if Z[site, city].ScenNX == 1:
link_start.append(df_sites.City[site])
link_end.append(df_cities.City[city])
df_linkage['Start'] = link_start
df_linkage['End'] = link_end
#Getting the longitude and latitude for Start and End cities
df_linkage['Start_lat'] = df_linkage.Start.map(dict(zip(df_sites.City, df_sites.LATITUDE_X)))
df_linkage['Start_long'] = df_linkage.Start.map(dict(zip(df_sites.City, df_sites.LONGITUDE_Y)))
df_linkage['End_lat'] = df_linkage.End.map(dict(zip(df_cities.City, df_cities.LATITUDE_X)))
df_linkage['End_long'] = df_linkage.End.map(dict(zip(df_cities.City, df_cities.LONGITUDE_Y)))
#For now the function doesn't include traffic so won't have opacity, can add later
for i in traffic.index:
for c in traffic.columns:
df_linkage.loc[(df_linkage.Start == i) &
(df_linkage.End == c[:-4]), 'TRAFFIC']== traffic.loc[i,c]
#Get name of scenario
model_name = model_mult.ScenNName
return df_linkage, df_selected, model_name
['Denver', 'Memphis']
def MapPlotLinks (model_name, df_linkage, df_selected, traffic):
fig = go.Figure()
height = 600
width = 1200
df_selected['plot_dummy'] = 1
fig = px.scatter_mapbox(df_cities, lat="LATITUDE_X", lon="LONGITUDE_Y", hover_name="City",
color_discrete_sequence=["green"], zoom=3, height=height, width=width, size='NR_HOUSEHOLDS')
print(list(df_selected.City))
fig2 = px.scatter_mapbox(df_selected, lat="LATITUDE_X", lon="LONGITUDE_Y", hover_name="City",
hover_data = ['Composite_Cost', 'Mean_Temp_F', 'EXTREME_WEATH_EVENTS_YEARLY'],
color_discrete_sequence=["red"], zoom=3, height=height, width=width, size = 'plot_dummy' )
fig3 = px.scatter_mapbox(df_sites, lat="LATITUDE_X", lon="LONGITUDE_Y", hover_name="City",
hover_data = ['Composite_Cost', 'Mean_Temp_F', 'EXTREME_WEATH_EVENTS_YEARLY'],
color_discrete_sequence=["blue"], zoom=3, height=height, width=width)
fig.add_trace(fig3.data[0]) # adds the line trace to the first figure
fig.add_trace(fig2.data[0]) # adds the line trace to the first figure
for i in range(df_linkage.shape[0]):
trace = go.Scattermapbox({
'lat': [df_linkage['Start_lat'][i], df_linkage['End_lat'][i]] ,
'lon' : [df_linkage['Start_long'][i], df_linkage['End_long'][i]],
'line': {'color': 'black', 'width': 3},
'mode': 'lines',
'subplot':'mapbox',
'opacity': max(0.3, df_linkage['TRAFFIC'][i]/df_linkage.TRAFFIC.max()),
'text' : 'a'
})
fig.add_trace(trace)
fig.update_layout(mapbox_style="open-street-map",
title=f'Selection of datacenters (in red) to service cities (in green): {model_name}',
)
#fig.update_layout(margin={"r":0,"t":10,"l":0,"b":0})
fig.show()
#Function that wraps preceding 3 functions for easier calling
def GetMapScenario(scen_num):
traffic = CreateDataFrameTraffic(scen_num)
df_linkage_scen, df_selected_scen, model_name = CreateDataFrameLinkage(scen_num, traffic)
MapPlotLinks(model_name, df_linkage_scen, df_selected_scen, traffic)
#Map for scenario 1: temperature from 64 to 70
GetMapScenario(1)
['Cincinnati', 'San Antonio']
#Map for scenario 2: temperature from 64 to 60
GetMapScenario(2)
['Denver', 'Cincinnati']
#Map for scenario 4: demand * 1.5
GetMapScenario(4)
['Denver', 'Memphis']
#Map for scenario 5: Demand * 2
GetMapScenario(5)
['Denver', 'Cincinnati', 'Memphis']
#Map for scenario 6: cost of fiber from $800 to $1600 per km
GetMapScenario(6)
['Denver', 'Memphis']
#Map for scenario 7: cost of fiber from $800 to $100 per km
GetMapScenario(7)
['Memphis', 'Nashville']
#Map for scenario 8: cost of fiber from $800 to $2400 per km
GetMapScenario(8)
['San Diego', 'Cincinnati']