Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ecommerce_delivery/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# E-commerce Delivery Modeling and Optimization
# Supports: parcel delivery (current), food delivery (future)
62 changes: 62 additions & 0 deletions ecommerce_delivery/data_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""
Data processing module for e-commerce delivery modeling.
Loads shopping location CSV and prepares data for delivery optimization.
"""

import pandas as pd
import numpy as np


def load_shopping_data(csv_path: str) -> pd.DataFrame:
    """Read the shopping-locations CSV and keep rows with complete coordinates.

    Parameters
    ----------
    csv_path : location of the CSV file, handed directly to ``pandas.read_csv``

    Returns
    -------
    DataFrame in which ShopLat, ShopLon, DeliveryLat and DeliveryLon are all
    non-missing, re-indexed from 0.

    Raises
    ------
    ValueError
        If any of the four coordinate columns is absent. The message names
        the first absent column in the order listed above.
    """
    coord_columns = ['ShopLat', 'ShopLon', 'DeliveryLat', 'DeliveryLon']
    raw = pd.read_csv(csv_path)

    absent = [name for name in coord_columns if name not in raw.columns]
    if absent:
        raise ValueError(f"Missing required column: {absent[0]}")

    # Discard every row in which at least one coordinate is missing.
    complete = raw.dropna(subset=coord_columns)
    return complete.reset_index(drop=True)


def euclidean_distance(lat1, lon1, lat2, lon2, *, lat_miles=69.0, lon_miles=53.4):
    """
    Compute the straight-line distance in miles between two points using a
    flat-earth approximation.

    Each coordinate difference is scaled by a fixed miles-per-degree factor
    and the two scaled differences are combined with the Pythagorean formula.
    The defaults are the approximations for ~39.8N latitude (Indianapolis):
    1 degree lat ≈ 69.0 miles, 1 degree lon ≈ 53.4 miles.

    Parameters
    ----------
    lat1, lon1 : latitude/longitude of the first point, in degrees
    lat2, lon2 : latitude/longitude of the second point, in degrees
    lat_miles : miles per degree of latitude (keyword-only)
    lon_miles : miles per degree of longitude at the latitude of interest
        (keyword-only); override it when working outside the default region

    Returns
    -------
    Distance in miles. Inputs may be scalars or NumPy arrays; arrays are
    combined element-wise following NumPy broadcasting.
    """
    dy = (lat2 - lat1) * lat_miles
    dx = (lon2 - lon1) * lon_miles
    return np.sqrt(dx**2 + dy**2)


def build_distance_matrix(lats, lons):
    """
    Build a symmetric Euclidean distance matrix for a set of locations.

    Entry ``[i, j]`` is ``euclidean_distance`` between location ``i`` and
    location ``j``; the diagonal is zero.

    Parameters
    ----------
    lats : 1-D array-like of latitudes
    lons : 1-D array-like of longitudes, same length as ``lats``

    Returns
    -------
    dist_matrix : np.ndarray of shape (n, n)

    Raises
    ------
    ValueError
        If ``lats`` and ``lons`` are not 1-D sequences of equal length.
    """
    lats = np.asarray(lats, dtype=float)
    lons = np.asarray(lons, dtype=float)
    if lats.ndim != 1 or lats.shape != lons.shape:
        raise ValueError("lats and lons must be 1-D sequences of equal length")

    # Broadcasting a column vector against a row vector evaluates every
    # (i, j) pair in a single call instead of a Python-level double loop.
    dist_matrix = euclidean_distance(
        lats[:, np.newaxis], lons[:, np.newaxis],
        lats[np.newaxis, :], lons[np.newaxis, :],
    )
    # A location is at distance zero from itself.
    np.fill_diagonal(dist_matrix, 0.0)
    return dist_matrix


def compute_num_trucks(num_items: int, vehicle_capacity: int = 50) -> int:
    """Compute the number of trucks needed given item count and capacity.

    Parameters
    ----------
    num_items : number of items to deliver
    vehicle_capacity : max items per truck; must be positive

    Returns
    -------
    ``num_items / vehicle_capacity`` rounded up to a whole number of trucks
    (0 when there are no items).

    Raises
    ------
    ValueError
        If ``vehicle_capacity`` is zero or negative.
    """
    if vehicle_capacity <= 0:
        raise ValueError(
            f"vehicle_capacity must be positive, got {vehicle_capacity}"
        )
    # Negated floor division is an exact ceiling for integers and avoids the
    # float round-trip of ceil(a / b).
    return int(-(-num_items // vehicle_capacity))
240 changes: 240 additions & 0 deletions ecommerce_delivery/depot_strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
"""
Depot determination strategies for e-commerce delivery.
Three strategies:
1. Amazon Delivery Station: K-means clustering on shopping locations,
depot = centroid of each cluster. K = number of trucks.
2. Walmart Supercenter: Assign shopping locations to nearest existing
Walmart Supercenter. Supports both shopping-location-oriented and
home-location-oriented assignment.
3. Local Delivery Company: Divide map into N×N grid, depot = centroid
of each grid cell. Assign home locations within each grid cell.
"""

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

from .data_processing import euclidean_distance, compute_num_trucks
from .walmart_locations import get_walmart_supercenters


def amazon_ds_strategy(df: pd.DataFrame, vehicle_capacity: int = 50):
    """
    Amazon Delivery Station strategy.

    Cluster shopping locations into K groups with K-means, where
    K = compute_num_trucks(len(df), vehicle_capacity). The depot for each
    group is the centroid of the cluster.

    Note that K-means does not balance cluster sizes and no further splitting
    is done here, so an individual cluster may contain more than
    ``vehicle_capacity`` items.

    Parameters
    ----------
    df : DataFrame with ShopLat, ShopLon, DeliveryLat, DeliveryLon columns
    vehicle_capacity : max items per truck (used only to choose K)

    Returns
    -------
    list of dicts (empty when ``df`` has no rows), each with:
    - depot_lat, depot_lon: centroid of cluster
    - delivery_locations: list of (lat, lon) home locations
    - shop_locations: list of (lat, lon) shop locations
    - indices: original DataFrame indices
    - strategy: the constant 'amazon_ds'
    - cluster_id: K-means label of the cluster
    """
    num_items = len(df)
    if num_items == 0:
        # Zero items would give K = 0, which KMeans rejects; there is
        # nothing to route, so report no routes.
        return []

    num_clusters = compute_num_trucks(num_items, vehicle_capacity)

    shop_coords = df[['ShopLat', 'ShopLon']].values
    # Fixed random_state keeps the clustering reproducible between runs.
    kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init=10)
    labels = kmeans.fit_predict(shop_coords)

    routes = []
    for cluster_id, centroid in enumerate(kmeans.cluster_centers_):
        cluster_df = df[labels == cluster_id]
        routes.append({
            'depot_lat': centroid[0],
            'depot_lon': centroid[1],
            'delivery_locations': list(zip(
                cluster_df['DeliveryLat'].values,
                cluster_df['DeliveryLon'].values
            )),
            'shop_locations': list(zip(
                cluster_df['ShopLat'].values,
                cluster_df['ShopLon'].values
            )),
            'indices': cluster_df.index.tolist(),
            'strategy': 'amazon_ds',
            'cluster_id': cluster_id,
        })
    return routes


def walmart_strategy(df: pd.DataFrame, assignment_mode: str = 'shopping',
                     vehicle_capacity: int = 50, custom_locations=None):
    """
    Walmart Supercenter strategy.

    Assign each item to the nearest Walmart Supercenter, measured from either
    its shop location or its home location. Each supercenter that receives at
    least one item becomes a depot. If a depot has more items than
    vehicle_capacity, its items are split, in DataFrame order, into multiple
    routes of at most vehicle_capacity items.

    Parameters
    ----------
    df : DataFrame with ShopLat, ShopLon, DeliveryLat, DeliveryLon columns
    assignment_mode : 'shopping' (assign by shop location) or 'home'
        (assign by home location)
    vehicle_capacity : max items per truck; must be positive
    custom_locations : optional list of dicts with lat, lon and (optionally)
        name; when omitted or empty, get_walmart_supercenters() is used

    Returns
    -------
    list of route dicts with depot_lat, depot_lon, depot_name,
    delivery_locations, shop_locations, indices, strategy ('walmart'),
    walmart_idx (position of the depot in the location list) and sub_route
    (0-based route number within that depot).

    Raises
    ------
    ValueError
        If assignment_mode is not 'shopping' or 'home', or if
        vehicle_capacity is not positive.
    """
    if vehicle_capacity <= 0:
        # A non-positive capacity would otherwise yield zero sub-routes and
        # silently drop every item.
        raise ValueError(
            f"vehicle_capacity must be positive, got {vehicle_capacity}"
        )

    walmarts = custom_locations if custom_locations else get_walmart_supercenters()
    walmart_lats = np.array([w['lat'] for w in walmarts], dtype=float)
    walmart_lons = np.array([w['lon'] for w in walmarts], dtype=float)

    if assignment_mode == 'shopping':
        ref_lats = df['ShopLat'].values
        ref_lons = df['ShopLon'].values
    elif assignment_mode == 'home':
        ref_lats = df['DeliveryLat'].values
        ref_lons = df['DeliveryLon'].values
    else:
        raise ValueError(f"assignment_mode must be 'shopping' or 'home', got '{assignment_mode}'")

    if len(df) == 0:
        return []

    ref_lats = np.asarray(ref_lats, dtype=float)
    ref_lons = np.asarray(ref_lons, dtype=float)

    # Assign each item to the nearest Walmart. Broadcasting builds an
    # (items x walmarts) distance table in one call; argmin along each row
    # picks the closest depot (first one on ties).
    dist_to_walmarts = euclidean_distance(
        ref_lats[:, np.newaxis], ref_lons[:, np.newaxis],
        walmart_lats[np.newaxis, :], walmart_lons[np.newaxis, :],
    )

    df = df.copy()  # do not add the helper column to the caller's frame
    df['walmart_idx'] = dist_to_walmarts.argmin(axis=1)

    routes = []
    for w_idx in df['walmart_idx'].unique():
        w_idx = int(w_idx)  # plain int for list indexing and for the output
        w_df = df[df['walmart_idx'] == w_idx]
        walmart = walmarts[w_idx]

        # Split into sub-routes if exceeding vehicle capacity
        num_sub_routes = compute_num_trucks(len(w_df), vehicle_capacity)

        for sub in range(num_sub_routes):
            start = sub * vehicle_capacity
            # Positional slicing: label-based lookup would return duplicated
            # rows if the DataFrame index is not unique.
            sub_df = w_df.iloc[start:start + vehicle_capacity]

            routes.append({
                'depot_lat': walmart['lat'],
                'depot_lon': walmart['lon'],
                'depot_name': walmart.get('name', f'Walmart #{w_idx}'),
                'delivery_locations': list(zip(
                    sub_df['DeliveryLat'].values,
                    sub_df['DeliveryLon'].values
                )),
                'shop_locations': list(zip(
                    sub_df['ShopLat'].values,
                    sub_df['ShopLon'].values
                )),
                'indices': sub_df.index.tolist(),
                'strategy': 'walmart',
                'walmart_idx': w_idx,
                'sub_route': sub,
            })
    return routes


def local_delivery_strategy(df: pd.DataFrame, N: int = 5,
                            vehicle_capacity: int = 50):
    """
    Local delivery company strategy.

    Divide the bounding box of the home (delivery) locations into N×N grid
    cells. The depot for each cell is the centroid of the cell. Home
    locations within each cell are assigned to that cell's depot; cells with
    no home locations produce no route. If a cell has more items than
    vehicle_capacity, its items are split, in DataFrame order, into multiple
    routes of at most vehicle_capacity items.

    Parameters
    ----------
    df : DataFrame with ShopLat, ShopLon, DeliveryLat, DeliveryLon columns
        (the grid is built from the Delivery columns; the Shop columns are
        copied into each route)
    N : grid size (N×N); must be at least 1
    vehicle_capacity : max items per truck; must be positive

    Returns
    -------
    routes : list of route dicts with depot_lat, depot_lon,
        delivery_locations, shop_locations, indices, strategy
        ('local_delivery'), grid_id (grid_row * N + grid_col), grid_row,
        grid_col and sub_route
    grid_info : dict with N, lat_min, lat_max, lon_min, lon_max, lat_step
        and lon_step, for visualization

    Raises
    ------
    ValueError
        If N < 1, if vehicle_capacity is not positive, or if the delivery
        locations have zero extent in latitude or in longitude.
    """
    if N < 1:
        raise ValueError(f"N must be a positive integer, got {N}")
    if vehicle_capacity <= 0:
        # A non-positive capacity would otherwise yield zero sub-routes and
        # silently drop every item.
        raise ValueError(
            f"vehicle_capacity must be positive, got {vehicle_capacity}"
        )

    lat_min = df['DeliveryLat'].min()
    lat_max = df['DeliveryLat'].max()
    lon_min = df['DeliveryLon'].min()
    lon_max = df['DeliveryLon'].max()

    # Height and width of one grid cell, in degrees
    lat_step = (lat_max - lat_min) / N
    lon_step = (lon_max - lon_min) / N

    if lat_step == 0 or lon_step == 0:
        raise ValueError("All delivery locations have the same coordinates.")

    # Assign each home to a grid cell. A point on the max edge computes to
    # index N, so clip it back into the last row/column.
    df = df.copy()  # do not add the helper columns to the caller's frame
    df['grid_row'] = np.clip(
        ((df['DeliveryLat'] - lat_min) / lat_step).astype(int), 0, N - 1
    )
    df['grid_col'] = np.clip(
        ((df['DeliveryLon'] - lon_min) / lon_step).astype(int), 0, N - 1
    )
    df['grid_id'] = df['grid_row'] * N + df['grid_col']

    routes = []
    for grid_id in df['grid_id'].unique():
        g_df = df[df['grid_id'] == grid_id]
        grid_id = int(grid_id)
        row = int(g_df['grid_row'].iloc[0])
        col = int(g_df['grid_col'].iloc[0])

        # Centroid of grid cell
        depot_lat = lat_min + (row + 0.5) * lat_step
        depot_lon = lon_min + (col + 0.5) * lon_step

        # Split into sub-routes if exceeding capacity
        num_sub_routes = compute_num_trucks(len(g_df), vehicle_capacity)

        for sub in range(num_sub_routes):
            start = sub * vehicle_capacity
            # Positional slicing: label-based lookup would return duplicated
            # rows if the DataFrame index is not unique.
            sub_df = g_df.iloc[start:start + vehicle_capacity]

            routes.append({
                'depot_lat': depot_lat,
                'depot_lon': depot_lon,
                'delivery_locations': list(zip(
                    sub_df['DeliveryLat'].values,
                    sub_df['DeliveryLon'].values
                )),
                'shop_locations': list(zip(
                    sub_df['ShopLat'].values,
                    sub_df['ShopLon'].values
                )),
                'indices': sub_df.index.tolist(),
                'strategy': 'local_delivery',
                'grid_id': grid_id,
                'grid_row': row,
                'grid_col': col,
                'sub_route': sub,
            })

    # Grid info for visualization
    grid_info = {
        'N': N,
        'lat_min': lat_min, 'lat_max': lat_max,
        'lon_min': lon_min, 'lon_max': lon_max,
        'lat_step': lat_step, 'lon_step': lon_step,
    }

    return routes, grid_info
Loading