uSMART · yin195 · Mar 23, 2026 · Mar 23, 2026
diff --git a/ecommerce_delivery/__init__.py b/ecommerce_delivery/__init__.py
@@ -0,0 +1,2 @@
+# E-commerce Delivery Modeling and Optimization
+# Supports: parcel delivery (current), food delivery (future)
diff --git a/ecommerce_delivery/data_processing.py b/ecommerce_delivery/data_processing.py
@@ -0,0 +1,62 @@
+"""
+Data processing module for e-commerce delivery modeling.
+Loads shopping location CSV and prepares data for delivery optimization.
+"""
+
+import pandas as pd
+import numpy as np
+
+
+def load_shopping_data(csv_path: str) -> pd.DataFrame:
+    """Read the shopping-locations CSV, keeping only rows with all four coordinates."""
+    coord_fields = ['ShopLat', 'ShopLon', 'DeliveryLat', 'DeliveryLon']
+    frame = pd.read_csv(csv_path)
+    # Only the first absent column (in the order listed above) is reported.
+    absent = next((name for name in coord_fields if name not in frame.columns), None)
+    if absent is not None:
+        raise ValueError(f"Missing required column: {absent}")
+    # Rows with a NaN coordinate are removed and the index is renumbered from 0.
+    return frame.dropna(subset=coord_fields).reset_index(drop=True)
+
+
+def euclidean_distance(lat1, lon1, lat2, lon2, lat_miles=69.0, lon_miles=53.4):
+    """Flat-earth straight-line distance in miles between two lat/lon points.
+
+    lat_miles / lon_miles are the miles covered by one degree of latitude /
+    longitude. The defaults (69.0, 53.4) approximate ~39.8N (Indianapolis);
+    pass other values for a region at a different latitude.
+    """
+    # Scale the degree offsets to miles, then apply the Pythagorean theorem.
+    north_south = (lat2 - lat1) * lat_miles
+    east_west = (lon2 - lon1) * lon_miles
+    return np.sqrt(east_west**2 + north_south**2)
+
+
+def build_distance_matrix(lats, lons):
+    """
+    Build a symmetric flat-earth distance matrix (miles) for a set of locations.
+
+    Parameters
+    ----------
+    lats : array-like of latitudes
+    lons : array-like of longitudes, same length as lats
+
+    Returns
+    -------
+    dist_matrix : np.ndarray of shape (n, n); zero diagonal for finite inputs
+
+    Raises
+    ------
+    ValueError : if lats and lons differ in length
+    """
+    lats = np.asarray(lats, dtype=float).ravel()
+    lons = np.asarray(lons, dtype=float).ravel()
+    if lats.shape != lons.shape:
+        raise ValueError("lats and lons must have the same length")
+    # (n, 1) columns broadcast against (n,) rows: all pairs in one vectorized call.
+    return euclidean_distance(lats[:, None], lons[:, None], lats, lons)
+
+
+def compute_num_trucks(num_items: int, vehicle_capacity: int = 50) -> int:
+    """Trucks needed for num_items: exact integer ceil(num_items / vehicle_capacity)."""
+    return int(-(-num_items // vehicle_capacity))
diff --git a/ecommerce_delivery/depot_strategies.py b/ecommerce_delivery/depot_strategies.py
@@ -0,0 +1,240 @@
+"""
+Depot determination strategies for e-commerce delivery.
+
+Three strategies:
+1. Amazon Delivery Station: K-means clustering on shopping locations,
+   depot = centroid of each cluster. K = number of trucks.
+2. Walmart Supercenter: Assign shopping locations to nearest existing
+   Walmart Supercenter. Supports both shopping-location-oriented and
+   home-location-oriented assignment.
+3. Local Delivery Company: Divide map into N×N grid, depot = centroid
+   of each grid cell. Assign home locations within each grid cell.
+"""
+
+import numpy as np
+import pandas as pd
+from sklearn.cluster import KMeans
+
+from .data_processing import euclidean_distance, compute_num_trucks
+from .walmart_locations import get_walmart_supercenters
+
+
+def amazon_ds_strategy(df: pd.DataFrame, vehicle_capacity: int = 50):
+    """
+    Amazon Delivery Station strategy.
+
+    Cluster shopping locations with K-means, where K is the number of trucks
+    needed for len(df) items; each cluster's centroid becomes its depot.
+    NOTE: vehicle_capacity only determines K. Cluster sizes are not capped
+    here, so a single route may hold more than vehicle_capacity items.
+
+    Parameters
+    ----------
+    df : DataFrame with ShopLat, ShopLon, DeliveryLat, DeliveryLon columns
+    vehicle_capacity : max items per truck (used to derive K)
+
+    Returns
+    -------
+    list of dicts (empty when df has no rows), each with:
+        - depot_lat, depot_lon: centroid of cluster
+        - delivery_locations: list of (lat, lon) home locations
+        - shop_locations: list of (lat, lon) shop locations
+        - indices: original DataFrame indices
+        - strategy: 'amazon_ds'; cluster_id: cluster label k
+    """
+    if len(df) == 0:
+        # K would be 0, which KMeans cannot fit; nothing to route anyway.
+        return []
+    K = compute_num_trucks(len(df), vehicle_capacity)
+
+    shop_coords = df[['ShopLat', 'ShopLon']].values
+    kmeans = KMeans(n_clusters=K, random_state=42, n_init=10)
+    labels = kmeans.fit_predict(shop_coords)
+
+    routes = []
+    # One route per cluster; the boolean mask keeps df's original index labels.
+    for k, centroid in enumerate(kmeans.cluster_centers_):
+        cluster_df = df[labels == k]
+        homes = zip(cluster_df['DeliveryLat'].values, cluster_df['DeliveryLon'].values)
+        shops = zip(cluster_df['ShopLat'].values, cluster_df['ShopLon'].values)
+        routes.append({
+            'depot_lat': centroid[0],
+            'depot_lon': centroid[1],
+            'delivery_locations': list(homes),
+            'shop_locations': list(shops),
+            'indices': cluster_df.index.tolist(),
+            'strategy': 'amazon_ds',
+            'cluster_id': k,
+        })
+    return routes
+
+
+def walmart_strategy(df: pd.DataFrame, assignment_mode: str = 'shopping',
+                     vehicle_capacity: int = 50, custom_locations=None):
+    """
+    Walmart Supercenter strategy.
+
+    Assign every item to the nearest Walmart Supercenter, measured from its
+    shop or home location. Each supercenter with at least one item becomes
+    a depot; a depot holding more than vehicle_capacity items is split into
+    consecutive sub-routes in DataFrame row order.
+
+    Parameters
+    ----------
+    df : DataFrame with ShopLat, ShopLon, DeliveryLat, DeliveryLon columns
+    assignment_mode : 'shopping' (assign by shop location) or 'home' (assign by home location)
+    vehicle_capacity : max items per truck
+    custom_locations : optional list of dicts with lat, lon, name; when
+        empty or None, get_walmart_supercenters() is used instead
+
+    Returns
+    -------
+    list of route dicts (same structure as amazon_ds_strategy), plus
+    depot_name, walmart_idx and sub_route keys
+
+    Raises
+    ------
+    ValueError : if assignment_mode is neither 'shopping' nor 'home'
+    """
+    # Truthiness check: an empty custom list also falls back to the default stores.
+    walmarts = custom_locations if custom_locations else get_walmart_supercenters()
+    walmart_lats = np.array([w['lat'] for w in walmarts])
+    walmart_lons = np.array([w['lon'] for w in walmarts])
+
+    if assignment_mode == 'shopping':
+        lat_col, lon_col = 'ShopLat', 'ShopLon'
+    elif assignment_mode == 'home':
+        lat_col, lon_col = 'DeliveryLat', 'DeliveryLon'
+    else:
+        raise ValueError(f"assignment_mode must be 'shopping' or 'home', got '{assignment_mode}'")
+
+    # Pairwise item-to-store distances, shape (n_items, n_stores); argmin per
+    # row picks the nearest store (the first one on ties).
+    ref_lats = np.asarray(df[lat_col], dtype=float)[:, None]
+    ref_lons = np.asarray(df[lon_col], dtype=float)[:, None]
+    dists = euclidean_distance(ref_lats, ref_lons, walmart_lats, walmart_lons)
+    nearest = np.argmin(dists, axis=1)
+
+    routes = []
+    for w_idx in pd.unique(nearest):
+        # The boolean mask and iloc slices below are positional, so they stay
+        # correct when df's index has duplicate labels (.loc would repeat rows).
+        w_df = df[nearest == w_idx]
+        walmart = walmarts[w_idx]
+
+        # Split into sub-routes if exceeding vehicle capacity
+        num_sub_routes = compute_num_trucks(len(w_df), vehicle_capacity)
+
+        for sub in range(num_sub_routes):
+            start = sub * vehicle_capacity
+            sub_df = w_df.iloc[start:start + vehicle_capacity]
+
+            routes.append({
+                'depot_lat': walmart['lat'],
+                'depot_lon': walmart['lon'],
+                'depot_name': walmart.get('name', f'Walmart #{w_idx}'),
+                'delivery_locations': list(zip(
+                    sub_df['DeliveryLat'].values,
+                    sub_df['DeliveryLon'].values
+                )),
+                'shop_locations': list(zip(
+                    sub_df['ShopLat'].values,
+                    sub_df['ShopLon'].values
+                )),
+                'indices': sub_df.index.tolist(),
+                'strategy': 'walmart',
+                'walmart_idx': w_idx,
+                'sub_route': sub,
+            })
+    return routes
+
+
+def local_delivery_strategy(df: pd.DataFrame, N: int = 5,
+                            vehicle_capacity: int = 50):
+    """
+    Local delivery company strategy.
+
+    Divide the bounding box of the home locations into N×N grid cells.
+    The depot for each non-empty cell is the geometric center of that cell;
+    homes inside the cell are served from it, split into sub-routes of at
+    most vehicle_capacity items in DataFrame row order.
+
+    Parameters
+    ----------
+    df : DataFrame with ShopLat, ShopLon, DeliveryLat, DeliveryLon columns
+    N : grid size (N×N), must be >= 1
+    vehicle_capacity : max items per truck
+
+    Returns
+    -------
+    routes : list of route dicts, one per sub-route of each non-empty cell
+    grid_info : dict with N, the lat/lon bounds and the cell step sizes
+
+    Raises
+    ------
+    ValueError : if N < 1, or if the homes span zero extent in latitude or
+        in longitude (the grid cells would have no area)
+    """
+    if N < 1:
+        raise ValueError("N must be a positive integer.")
+
+    lat_min, lat_max = df['DeliveryLat'].min(), df['DeliveryLat'].max()
+    lon_min, lon_max = df['DeliveryLon'].min(), df['DeliveryLon'].max()
+    lat_step = (lat_max - lat_min) / N
+    lon_step = (lon_max - lon_min) / N
+
+    if lat_step == 0 or lon_step == 0:
+        raise ValueError("All delivery locations have the same coordinates.")
+
+    # Assign each home to a grid cell. Homes on the max-lat / max-lon edge
+    # compute index N, so clip them back into the last row / column.
+    rows = np.clip(
+        ((df['DeliveryLat'] - lat_min) / lat_step).astype(int).to_numpy(), 0, N - 1
+    )
+    cols = np.clip(
+        ((df['DeliveryLon'] - lon_min) / lon_step).astype(int).to_numpy(), 0, N - 1
+    )
+    grid_ids = rows * N + cols
+
+    routes = []
+    for grid_id in pd.unique(grid_ids):
+        # Boolean mask + iloc are positional, so duplicate index labels are safe.
+        g_df = df[grid_ids == grid_id]
+        row, col = divmod(grid_id, N)
+
+        # Center of the grid cell
+        depot_lat = lat_min + (row + 0.5) * lat_step
+        depot_lon = lon_min + (col + 0.5) * lon_step
+
+        # Split into sub-routes if exceeding capacity
+        for sub in range(compute_num_trucks(len(g_df), vehicle_capacity)):
+            start = sub * vehicle_capacity
+            sub_df = g_df.iloc[start:start + vehicle_capacity]
+            routes.append({
+                'depot_lat': depot_lat,
+                'depot_lon': depot_lon,
+                'delivery_locations': list(zip(
+                    sub_df['DeliveryLat'].values,
+                    sub_df['DeliveryLon'].values
+                )),
+                'shop_locations': list(zip(
+                    sub_df['ShopLat'].values,
+                    sub_df['ShopLon'].values
+                )),
+                'indices': sub_df.index.tolist(),
+                'strategy': 'local_delivery',
+                'grid_id': grid_id,
+                'grid_row': row,
+                'grid_col': col,
+                'sub_route': sub,
+            })
+
+    # Grid info for visualization
+    grid_info = {
+        'N': N,
+        'lat_min': lat_min, 'lat_max': lat_max,
+        'lon_min': lon_min, 'lon_max': lon_max,
+        'lat_step': lat_step, 'lon_step': lon_step,
+    }
+
+    return routes, grid_info
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# E-commerce Delivery Modeling and Optimization
		# Supports: parcel delivery (current), food delivery (future)