# experiment.py

import pandas as pd
import time
import sys
from random import shuffle
import pdb
import os


def run(experiment_id, env, agent, num_steps, no_op_steps=2, average_cycle=1, cheat=False, cheat_cycle=15):
    """Step ``env`` under the control of ``agent`` and record one metrics row per step.

    The function first performs warm-up steps with no actions, hands the
    requests collected during warm-up to ``agent.reset``, and then runs the
    main loop: ask the agent for actions, apply them with ``env.step`` and
    append a row to the score table.

    Args:
        experiment_id: Used (via ``str``) as the sub-directory name under
            ``./experiments/results/vehicle_logs/`` for periodic vehicle logs.
        env: Environment object. This function uses ``env.step``,
            ``env.dayofweek``, ``env.minofday``, ``env.cycle``,
            ``env.get_requests`` (only when ``cheat`` is set) and
            ``env.get_vehicles_score``.
        agent: Object providing ``reset``, ``get_actions`` and (when ``cheat``
            is set) ``update_future_demand``. May be falsy, in which case no
            actions are dispatched.
        num_steps: Maximum number of main-loop iterations.
        no_op_steps: ``no_op_steps - 2`` extra warm-up steps are taken, in
            addition to the two unconditional ones.
        average_cycle: A progress line summing the previous ``average_cycle``
            rows is printed every ``average_cycle`` steps.
        cheat: When true, every ``cheat_cycle`` steps the agent is given the
            result of ``env.get_requests(num_steps=30)``.
        cheat_cycle: Period, in steps, of the ``cheat`` update.

    Returns:
        A tuple ``(score, env.get_vehicles_score())`` where ``score`` is a
        ``pandas.DataFrame`` indexed by step number.
    """
    score = pd.DataFrame(
        columns=[
            "dayofweek",
            "minofday",
            "requests",
            "wait_time",
            "reject",
            "total_service_time",
            "total_idle_time",
            "gas",
            "resource",
            "dispatch",
            "reward",
            "effective_dist",
            "actual_dist",
            "agent_time",
            "original_requests",
            "curr_passengers",
            "curr_packages",
        ]
    )

    # Warm-up: step without actions and gather every batch of requests seen.
    _, requests, __, __, __, __, __, __ = env.step()
    warmup_requests = [requests]
    for _ in range(no_op_steps - 2):
        _, extra_requests, __, __, __, __, __, __ = env.step()
        warmup_requests.append(extra_requests)
    if len(warmup_requests) > 1:
        # DataFrame.append was removed in pandas 2.0; concat is the supported
        # equivalent and a single call avoids repeated copying.
        requests = pd.concat(warmup_requests)

    if agent:
        agent.reset(requests, env.dayofweek, env.minofday)
    vehicles, requests, __, __, __, __, __, __ = env.step()

    start = time.time()
    prev_reward = 0
    prev_eff_dist = 0
    prev_act_dist = 0
    num_vehicles = len(vehicles)
    lookahead_steps = 30  # horizon requested from env.get_requests in cheat mode

    for t in range(num_steps):
        if cheat and t % cheat_cycle == 0:
            # Stop the whole run once fewer than `lookahead_steps` steps remain.
            if t > num_steps - lookahead_steps:
                break
            future_requests = env.get_requests(num_steps=lookahead_steps)
            if agent:
                agent.update_future_demand(future_requests)

        agent_start = time.time()
        actions = agent.get_actions(vehicles, requests) if agent else []
        agent_time = time.time() - agent_start

        dispatch = len(actions)
        # Read the clock before stepping so the row is stamped with the time
        # at which the actions were decided.
        dayofweek = env.dayofweek
        minofday = env.minofday
        vehicles, requests, wait, reject, gas, idle, original_requests, vehicle_scores = env.step(actions)

        # Every 100 steps dump the joined vehicle state / vehicle score table.
        if t % 100 == 0:
            fpath = "./experiments/results/vehicle_logs/" + str(experiment_id)
            os.makedirs(fpath, exist_ok=True)
            snapshot = vehicles.set_index("id").join(vehicle_scores.set_index("id"))
            # NOTE(review): index=False drops the "id" index from the CSV;
            # kept as-is to preserve the existing log format - confirm intent.
            snapshot.to_csv(fpath + "/status_" + str(t) + ".csv", index=False)

        avg_reward = vehicles.reward.mean()
        eff_dist_total = vehicles.eff_dist.sum()
        act_dist_total = vehicles.act_dist.sum()
        total_service_time = vehicle_scores.service_time.sum()
        curr_passengers = vehicles.curr_passengers.sum()

        # The packages column is optional; fall back to 0 when it is absent.
        try:
            curr_packages = vehicles.curr_packages.sum()
        except AttributeError:
            curr_packages = 0

        # reward / distances are stored as deltas against the previous step.
        score.loc[t] = (
            dayofweek,
            minofday,
            len(requests),
            wait,
            reject,
            total_service_time,
            idle,
            gas,
            sum(vehicles.available),
            dispatch,
            avg_reward - prev_reward,
            eff_dist_total - prev_eff_dist,
            act_dist_total - prev_act_dist,
            agent_time,
            original_requests,
            curr_passengers,
            curr_packages,
        )
        prev_reward = avg_reward
        prev_eff_dist = eff_dist_total
        prev_act_dist = act_dist_total

        if t > 0 and t % average_cycle == 0:
            elapsed = time.time() - start
            # Sum over the previous `average_cycle` rows (the current row t is
            # not included). Kept in its own variable so the per-step values
            # above are not shadowed.
            window = score.loc[
                t - average_cycle : t - 1,
                [
                    "requests",
                    "wait_time",
                    "reject",
                    "dispatch",
                    "reward",
                    "effective_dist",
                    "actual_dist",
                    "curr_passengers",
                    "curr_packages",
                ],
            ].sum()
            dispatch_per_vehicle = window["dispatch"] / num_vehicles if num_vehicles else 0.0
            print(
                "t = {:d} ({:.0f} elapsed) // REQ: {:.0f} / PsG: {:.0f} / PkG: {:.0f} / REJ: {:.0f}/ ACC: {:.1f} / WAIT: {:.0f}  / DSP: {:.2f} / RWD: {:.1f} / ED: {:.1f} / AD: {:.1f}".format(
                    int(t * env.cycle),
                    elapsed,
                    window["requests"],
                    window["curr_passengers"],
                    window["curr_packages"],
                    window["reject"],
                    (window["requests"] - window["reject"]),
                    window["wait_time"],
                    dispatch_per_vehicle,
                    window["reward"],
                    window["effective_dist"],
                    window["actual_dist"],
                )
            )
            sys.stdout.flush()
    return score, env.get_vehicles_score()


def load_trips(scenario, trip_path, sample_size, skiprows=0):
    """Read a window of trips from a CSV file and report where the window starts.

    Args:
        scenario: Selects the returned columns. ``"DeepPool"`` omits
            ``hop_flag``; ``"Hybrid"`` additionally includes ``g_type``; any
            other value yields the default column set.
        trip_path: Path of the CSV file. Its first line is used as the header.
        sample_size: Maximum number of data rows to read.
        skiprows: Number of data rows to skip after the header.

    Returns:
        A tuple ``(trips, dayofweek, minofday, duration)``: the selected trip
        columns with ``second`` rebased so the first row is 0, the
        ``dayofweek`` of the first row, ``hour * 60 + minute`` of the first
        row, and the ``second`` value of the last row converted to whole
        minutes.
    """
    # The header is read on its own so data rows can be skipped without
    # losing the column names.
    header = pd.read_csv(trip_path, nrows=1).columns
    frame = pd.read_csv(trip_path, names=header, nrows=sample_size, skiprows=skiprows + 1)

    # Make timestamps relative to the first trip of the window.
    frame["second"] = frame["second"] - frame.at[0, "second"]
    span_minutes = int(frame["second"].values[-1] / 60)

    start_dayofweek = frame.at[0, "dayofweek"]
    start_minofday = frame.at[0, "hour"] * 60 + frame.at[0, "minute"]

    location_cols = ["trip_time", "phash", "plat", "plon", "dhash", "dlat", "dlon"]
    if scenario == "DeepPool":
        selected = location_cols + ["second", "trip_distance"]
    elif scenario == "Hybrid":
        selected = location_cols + ["g_type", "second", "trip_distance", "hop_flag"]
    else:
        selected = location_cols + ["second", "trip_distance", "hop_flag"]

    return frame[selected], start_dayofweek, start_minofday, span_minutes


def load_trip_chunks(scenario, trip_path, num_trips, duration, offset=0, randomize=True):
    """Load trips and split them into consecutive chunks of ``duration + offset`` minutes.

    Args:
        scenario: Forwarded to ``load_trips`` to select the trip columns.
        trip_path: Path of the trip CSV file.
        num_trips: Maximum number of trips to load.
        duration: Nominal chunk length in minutes; also the amount by which
            the reported ``minofday`` advances from one chunk to the next.
        offset: Extra minutes of trips included in (and consumed by) each chunk.
        randomize: When true, the list of chunks is shuffled in place before
            being returned.

    Returns:
        A list of ``(chunk, date, dayofweek, minofday)`` tuples. ``chunk`` is a
        DataFrame whose ``second`` column is relative to the first trip of
        that chunk. Fewer than ``int(minutes / duration)`` chunks are returned
        when the trips run out early.
    """
    trips, dayofweek, minofday, minutes = load_trips(scenario, trip_path, num_trips)
    num_chunks = int(minutes / duration)
    chunk_seconds = (duration + offset) * 60.0

    chunks = []
    date = 1
    for _ in range(num_chunks):
        # With offset > 0 (or gaps in the data) trips can be exhausted before
        # num_chunks iterations; stop instead of indexing an empty array.
        if trips.empty:
            break

        # Rebase on a fresh frame rather than mutating a filtered slice.
        trips = trips.assign(second=trips["second"] - trips["second"].values[0])
        chunks.append((trips[trips["second"] < chunk_seconds], date, dayofweek, minofday))
        trips = trips[trips["second"] >= chunk_seconds]

        minofday += duration
        # Loop so that a duration longer than one day still wraps correctly.
        while minofday >= 1440:  # 24 hour * 60 minute
            minofday -= 1440
            dayofweek = (dayofweek + 1) % 7
            date += 1

    if randomize:
        shuffle(chunks)

    return chunks


def load_trip_eval(trip_path, num_trips, day_start=4, no_op_steps=30, scenario=None):
    """Load trips and split them into per-day evaluation chunks starting on day-of-week 0.

    Args:
        trip_path: Path of the trip CSV file.
        num_trips: Maximum number of trips to load.
        day_start: Hour of day at which each evaluation day starts.
        no_op_steps: Minutes of trips included before ``day_start`` in the
            first chunk and after the 24-hour mark in every chunk.
        scenario: Forwarded to ``load_trips`` to select the trip columns.
            ``load_trips`` takes ``scenario`` as its first positional
            argument; the previous two-argument call could not succeed. The
            default ``None`` selects ``load_trips``' default column set.

    Returns:
        A list of ``(day_chunk, date, dayofweek, minofday)`` tuples, at most
        one per day of the week, where ``minofday`` is always
        ``day_start * 60 - no_op_steps``.
    """
    trips, dayofweek, _, _ = load_trips(scenario, trip_path, num_trips)
    chunks = []

    # Days to skip until the next day with dayofweek == 0.
    day_shift = (7 - dayofweek) % 7
    # Drop everything before `day_start` o'clock (minus the no-op lead-in) on
    # that day.
    # NOTE(review): this threshold treats second == 0 as midnight of the first
    # day, i.e. it presumes the loaded trips begin at 00:00 - confirm.
    trips = trips[trips.second >= ((day_shift * 24 + day_start) * 60 - no_op_steps) * 60]
    dayofweek = 0
    minofday = day_start * 60 - no_op_steps
    date = 1 + day_shift

    while len(trips):
        # Rebase on a fresh frame rather than mutating a filtered slice.
        trips = trips.assign(second=trips["second"] - trips["second"].values[0])
        day_chunk = trips[trips["second"] < (24 * 60 + no_op_steps) * 60.0]
        chunks.append((day_chunk, date, dayofweek, minofday))
        # Consecutive chunks overlap by `no_op_steps` minutes: only the first
        # 24 hours are consumed.
        trips = trips[trips["second"] >= 24 * 60 * 60.0]
        dayofweek = (dayofweek + 1) % 7
        date += 1
        # Stop after one full week.
        if dayofweek == 0:
            break

    return chunks


def describe(score, episode, fpath):
    """Print an episode summary computed from ``score`` and append it to ``fpath``.

    Args:
        score: DataFrame with the columns ``requests``, ``wait_time``,
            ``reject``, ``total_idle_time``, ``total_service_time``,
            ``reward``, ``agent_time``, ``original_requests``,
            ``effective_dist`` and ``actual_dist``.
        episode: Episode number shown in the summary header.
        fpath: File the summary is appended to, one line per record.

    Ratios whose denominator is zero are reported as ``nan`` instead of
    raising. ``AVG_UTILIZATION`` is total service time divided by total
    service plus idle time.
    """

    def _ratio(numerator, denominator):
        # Guarded division: integer totals would otherwise raise on zero.
        return float(numerator) / float(denominator) if denominator else float("nan")

    total_requests = int(score["requests"].sum())
    total_wait = score["wait_time"].sum()
    total_reject = int(score["reject"].sum())
    total_idle = int(score["total_idle_time"].sum())
    total_service = int(score["total_service_time"].sum())
    total_reward = score["reward"].sum()
    total_original = score["original_requests"].sum()
    total_eff_dist = score["effective_dist"].sum()
    total_act_dist = score["actual_dist"].sum()
    accepted = total_requests - total_reject

    avg_wait = _ratio(total_wait, accepted)
    reject_rate = _ratio(total_reject, total_requests)
    # NOTE(review): the 0.2 factor is carried over unchanged; its meaning is
    # not visible here.
    effort = _ratio(total_idle, total_requests * 0.2 - total_reject)
    avg_time = score["agent_time"].mean()
    avg_num_transitions = _ratio(accepted, total_original)
    distance_ratio = _ratio(total_eff_dist, total_act_dist)
    # NOTE(review): the constant 15 is carried over unchanged - presumably a
    # speed or scaling factor; confirm.
    total_time = _ratio(total_act_dist, total_original * 15)
    # A scalar is required by the ".2f" format below; dividing the Series
    # element-wise produced a Series that could not be formatted.
    average_utilization = _ratio(total_service, total_service + total_idle)

    lines = [
        "----------------------------------- EPISODE %d SUMMARY -----------------------------------" % episode,
        # Each placeholder now refers to its own argument (SERVICE used to
        # repeat IDLE and shift every later field by one).
        "REQUESTS: {0:d} / REJECTS: {1:d} / IDLE: {2:d} / SERVICE: {3:d} / REWARD: {4:.0f}/RATIO_EFF_DIST: {5:.2f}/TRANS: {6:.4f}".format(
            total_requests, total_reject, total_idle, total_service, total_reward, distance_ratio, avg_num_transitions
        ),
        "WAIT TIME: {0:.2f} / REJECT RATE: {1:.3f} / EFFORT: {2:.2f} / TIME: {3:.2f} / TRIP_TIME: {4:.4f} / AVG_UTILIZATION: {5:.2f}".format(
            avg_wait, reject_rate, effort, avg_time, total_time, average_utilization
        ),
    ]
    for line in lines:
        print(line)

    # Newline-terminate every record so appended episodes do not run together.
    with open(fpath, "a") as f:
        f.write("\n".join(lines) + "\n")
	import pandas as pd
	import time
	import sys
	from random import shuffle
	import pdb
	import os


	def run(experiment_id, env, agent, num_steps, no_op_steps=2, average_cycle=1, cheat=False, cheat_cycle=15):
	    """Step ``env`` under the control of ``agent`` and record one metrics row per step.

	    The function first performs warm-up steps with no actions, hands the
	    requests collected during warm-up to ``agent.reset``, and then runs the
	    main loop: ask the agent for actions, apply them with ``env.step`` and
	    append a row to the score table.

	    Args:
	        experiment_id: Used (via ``str``) as the sub-directory name under
	            ``./experiments/results/vehicle_logs/`` for periodic vehicle logs.
	        env: Environment object. This function uses ``env.step``,
	            ``env.dayofweek``, ``env.minofday``, ``env.cycle``,
	            ``env.get_requests`` (only when ``cheat`` is set) and
	            ``env.get_vehicles_score``.
	        agent: Object providing ``reset``, ``get_actions`` and (when ``cheat``
	            is set) ``update_future_demand``. May be falsy, in which case no
	            actions are dispatched.
	        num_steps: Maximum number of main-loop iterations.
	        no_op_steps: ``no_op_steps - 2`` extra warm-up steps are taken, in
	            addition to the two unconditional ones.
	        average_cycle: A progress line summing the previous ``average_cycle``
	            rows is printed every ``average_cycle`` steps.
	        cheat: When true, every ``cheat_cycle`` steps the agent is given the
	            result of ``env.get_requests(num_steps=30)``.
	        cheat_cycle: Period, in steps, of the ``cheat`` update.

	    Returns:
	        A tuple ``(score, env.get_vehicles_score())`` where ``score`` is a
	        ``pandas.DataFrame`` indexed by step number.
	    """
	    score = pd.DataFrame(
	        columns=[
	            "dayofweek",
	            "minofday",
	            "requests",
	            "wait_time",
	            "reject",
	            "total_service_time",
	            "total_idle_time",
	            "gas",
	            "resource",
	            "dispatch",
	            "reward",
	            "effective_dist",
	            "actual_dist",
	            "agent_time",
	            "original_requests",
	            "curr_passengers",
	            "curr_packages",
	        ]
	    )

	    # Warm-up: step without actions and gather every batch of requests seen.
	    _, requests, __, __, __, __, __, __ = env.step()
	    warmup_requests = [requests]
	    for _ in range(no_op_steps - 2):
	        _, extra_requests, __, __, __, __, __, __ = env.step()
	        warmup_requests.append(extra_requests)
	    if len(warmup_requests) > 1:
	        # DataFrame.append was removed in pandas 2.0; concat is the supported
	        # equivalent and a single call avoids repeated copying.
	        requests = pd.concat(warmup_requests)

	    if agent:
	        agent.reset(requests, env.dayofweek, env.minofday)
	    vehicles, requests, __, __, __, __, __, __ = env.step()

	    start = time.time()
	    prev_reward = 0
	    prev_eff_dist = 0
	    prev_act_dist = 0
	    num_vehicles = len(vehicles)
	    lookahead_steps = 30  # horizon requested from env.get_requests in cheat mode

	    for t in range(num_steps):
	        if cheat and t % cheat_cycle == 0:
	            # Stop the whole run once fewer than `lookahead_steps` steps remain.
	            if t > num_steps - lookahead_steps:
	                break
	            future_requests = env.get_requests(num_steps=lookahead_steps)
	            if agent:
	                agent.update_future_demand(future_requests)

	        agent_start = time.time()
	        actions = agent.get_actions(vehicles, requests) if agent else []
	        agent_time = time.time() - agent_start

	        dispatch = len(actions)
	        # Read the clock before stepping so the row is stamped with the time
	        # at which the actions were decided.
	        dayofweek = env.dayofweek
	        minofday = env.minofday
	        vehicles, requests, wait, reject, gas, idle, original_requests, vehicle_scores = env.step(actions)

	        # Every 100 steps dump the joined vehicle state / vehicle score table.
	        if t % 100 == 0:
	            fpath = "./experiments/results/vehicle_logs/" + str(experiment_id)
	            os.makedirs(fpath, exist_ok=True)
	            snapshot = vehicles.set_index("id").join(vehicle_scores.set_index("id"))
	            # NOTE(review): index=False drops the "id" index from the CSV;
	            # kept as-is to preserve the existing log format - confirm intent.
	            snapshot.to_csv(fpath + "/status_" + str(t) + ".csv", index=False)

	        avg_reward = vehicles.reward.mean()
	        eff_dist_total = vehicles.eff_dist.sum()
	        act_dist_total = vehicles.act_dist.sum()
	        total_service_time = vehicle_scores.service_time.sum()
	        curr_passengers = vehicles.curr_passengers.sum()

	        # The packages column is optional; fall back to 0 when it is absent.
	        try:
	            curr_packages = vehicles.curr_packages.sum()
	        except AttributeError:
	            curr_packages = 0

	        # reward / distances are stored as deltas against the previous step.
	        score.loc[t] = (
	            dayofweek,
	            minofday,
	            len(requests),
	            wait,
	            reject,
	            total_service_time,
	            idle,
	            gas,
	            sum(vehicles.available),
	            dispatch,
	            avg_reward - prev_reward,
	            eff_dist_total - prev_eff_dist,
	            act_dist_total - prev_act_dist,
	            agent_time,
	            original_requests,
	            curr_passengers,
	            curr_packages,
	        )
	        prev_reward = avg_reward
	        prev_eff_dist = eff_dist_total
	        prev_act_dist = act_dist_total

	        if t > 0 and t % average_cycle == 0:
	            elapsed = time.time() - start
	            # Sum over the previous `average_cycle` rows (the current row t is
	            # not included). Kept in its own variable so the per-step values
	            # above are not shadowed.
	            window = score.loc[
	                t - average_cycle : t - 1,
	                [
	                    "requests",
	                    "wait_time",
	                    "reject",
	                    "dispatch",
	                    "reward",
	                    "effective_dist",
	                    "actual_dist",
	                    "curr_passengers",
	                    "curr_packages",
	                ],
	            ].sum()
	            dispatch_per_vehicle = window["dispatch"] / num_vehicles if num_vehicles else 0.0
	            print(
	                "t = {:d} ({:.0f} elapsed) // REQ: {:.0f} / PsG: {:.0f} / PkG: {:.0f} / REJ: {:.0f}/ ACC: {:.1f} / WAIT: {:.0f} / DSP: {:.2f} / RWD: {:.1f} / ED: {:.1f} / AD: {:.1f}".format(
	                    int(t * env.cycle),
	                    elapsed,
	                    window["requests"],
	                    window["curr_passengers"],
	                    window["curr_packages"],
	                    window["reject"],
	                    (window["requests"] - window["reject"]),
	                    window["wait_time"],
	                    dispatch_per_vehicle,
	                    window["reward"],
	                    window["effective_dist"],
	                    window["actual_dist"],
	                )
	            )
	            sys.stdout.flush()
	    return score, env.get_vehicles_score()


	def load_trips(scenario, trip_path, sample_size, skiprows=0):
	    """Read a window of trips from a CSV file and report where the window starts.

	    Args:
	        scenario: Selects the returned columns. ``"DeepPool"`` omits
	            ``hop_flag``; ``"Hybrid"`` additionally includes ``g_type``; any
	            other value yields the default column set.
	        trip_path: Path of the CSV file. Its first line is used as the header.
	        sample_size: Maximum number of data rows to read.
	        skiprows: Number of data rows to skip after the header.

	    Returns:
	        A tuple ``(trips, dayofweek, minofday, duration)``: the selected trip
	        columns with ``second`` rebased so the first row is 0, the
	        ``dayofweek`` of the first row, ``hour * 60 + minute`` of the first
	        row, and the ``second`` value of the last row converted to whole
	        minutes.
	    """
	    # The header is read on its own so data rows can be skipped without
	    # losing the column names.
	    trip_cols = pd.read_csv(trip_path, nrows=1).columns
	    trips = pd.read_csv(trip_path, names=trip_cols, nrows=sample_size, skiprows=skiprows + 1)

	    # Make timestamps relative to the first trip of the window.
	    trips["second"] -= trips.loc[0, "second"]
	    duration = int(trips.second.values[-1] / 60)
	    dayofweek = trips.loc[0, "dayofweek"]
	    minofday = trips.loc[0, "hour"] * 60 + trips.loc[0, "minute"]

	    features = ["trip_time", "phash", "plat", "plon", "dhash", "dlat", "dlon", "second", "trip_distance", "hop_flag"]
	    if scenario == "DeepPool":
	        features = ["trip_time", "phash", "plat", "plon", "dhash", "dlat", "dlon", "second", "trip_distance"]
	    elif scenario == "Hybrid":
	        features = [
	            "trip_time",
	            "phash",
	            "plat",
	            "plon",
	            "dhash",
	            "dlat",
	            "dlon",
	            "g_type",
	            "second",
	            "trip_distance",
	            "hop_flag",
	        ]
	    trips = trips[features]

	    return trips, dayofweek, minofday, duration


	def load_trip_chunks(scenario, trip_path, num_trips, duration, offset=0, randomize=True):
	    """Load trips and split them into consecutive chunks of ``duration + offset`` minutes.

	    Args:
	        scenario: Forwarded to ``load_trips`` to select the trip columns.
	        trip_path: Path of the trip CSV file.
	        num_trips: Maximum number of trips to load.
	        duration: Nominal chunk length in minutes; also the amount by which
	            the reported ``minofday`` advances from one chunk to the next.
	        offset: Extra minutes of trips included in (and consumed by) each chunk.
	        randomize: When true, the list of chunks is shuffled in place before
	            being returned.

	    Returns:
	        A list of ``(chunk, date, dayofweek, minofday)`` tuples. ``chunk`` is a
	        DataFrame whose ``second`` column is relative to the first trip of
	        that chunk. Fewer than ``int(minutes / duration)`` chunks are returned
	        when the trips run out early.
	    """
	    trips, dayofweek, minofday, minutes = load_trips(scenario, trip_path, num_trips)
	    num_chunks = int(minutes / duration)
	    chunk_seconds = (duration + offset) * 60.0

	    chunks = []
	    date = 1
	    for _ in range(num_chunks):
	        # With offset > 0 (or gaps in the data) trips can be exhausted before
	        # num_chunks iterations; stop instead of indexing an empty array.
	        if trips.empty:
	            break

	        # Rebase on a fresh frame rather than mutating a filtered slice.
	        trips = trips.assign(second=trips["second"] - trips["second"].values[0])
	        chunks.append((trips[trips["second"] < chunk_seconds], date, dayofweek, minofday))
	        trips = trips[trips["second"] >= chunk_seconds]

	        minofday += duration
	        # Loop so that a duration longer than one day still wraps correctly.
	        while minofday >= 1440:  # 24 hour * 60 minute
	            minofday -= 1440
	            dayofweek = (dayofweek + 1) % 7
	            date += 1

	    if randomize:
	        shuffle(chunks)

	    return chunks


	def load_trip_eval(trip_path, num_trips, day_start=4, no_op_steps=30, scenario=None):
	    """Load trips and split them into per-day evaluation chunks starting on day-of-week 0.

	    Args:
	        trip_path: Path of the trip CSV file.
	        num_trips: Maximum number of trips to load.
	        day_start: Hour of day at which each evaluation day starts.
	        no_op_steps: Minutes of trips included before ``day_start`` in the
	            first chunk and after the 24-hour mark in every chunk.
	        scenario: Forwarded to ``load_trips`` to select the trip columns.
	            ``load_trips`` takes ``scenario`` as its first positional
	            argument; the previous two-argument call could not succeed. The
	            default ``None`` selects ``load_trips``' default column set.

	    Returns:
	        A list of ``(day_chunk, date, dayofweek, minofday)`` tuples, at most
	        one per day of the week, where ``minofday`` is always
	        ``day_start * 60 - no_op_steps``.
	    """
	    trips, dayofweek, _, _ = load_trips(scenario, trip_path, num_trips)
	    chunks = []

	    # Days to skip until the next day with dayofweek == 0.
	    day_shift = (7 - dayofweek) % 7
	    # Drop everything before `day_start` o'clock (minus the no-op lead-in) on
	    # that day.
	    # NOTE(review): this threshold treats second == 0 as midnight of the first
	    # day, i.e. it presumes the loaded trips begin at 00:00 - confirm.
	    trips = trips[trips.second >= ((day_shift * 24 + day_start) * 60 - no_op_steps) * 60]
	    dayofweek = 0
	    minofday = day_start * 60 - no_op_steps
	    date = 1 + day_shift

	    while len(trips):
	        # Rebase on a fresh frame rather than mutating a filtered slice.
	        trips = trips.assign(second=trips["second"] - trips["second"].values[0])
	        day_chunk = trips[trips["second"] < (24 * 60 + no_op_steps) * 60.0]
	        chunks.append((day_chunk, date, dayofweek, minofday))
	        # Consecutive chunks overlap by `no_op_steps` minutes: only the first
	        # 24 hours are consumed.
	        trips = trips[trips["second"] >= 24 * 60 * 60.0]
	        dayofweek = (dayofweek + 1) % 7
	        date += 1
	        # Stop after one full week.
	        if dayofweek == 0:
	            break

	    return chunks


	def describe(score, episode, fpath):
	    """Print an episode summary computed from ``score`` and append it to ``fpath``.

	    Args:
	        score: DataFrame with the columns ``requests``, ``wait_time``,
	            ``reject``, ``total_idle_time``, ``total_service_time``,
	            ``reward``, ``agent_time``, ``original_requests``,
	            ``effective_dist`` and ``actual_dist``.
	        episode: Episode number shown in the summary header.
	        fpath: File the summary is appended to, one line per record.

	    Ratios whose denominator is zero are reported as ``nan`` instead of
	    raising. ``AVG_UTILIZATION`` is total service time divided by total
	    service plus idle time.
	    """

	    def _ratio(numerator, denominator):
	        # Guarded division: integer totals would otherwise raise on zero.
	        return float(numerator) / float(denominator) if denominator else float("nan")

	    total_requests = int(score["requests"].sum())
	    total_wait = score["wait_time"].sum()
	    total_reject = int(score["reject"].sum())
	    total_idle = int(score["total_idle_time"].sum())
	    total_service = int(score["total_service_time"].sum())
	    total_reward = score["reward"].sum()
	    total_original = score["original_requests"].sum()
	    total_eff_dist = score["effective_dist"].sum()
	    total_act_dist = score["actual_dist"].sum()
	    accepted = total_requests - total_reject

	    avg_wait = _ratio(total_wait, accepted)
	    reject_rate = _ratio(total_reject, total_requests)
	    # NOTE(review): the 0.2 factor is carried over unchanged; its meaning is
	    # not visible here.
	    effort = _ratio(total_idle, total_requests * 0.2 - total_reject)
	    avg_time = score["agent_time"].mean()
	    avg_num_transitions = _ratio(accepted, total_original)
	    distance_ratio = _ratio(total_eff_dist, total_act_dist)
	    # NOTE(review): the constant 15 is carried over unchanged - presumably a
	    # speed or scaling factor; confirm.
	    total_time = _ratio(total_act_dist, total_original * 15)
	    # A scalar is required by the ".2f" format below; dividing the Series
	    # element-wise produced a Series that could not be formatted.
	    average_utilization = _ratio(total_service, total_service + total_idle)

	    lines = [
	        "----------------------------------- EPISODE %d SUMMARY -----------------------------------" % episode,
	        # Each placeholder now refers to its own argument (SERVICE used to
	        # repeat IDLE and shift every later field by one).
	        "REQUESTS: {0:d} / REJECTS: {1:d} / IDLE: {2:d} / SERVICE: {3:d} / REWARD: {4:.0f}/RATIO_EFF_DIST: {5:.2f}/TRANS: {6:.4f}".format(
	            total_requests, total_reject, total_idle, total_service, total_reward, distance_ratio, avg_num_transitions
	        ),
	        "WAIT TIME: {0:.2f} / REJECT RATE: {1:.3f} / EFFORT: {2:.2f} / TIME: {3:.2f} / TRIP_TIME: {4:.4f} / AVG_UTILIZATION: {5:.2f}".format(
	            avg_wait, reject_rate, effort, avg_time, total_time, average_utilization
	        ),
	    ]
	    for line in lines:
	        print(line)

	    # Newline-terminate every record so appended episodes do not run together.
	    with open(fpath, "a") as f:
	        f.write("\n".join(lines) + "\n")