diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c5b5606
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+Results/
+.DS_Store
+.idea/
+Scripts/__pycache__/
\ No newline at end of file
diff --git a/Display/Fig1a.png b/Display/Fig1a.png
new file mode 100644
index 0000000..41bd0c3
Binary files /dev/null and b/Display/Fig1a.png differ
diff --git a/Display/Fig1b.png b/Display/Fig1b.png
new file mode 100644
index 0000000..beb5a31
Binary files /dev/null and b/Display/Fig1b.png differ
diff --git a/Main.py b/Main.py
new file mode 100644
index 0000000..f1d4f19
--- /dev/null
+++ b/Main.py
@@ -0,0 +1,80 @@
+from Scripts.Algorithm import train, evaluateMARLNonLocal, evaluateMARLLocal
+from Scripts.Parameters import ParseInput
+import time
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+
+if __name__ == '__main__':
+    args = ParseInput()
+
+    t0 = time.time()
+
+    indexN = 0
+    valueLocalArray = np.zeros(args.numN)
+    valueLocalArraySD = np.zeros(args.numN)
+
+    valueNonLocalArray = np.zeros(args.numN)
+    valueNonLocalArraySD = np.zeros(args.numN)
+
+    ErrorArray = np.zeros(args.numN)
+    ErrorArraySD = np.zeros(args.numN)
+
+    NVec = np.zeros(args.numN)
+
+    if args.train:
+        print('Training is in progress.')
+        train(args)
+
+    print('Evaluation is in progress.')
+    # Sweep over N = minN, minN + divN, ..., averaging over maxSeed independent runs.
+    while indexN < args.numN:
+        N = args.minN + indexN * args.divN
+        NVec[indexN] = N
+
+        for _ in range(0, args.maxSeed):
+            valueLocal = evaluateMARLLocal(args, N)
+            valueLocal = np.array(valueLocal.detach())
+
+            valueLocalArray[indexN] += valueLocal/args.maxSeed
+            valueLocalArraySD[indexN] += valueLocal ** 2 / args.maxSeed
+
+            valueNonLocal = evaluateMARLNonLocal(args, N)
+            valueNonLocal = np.array(valueNonLocal.detach())
+
+            valueNonLocalArray[indexN] += valueNonLocal/args.maxSeed
+            valueNonLocalArraySD[indexN] += valueNonLocal**2/args.maxSeed
+
+            Error = np.abs(valueNonLocal - valueLocal)
+            ErrorArray[indexN] += Error/args.maxSeed
+            ErrorArraySD[indexN] += Error**2/args.maxSeed
+
+        indexN += 1
+        print(f'N: {N}')
+
+    # Convert accumulated second moments into standard deviations.
+    valueLocalArraySD = np.sqrt(np.maximum(0, valueLocalArraySD - valueLocalArray ** 2))
+    valueNonLocalArraySD = np.sqrt(np.maximum(0, valueNonLocalArraySD - valueNonLocalArray ** 2))
+    ErrorArraySD = np.sqrt(np.maximum(0, ErrorArraySD - ErrorArray ** 2))
+
+    if not os.path.exists('Results'):
+        os.mkdir('Results')
+
+    plt.figure()
+    plt.xlabel('N')
+    plt.ylabel('Values')
+    plt.plot(NVec, valueLocalArray, label='Local')
+    plt.fill_between(NVec, valueLocalArray - valueLocalArraySD, valueLocalArray + valueLocalArraySD, alpha=0.3)
+    plt.plot(NVec, valueNonLocalArray, label='Non-Local')
+    plt.fill_between(NVec, valueNonLocalArray - valueNonLocalArraySD, valueNonLocalArray + valueNonLocalArraySD, alpha=0.3)
+    plt.legend()
+    plt.savefig(f'Results/Values.png')
+
+    plt.figure()
+    plt.xlabel('N')
+    plt.ylabel('Error')
+    plt.plot(NVec, ErrorArray)
+    plt.fill_between(NVec, ErrorArray - ErrorArraySD, ErrorArray + ErrorArraySD, alpha=0.3)
+    plt.savefig(f'Results/Error.png')
+
+    t1 = time.time()
+
+    print(f'Elapsed time is {t1-t0} sec')
diff --git a/Models/Actor.pkl b/Models/Actor.pkl
new file mode 100644
index 0000000..48bc9f3
Binary files /dev/null and b/Models/Actor.pkl differ
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e9458d4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,35 @@
+# Introduction
+
+This repository contains the code used to generate the numerical results in the following paper:
+
+"On the Near-Optimality of Local Policies in Large Cooperative Multi-Agent Reinforcement
Learning", Transactions on Machine Learning Research, 2022. + +# Parameters + +Various parameters used in the experiments can be found in Scripts/Parameters.py file. + +# Results + +Generated results will be stored in Results folder (will be created on the fly). +Some pre-generated results are available for display in the Display folder. Specifically, +Fig. 1 depicts the percentage error between the values generated by local and non-local policies in an N-agent system +as a function of N. + +# Run Experiments + +``` +python3 Main.py +``` + +# Command Line Options + +Various command line options are given below: + +``` +--train : if training is required from scratch, otherwise a pre-trained model will be used +--minN : minimum value of N +--numN : number of N values +--divN : difference between two consecutive N values +--maxSeed: number of random seeds +``` \ No newline at end of file diff --git a/Scripts/Algorithm.py b/Scripts/Algorithm.py new file mode 100644 index 0000000..89fa20b --- /dev/null +++ b/Scripts/Algorithm.py @@ -0,0 +1,341 @@ +import torch +import torch.optim as optim +import torch.nn.functional as F +import torch.nn as nn +from torch.distributions.categorical import Categorical +import os +import copy + + +class Actor(nn.Module): + def __init__(self, state_size, action_size, hidden_size=32): + super(Actor, self).__init__() + self.state_size = 2*state_size # one-hot state + mean-distribution + self.action_size = action_size + self.hidden_size = hidden_size + self.linear1 = nn.Linear(self.state_size, self.hidden_size) + self.linear2 = nn.Linear(self.hidden_size, self.hidden_size) + self.linear3 = nn.Linear(self.hidden_size, self.action_size) + + def forward(self, state, state_dist): + state_joined = torch.cat([state, state_dist]) + output = F.relu(self.linear1(state_joined)) + output = F.relu(self.linear2(output)) + output = F.softmax(self.linear3(output), dim=-1) + return output + + +def train(args): + actor = Actor(args.num_states, args.num_actions, args.hidden_size) + NumActParam = 2*args.num_states * args.hidden_size + args.hidden_size + args.hidden_size**2 + args.hidden_size + args.hidden_size*args.num_actions + args.num_actions + optimizer = optim.Adam(list(actor.parameters())) + + # Floating point representation of states + states_float = torch.tensor(range(0, args.num_states)).float() + + for j in range(args.J): + + w = torch.zeros(NumActParam) + w_avg = torch.zeros(NumActParam) + + for _ in range(args.L): + + # Initial state distribution + curr_state_dist = torch.ones(args.num_states) / args.num_states + curr_state = Categorical(curr_state_dist).sample().long() + + """ ------------ Sampling (x, mu, u) ------------ """ + FLAG = False + while not FLAG: + if torch.rand(1) > args.gamma: + FLAG = True + """ --------- Update Subroutine -------------- """ + + """ ------------ Current State ------------------- """ + curr_state_one_hot = torch.zeros(args.num_states) + curr_state_one_hot[curr_state] = 1 + + """ ------------- Mean of Current State Distribution ------------- """ + curr_state_dist_mean = torch.dot(states_float, curr_state_dist) + + """ ------------- Current Action ------------------ """ + policy = Categorical(actor(curr_state_one_hot, curr_state_dist)) + curr_action = policy.sample().long() + + """ ------------- Next State --------------- """ + fraction = 1 - (curr_state_dist_mean/args.num_states) + if curr_action == 0: + next_state = curr_state + else: + chi = torch.rand(1) + next_state = curr_state + (chi * fraction * (args.num_states - 1 - curr_state)).long() 
+                next_state_one_hot = torch.zeros(args.num_states)
+                next_state_one_hot[next_state] = 1
+
+                """ -------------- Next State Distribution ------------- """
+
+                next_state_dist = torch.zeros(args.num_states)
+                for state_t in range(0, args.num_states):
+                    one_hot_state_t = torch.zeros(args.num_states)
+                    one_hot_state_t[state_t] = 1
+
+                    for action_t in range(0, args.num_actions):
+                        dist_vec = torch.zeros(args.num_states)
+                        if action_t == 0:
+                            dist_vec[state_t] = 1
+                        else:
+                            prob_mass = 1/(fraction * (args.num_states - 1 - state_t))
+                            total_prob = torch.tensor(1.0)
+                            state_t_plus_1 = state_t
+                            while total_prob > 0 and state_t_plus_1 < args.num_states:
+                                dist_vec[state_t_plus_1] = torch.minimum(prob_mass, total_prob)
+                                total_prob -= torch.minimum(prob_mass, total_prob)
+                                state_t_plus_1 += 1
+
+                        prob = actor(one_hot_state_t, curr_state_dist)[action_t] * curr_state_dist[state_t]
+                        next_state_dist += dist_vec * prob
+
+                """ --------------------- Update ------------------ """
+                curr_state = copy.copy(next_state)
+                curr_state_dist = copy.copy(next_state_dist)
+
+            """ ------------ Sampling Advantage Functions ---------- """
+            FLAG = False
+            SumRewards = torch.tensor([0.])
+
+            while not FLAG:
+                if torch.rand(1) > args.gamma:
+                    FLAG = True
+                """ --------- Update Subroutine -------------- """
+
+                """ ------------ Current State ------------------- """
+                curr_state_one_hot = torch.zeros(args.num_states)
+                curr_state_one_hot[curr_state] = 1
+
+                """ ------------- Mean of Current State Distribution ------------- """
+                curr_state_dist_mean = torch.dot(states_float, curr_state_dist)
+
+                """ ------------- Current Action ------------------ """
+                policy = Categorical(actor(curr_state_one_hot, curr_state_dist))
+                curr_action = policy.sample().long()
+
+                """ ------------- Next State --------------- """
+                fraction = 1 - (curr_state_dist_mean/args.num_states)
+                if curr_action == 0:
+                    next_state = curr_state
+                else:
+                    chi = torch.rand(1)
+                    next_state = curr_state + (chi * fraction * (args.num_states - 1 - curr_state)).long()
+                next_state_one_hot = torch.zeros(args.num_states)
+                next_state_one_hot[next_state] = 1
+
+                """ -------------- Next State Distribution ------------- """
+
+                next_state_dist = torch.zeros(args.num_states)
+                for state_t in range(0, args.num_states):
+                    one_hot_state_t = torch.zeros(args.num_states)
+                    one_hot_state_t[state_t] = 1
+
+                    for action_t in range(0, args.num_actions):
+                        dist_vec = torch.zeros(args.num_states)
+                        if action_t == 0:
+                            dist_vec[state_t] = 1
+                        else:
+                            prob_mass = 1/(fraction * (args.num_states - 1 - state_t))
+                            total_prob = torch.tensor(1.0)
+                            state_t_plus_1 = state_t
+                            while total_prob > 0 and state_t_plus_1 < args.num_states:
+                                dist_vec[state_t_plus_1] = torch.minimum(prob_mass, total_prob)
+                                total_prob -= torch.minimum(prob_mass, total_prob)
+                                state_t_plus_1 += 1
+
+                        prob = actor(one_hot_state_t, curr_state_dist)[action_t] * curr_state_dist[state_t]
+                        next_state_dist += dist_vec * prob
+
+                """ -------------- SumRewards Update ---------- """
+                SumRewards += args.alpha_r * curr_state - args.beta_r * curr_state_dist_mean - args.lambda_r * curr_action
+
+                """ --------------------- Update ------------------ """
+                curr_state = copy.copy(next_state)
+                curr_state_dist = copy.copy(next_state_dist)
+
+            Value_R = 0
+            Q_R = 0
+
+            if torch.rand(1) < 0.5:
+                Value_R = SumRewards
+            else:
+                Q_R = SumRewards
+
+            Advantage_R = 2*(Q_R-Value_R)
+
+            # Gradient Update for the Sub-Problem
+            log_prob = policy.log_prob(curr_action)
+            optimizer.zero_grad()
+            log_prob.backward()
+
+            phi_grads = []
+            for f in actor.parameters():
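+                # Flatten this layer's gradient of log pi(u | x, mu); torch.cat below stacks
+                # them into the single score vector used in the stochastic update of w.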
+                phi_grads.append(f.grad.view(-1))
+            phi_grads = torch.cat(phi_grads)
+
+            h_grads = (torch.dot(w, phi_grads)-Advantage_R)*phi_grads
+
+            w = w - args.alpha * h_grads
+            w_avg += w/args.L
+
+        # Natural policy gradient step: apply the matching slice of the averaged
+        # direction w_avg to each parameter tensor, element by element.
+        offset = 0
+        for phi in actor.parameters():
+            numel = phi.numel()
+            phi.data -= (args.eta/(1-args.gamma)) * w_avg[offset:offset + numel].view_as(phi)
+            offset += numel
+
+    if not os.path.exists('Models'):
+        os.mkdir('Models')
+    torch.save(actor.state_dict(), f'Models/Actor.pkl')
+
+
+def evaluateMARLLocal(args, N):
+    actor = Actor(args.num_states, args.num_actions)
+
+    if not os.path.exists(f'Models/Actor.pkl'):
+        raise ValueError('Model does not exist.')
+    actor.load_state_dict(torch.load(f'Models/Actor.pkl'))
+
+    # Initial state distribution
+    init_state_dist = torch.ones(args.num_states)/args.num_states
+
+    # Initial infinite population mean-field state distribution
+    curr_mf_state_dist = torch.ones(args.num_states) / args.num_states
+
+    # Current Joint State
+    curr_joint_state = Categorical(init_state_dist).sample([N]).long()
+    next_joint_state = torch.zeros(N).long()
+
+    # Floating point representation of states
+    states_float = torch.tensor(range(0, args.num_states)).float()
+
+    # Doubly Stochastic Interaction Matrix
+    W = torch.ones([N, N])/N
+
+    ValueRewardMARL = 0
+    curr_gamma = 1
+
+    for iter_count in range(args.run_eval):
+        curr_average_reward = 0
+
+        curr_joint_state_one_hot = torch.zeros([N, args.num_states])
+        curr_joint_state_one_hot[range(0, N), curr_joint_state] = 1
+
+        curr_state_dist = torch.matmul(W, curr_joint_state_one_hot)
+
+        for agent_index in range(0, N):
+            agent_state = curr_joint_state[agent_index]
+            agent_state_one_hot = curr_joint_state_one_hot[agent_index, :]
+            agent_state_dist = curr_state_dist[agent_index, :]
+            agent_state_dist_mean = torch.dot(states_float, agent_state_dist)
+            """ ------- Local Policy --------- """
+            agent_action = Categorical(actor(agent_state_one_hot, curr_mf_state_dist)).sample()
+
+            agent_reward = args.alpha_r * agent_state - args.beta_r * agent_state_dist_mean - args.lambda_r * agent_action
+            curr_average_reward += agent_reward/N
+
+            # Next State for the agent
+            if agent_action == 1:
+                chi = torch.rand(1)
+                fraction = 1 - (agent_state_dist_mean/args.num_states)
+                next_joint_state[agent_index] = curr_joint_state[agent_index] + (chi*fraction*(args.num_states - 1 - curr_joint_state[agent_index])).long()
+            else:
+                next_joint_state[agent_index] = curr_joint_state[agent_index]
+
+        ValueRewardMARL += curr_gamma*args.gamma*curr_average_reward
+        curr_gamma *= args.gamma
+
+        """ --------------- Mean-Field Update ------------ """
+        curr_mf_state_dist_mean = torch.dot(states_float, curr_mf_state_dist)
+        mf_fraction = 1 - (curr_mf_state_dist_mean / args.num_states)
+
+        next_mf_state_dist = torch.zeros(args.num_states)
+
+        for state_t in range(0, args.num_states):
+            one_hot_state_t = torch.zeros(args.num_states)
+            one_hot_state_t[state_t] = 1
+
+            for action_t in range(0, args.num_actions):
+                dist_vec = torch.zeros(args.num_states)
+                if action_t == 0:
+                    dist_vec[state_t] = 1
+                else:
+                    prob_mass = 1 / (mf_fraction * (args.num_states - 1 - state_t))
+                    total_prob = torch.tensor(1.0)
+                    state_t_plus_1 = state_t
+                    while total_prob > 0 and state_t_plus_1 < args.num_states:
+                        dist_vec[state_t_plus_1] = torch.minimum(prob_mass, total_prob)
+                        total_prob -= torch.minimum(prob_mass, total_prob)
+                        state_t_plus_1 += 1
+
+                prob = actor(one_hot_state_t, curr_mf_state_dist)[action_t] * curr_mf_state_dist[state_t]
+                next_mf_state_dist += dist_vec * prob
+
+        """ ----------- Update -------------------- """
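+        # Advance the finite-agent joint state together with the infinite-population
+        # mean-field distribution before the next evaluation step.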
+        curr_joint_state = copy.copy(next_joint_state)
+        curr_mf_state_dist = copy.copy(next_mf_state_dist)
+
+    return ValueRewardMARL
+
+
+def evaluateMARLNonLocal(args, N):
+    actor = Actor(args.num_states, args.num_actions)
+
+    if not os.path.exists(f'Models/Actor.pkl'):
+        raise ValueError('Model does not exist.')
+    actor.load_state_dict(torch.load(f'Models/Actor.pkl'))
+
+    # Initial state distribution
+    init_state_dist = torch.ones(args.num_states)/args.num_states
+
+    # Current Joint State
+    curr_joint_state = Categorical(init_state_dist).sample([N]).long()
+    next_joint_state = torch.zeros(N).long()
+
+    # Floating point representation of states
+    states_float = torch.tensor(range(0, args.num_states)).float()
+
+    # Doubly Stochastic Interaction Matrix
+    W = torch.ones([N, N])/N
+
+    ValueRewardMARL = 0
+    curr_gamma = 1
+
+    for iter_count in range(args.run_eval):
+        curr_average_reward = 0
+
+        curr_joint_state_one_hot = torch.zeros([N, args.num_states])
+        curr_joint_state_one_hot[range(0, N), curr_joint_state] = 1
+
+        curr_state_dist = torch.matmul(W, curr_joint_state_one_hot)
+
+        for agent_index in range(0, N):
+            agent_state = curr_joint_state[agent_index]
+            agent_state_one_hot = curr_joint_state_one_hot[agent_index, :]
+            agent_state_dist = curr_state_dist[agent_index, :]
+            agent_state_dist_mean = torch.dot(states_float, agent_state_dist)
+            agent_action = Categorical(actor(agent_state_one_hot, agent_state_dist)).sample()
+
+            agent_reward = args.alpha_r * agent_state - args.beta_r * agent_state_dist_mean - args.lambda_r * agent_action
+            curr_average_reward += agent_reward/N
+
+            # Next State for the agent
+            if agent_action == 1:
+                chi = torch.rand(1)
+                fraction = 1 - (agent_state_dist_mean/args.num_states)
+                next_joint_state[agent_index] = curr_joint_state[agent_index] + (chi*fraction*(args.num_states - 1 - curr_joint_state[agent_index])).long()
+            else:
+                next_joint_state[agent_index] = curr_joint_state[agent_index]
+
+        ValueRewardMARL += curr_gamma*args.gamma*curr_average_reward
+        curr_gamma *= args.gamma
+
+        """ ----------- State Update -------------------- """
+        curr_joint_state = copy.copy(next_joint_state)
+
+    return ValueRewardMARL
diff --git a/Scripts/Parameters.py b/Scripts/Parameters.py
new file mode 100644
index 0000000..b4f4b0d
--- /dev/null
+++ b/Scripts/Parameters.py
@@ -0,0 +1,35 @@
+import argparse
+
+
+def ParseInput():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--train', action='store_true', help='enable training from scratch')
+
+    """ ---------- Simulation Parameters ---------- """
+    parser.add_argument('--minN', type=int, default=5, dest='minN', help='minimum value of N')
+    parser.add_argument('--numN', type=int, default=20, dest='numN', help='number of N values')
+    parser.add_argument('--divN', type=int, default=5, dest='divN', help='difference between two consecutive N values')
+    parser.add_argument('--maxSeed', type=int, default=25, dest='maxSeed', help='number of random seeds')
+
+    args = parser.parse_args()
+
+    """ ---------- Algorithm Hyperparameters ------- """
+
+    args.num_actions = 2
+    args.num_states = 10
+    args.J = 10 ** 2  # Number of iterations for training the neural network based policy
+    args.L = 10 ** 2  # Number of samples per iteration for the inner natural gradient sub-problem
+    args.run_eval = 10 ** 2  # Number of iterations for evaluating a policy
+    args.gamma = 0.9  # Discount factor
+
+    """ --------- Reward Parameters --------- """
+    args.alpha_r = 1
+    args.beta_r = 0.5
+    args.lambda_r = 0.5
+
+    """ ----------- Learning Parameters --------- """
+    args.alpha = 10**-3
+    args.eta = 10**-3
+    args.hidden_size = 32
+
+    return args
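For a quick sanity check outside of Main.py, the snippet below is a minimal sketch of how one might load the shipped Models/Actor.pkl and query the trained policy directly; the Namespace fields simply mirror the defaults in Scripts/Parameters.py, and the state index 3 is an arbitrary choice.

```
import torch
from argparse import Namespace
from Scripts.Algorithm import Actor, evaluateMARLLocal, evaluateMARLNonLocal

# Hyperparameters copied from the defaults in Scripts/Parameters.py.
args = Namespace(num_states=10, num_actions=2, hidden_size=32, gamma=0.9,
                 run_eval=10 ** 2, alpha_r=1, beta_r=0.5, lambda_r=0.5)

# Action probabilities of the trained policy for an agent in state 3
# under a uniform mean-field distribution.
actor = Actor(args.num_states, args.num_actions, args.hidden_size)
actor.load_state_dict(torch.load('Models/Actor.pkl'))
state_one_hot = torch.zeros(args.num_states)
state_one_hot[3] = 1
mean_field = torch.ones(args.num_states) / args.num_states
print(actor(state_one_hot, mean_field))

# Discounted values of the local and non-local executions in a 10-agent system;
# the gap between them is the quantity Main.py averages and plots as Results/Error.png.
print(evaluateMARLLocal(args, N=10))
print(evaluateMARLNonLocal(args, N=10))
```

Main.py averages these stochastic estimates over maxSeed repetitions before plotting, so a single call will fluctuate from run to run.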