1 Star 0 Fork 0

张志阳/reinforcejs

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
waterworld.html 15.77 KB
一键复制 编辑 原始数据 按行查看 历史
Jorge Alvaro 提交于 2015-09-16 06:01 +08:00 . add "agent view", "human player" and "stats"
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<title>REINFORCEjs: WaterWorld demo</title>
<meta name="description" content="">
<meta name="author" content="">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- jquery and jqueryui -->
<script src="https://code.jquery.com/jquery-2.1.3.min.js"></script>
<link href="external/jquery-ui.min.css" rel="stylesheet">
<script src="external/jquery-ui.min.js"></script>
<!-- bootstrap -->
<script src="http://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
<link href="http://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css" rel="stylesheet">
<!-- d3js -->
<script type="text/javascript" src="external/d3.min.js"></script>
<!-- markdown -->
<script type="text/javascript" src="external/marked.js"></script>
<script type="text/javascript" src="external/highlight.pack.js"></script>
<link rel="stylesheet" href="external/highlight_default.css">
<script>hljs.initHighlightingOnLoad();</script>
<!-- mathjax: nvm now loaded dynamically
<script type="text/javascript" src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
-->
<!-- rljs -->
<script type="text/javascript" src="lib/rl.js"></script>
<!-- flotjs -->
<script src="external/jquery.flot.min.js"></script>
<!-- environment dynamics -->
<script src="waterworld.js"></script>
<!-- GA -->
<script>
// Google Analytics bootstrap snippet.
// Defines window.ga (unless it already exists) as a function that pushes its
// arguments onto ga.q, stores a timestamp in ga.l, then creates an async
// <script> element pointing at analytics.js and inserts it before the first
// <script> element of the document.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
// Queue the tracker creation and a pageview hit.
ga('create', 'UA-3698471-24', 'auto');
ga('send', 'pageview');
</script>
<style>
#wrap {
width:800px;
margin-left: auto;
margin-right: auto;
}
h2 {
text-align: center;
}
body {
font-family: Arial, "Helvetica Neue", Helvetica, sans-serif;
}
canvas {
border: 1px solid black;
}
</style>
<script type="application/javascript">
// Canvas element and its 2D drawing context; both are assigned in start().
var canvas, ctx;
// When true, draw() skips rendering the items; flipped by toggleAgentView().
var agentView = false;
// Set to true by enableHuman() once the keyboard-controlled agent has been added.
var humanControls = false;
// Render one frame: clear the canvas, then paint the walls, every agent
// (body plus one ray per eye) and, unless agent view is active, the items.
function draw() {
  var FULL_TURN = Math.PI * 2;
  var BLACK = "rgb(0,0,0)";
  var k, j;

  ctx.clearRect(0, 0, canvas.width, canvas.height);
  ctx.lineWidth = 1;
  var agentList = w.agents;

  // Walls: collect every segment into a single path and stroke it once.
  ctx.strokeStyle = BLACK;
  ctx.beginPath();
  var wallCount = w.walls.length;
  for (k = 0; k < wallCount; k++) {
    var wall = w.walls[k];
    ctx.moveTo(wall.p1.x, wall.p1.y);
    ctx.lineTo(wall.p2.x, wall.p2.y);
  }
  ctx.stroke();

  // Agents: all bodies share one fixed fill colour.
  ctx.fillStyle = "rgb(0, 150, 150)";
  ctx.strokeStyle = BLACK;
  var agentCount = agentList.length;
  for (k = 0; k < agentCount; k++) {
    var agent = agentList[k];

    // Body outline uses whatever stroke colour is current (not reset per agent).
    ctx.beginPath();
    ctx.arc(agent.op.x, agent.op.y, agent.rad, 0, FULL_TURN, true);
    ctx.fill();
    ctx.stroke();

    // One ray per eye, coloured by what the eye currently senses.
    var eyeCount = agent.eyes.length;
    for (j = 0; j < eyeCount; j++) {
      var eye = agent.eyes[j];
      var reach = eye.sensed_proximity;
      switch (eye.sensed_type) {
        case -1:
        case 0:
          ctx.strokeStyle = "rgb(200,200,200)"; // wall or nothing
          break;
        case 1:
          ctx.strokeStyle = "rgb(255,150,150)"; // apples
          break;
        case 2:
          ctx.strokeStyle = "rgb(150,255,150)"; // poison
          break;
        // any other value: keep the stroke colour that is already set
      }
      var heading = agent.oangle + eye.angle;
      ctx.beginPath();
      ctx.moveTo(agent.op.x, agent.op.y);
      ctx.lineTo(agent.op.x + reach * Math.sin(heading),
                 agent.op.y + reach * Math.cos(heading));
      ctx.stroke();
    }
  }

  // Items are hidden entirely while agent view is on.
  ctx.strokeStyle = BLACK;
  if (agentView) {
    return;
  }
  var itemCount = w.items.length;
  for (k = 0; k < itemCount; k++) {
    var item = w.items[k];
    switch (item.type) {
      case 1:
        ctx.fillStyle = "rgb(255, 150, 150)";
        break;
      case 2:
        ctx.fillStyle = "rgb(150, 255, 150)";
        break;
      // other types reuse the previous fill colour
    }
    ctx.beginPath();
    ctx.arc(item.p.x, item.p.y, item.rad, 0, FULL_TURN, true);
    ctx.fill();
    ctx.stroke();
  }
}
// Tick the world
// smooth_reward_history[i] is the list of recorded smoothed rewards for agent i
// (start() and enableHuman() push one empty list per agent they create).
var smooth_reward_history = []; // [][];
// smooth_reward[i] is the running exponential average of agent i's last_reward.
var smooth_reward = [];
// Counts calls to tick(); wraps back to 0 every 50 calls.
var flott = 0;

// Advance the simulation by one timer tick: step the world (50 steps when
// simspeed is 3, otherwise one), redraw, refresh the stats list, update every
// agent's smoothed reward, and show the first agent's diagnostics.
function tick() {
  var steps = (simspeed === 3) ? 50 : 1;
  for (var k = 0; k < steps; k++) {
    w.tick();
  }
  draw();
  updateStats();

  flott += 1;
  var record = (flott === 50); // only every 50th call appends to the history
  // `i` is declared locally; the original loop leaked it as an implicit global.
  for (var i = 0; i < w.agents.length; i++) {
    var rew = w.agents[i].last_reward;
    if (!smooth_reward[i]) { smooth_reward[i] = 0; }
    smooth_reward[i] = smooth_reward[i] * 0.999 + rew * 0.001;
    if (record) {
      // Tolerate an agent that was added without a matching history list.
      if (!smooth_reward_history[i]) { smooth_reward_history[i] = []; }
      // Keep at most nflot samples by dropping the oldest one.
      if (smooth_reward_history[i].length >= nflot) {
        smooth_reward_history[i] = smooth_reward_history[i].slice(1);
      }
      smooth_reward_history[i].push(smooth_reward[i]);
    }
  }
  if (record) {
    flott = 0;
  }

  // Diagnostics of the first agent, shown only when the agent exposes them.
  var agent = w.agents[0];
  if (typeof agent.expi !== 'undefined') {
    $("#expi").html(agent.expi);
  }
  if (typeof agent.tderror !== 'undefined') {
    $("#tde").html(agent.tderror.toFixed(3));
  }
}
// flot stuff
// Maximum number of smoothed-reward samples kept per agent; also the x-axis range.
var nflot = 1000;

// Create the reward plot inside #flotreward with two filled line series
// (agent ids 0 and 1) and refresh it from getFlotRewards() every 100 ms.
function initFlot() {
  var container = $("#flotreward");
  // Declared locally; the original assignment created an implicit global.
  var series = [{
    data: getFlotRewards(0),
    lines: {fill: true}
  }, {
    data: getFlotRewards(1),
    lines: {fill: true}
  }];
  var plot = $.plot(container, series, {
    grid: {
      borderWidth: 1,
      minBorderMargin: 20,
      labelMargin: 10,
      backgroundColor: {
        colors: ["#FFF", "#e4f4f4"]
      },
      margin: {
        top: 10,
        bottom: 10,
        left: 10
      }
    },
    xaxis: {
      min: 0,
      max: nflot
    },
    yaxis: {
      min: -0.1,
      max: 0.1
    }
  });
  setInterval(function() {
    // Iterate over the series that exist rather than over the agents, so a
    // world with more agents than series cannot index past the array.
    // getFlotRewards() returns [] for an id that has no agent yet.
    for (var i = 0; i < series.length; i++) {
      series[i].data = getFlotRewards(i);
    }
    plot.setData(series);
    plot.draw();
  }, 100);
}
// Convert the recorded smoothed rewards of one agent into flot's
// [[x, y], ...] format, using the sample index as x.
// Returns an empty list when agentId has no agent or no history yet.
function getFlotRewards(agentId) {
  var points = [];
  var hasAgent = agentId < w.agents.length;
  var history = smooth_reward_history[agentId];
  if (!hasAgent || !history) {
    return points;
  }
  var total = history.length;
  var idx = 0;
  while (idx < total) {
    points.push([idx, history[idx]]);
    idx += 1;
  }
  return points;
}
// Current speed mode: 0 = slow, 1 = normal, 2 = fast, 3 = very fast.
// tick() runs 50 world steps per call when this is 3.
var simspeed = 2;

// Shared implementation of the speed buttons: restart the tick timer with the
// given period (in ms) and record the skip-draw flag and speed mode.
function applySimSpeed(intervalMs, skip, speed) {
  window.clearInterval(current_interval_id);
  current_interval_id = setInterval(tick, intervalMs);
  skipdraw = skip;
  simspeed = speed;
}

// "Go very fast": 0 ms timer, and tick() batches 50 world steps per call.
function goveryfast() {
  applySimSpeed(0, true, 3);
}

// "Go fast": 0 ms timer, one world step per call.
function gofast() {
  applySimSpeed(0, true, 2);
}

// "Go normal": one world step every 30 ms.
function gonormal() {
  applySimSpeed(30, false, 1);
}

// "Go slow": one world step every 200 ms.
function goslow() {
  applySimSpeed(200, false, 0);
}
// Reveal the #mysterybox textarea and fill it with the first agent's brain
// serialised as JSON.
function saveAgent() {
  var firstBrain = w.agents[0].brain;
  var box = $("#mysterybox");
  box.fadeIn();
  var serialised = JSON.stringify(firstBrain.toJSON());
  box.val(serialised);
}
// Replace the first agent's brain with a fresh RL.DQNAgent built from whatever
// is currently in the #agentspec textarea.
function resetAgent() {
// NOTE(review): eval() executes the textarea contents as code. The default
// textarea text declares `spec`, which the next line reads, so this has to
// remain a direct eval in non-strict code for `spec` to be visible here.
eval($("#agentspec").val())
// `env` is not declared in this function; start() assigns it when it creates the first agent.
var brain = new RL.DQNAgent(env, spec);
w.agents[0].brain = brain;
}
// Fetch the pretrained network from agentzoo/wateragent.json and load it into
// the first agent's brain, then switch that brain to a low-exploration,
// non-learning configuration and sync the epsilon controls.
function loadAgent() {
  var onLoaded = function(payload) {
    var brain = w.agents[0].brain;
    brain.fromJSON(payload); // cross your fingers...

    // Much lower epsilon for more optimal behavior.
    brain.epsilon = 0.05;
    $("#slider").slider('value', brain.epsilon);
    $("#eps").html(brain.epsilon.toFixed(2));

    // Zero learning rate so the loaded agent does not keep learning.
    brain.alpha = 0;
  };
  $.getJSON("agentzoo/wateragent.json", onLoaded);
}
// Flip the agentView flag, which draw() checks to decide whether items are rendered.
function toggleAgentView() {
  agentView = agentView ? false : true;
}
// Key code of the most recent keydown event.
var lastKey = null;

// Keyboard control for the human player. Pressing an arrow key enables the
// human agent (if it is not enabled yet), suppresses the browser's default
// handling of the key, and stores the matching action index in humanAction
// for the human agent's forward() to consume.
document.onkeydown = function(e) {
  // Use the event passed by the browser; fall back to window.event when the
  // handler is invoked without an argument.
  var event = e || window.event;
  lastKey = event.keyCode;

  var action;
  switch (lastKey) {
    case 37: action = 0; break; // left arrow
    case 39: action = 1; break; // right arrow
    case 38: action = 2; break; // up arrow
    case 40: action = 3; break; // down arrow
    default: return;            // other keys are ignored
  }

  enableHuman();
  // Call preventDefault on the normalised event; the original called it on
  // `e`, which throws when no event argument was passed.
  if (typeof event.preventDefault === 'function') {
    event.preventDefault();
  } else {
    event.returnValue = false;
  }
  humanAction = action;
};
// Action chosen by the keyboard handler and not yet consumed; -1 means none.
var humanAction = -1;

// Add a keyboard-controlled agent to the world. Only the first call has any
// effect; later calls return immediately.
function enableHuman() {
  if (humanControls) {
    return;
  }
  humanControls = true;

  var player = new Agent();
  // Take the pending keyboard action as this step's action and clear it.
  player.forward = function() {
    this.action = humanAction;
    humanAction = -1;
  };
  // Stand-in brain whose learn() does nothing.
  player.brain = {
    learn: function(reward) {
      // Do nothing;
    }
  };

  w.agents.push(player);
  smooth_reward_history.push([]); // reward history slot for the new agent
}
var w; // global world object
// Environment object passed to RL.DQNAgent. Declared at file scope because
// resetAgent() reads it as well; the original created it as an implicit global.
var env;
var current_interval_id; // id of the running tick timer
var skipdraw = false;

// Page entry point (called from <body onload>): grab the canvas, build the
// world with one DQN agent configured from the #agentspec textarea, wire up
// the epsilon slider, start the reward plot and the tick timer, and render the
// markdown sections.
function start() {
  canvas = document.getElementById("canvas");
  ctx = canvas.getContext("2d");

  // NOTE(review): eval() executes the user-editable textarea as code. It has
  // to stay a direct eval in non-strict code so that the `spec` variable the
  // textarea declares is visible to the rest of this function.
  eval($("#agentspec").val());

  w = new World();
  w.agents = [];
  for(var k = 0; k < 1; k++) {
    var a = new Agent();
    env = a;
    a.brain = new RL.DQNAgent(env, spec); // give agent a TD brain
    //a.brain = new RL.RecurrentReinforceAgent(env, {});
    w.agents.push(a);
    smooth_reward_history.push([]);
  }

  // The slide callback looks the brain up at slide time, so it keeps working
  // after resetAgent() swaps in a new brain.
  $( "#slider" ).slider({
    min: 0,
    max: 1,
    value: w.agents[0].brain.epsilon,
    step: 0.01,
    slide: function(event, ui) {
      w.agents[0].brain.epsilon = ui.value;
      $("#eps").html(ui.value.toFixed(2));
    }
  });
  $("#eps").html(w.agents[0].brain.epsilon.toFixed(2));
  $("#slider").slider('value', w.agents[0].brain.epsilon);

  initFlot();
  gonormal();

  // render markdown
  $(".md").each(function(){
    $(this).html(marked($(this).html()));
  });
  renderJax();
}
// True once the MathJax <script> element has been appended.
var jaxrendered = false;

// Load MathJax by appending a <script> element to <head>. Calls after the
// first one do nothing.
function renderJax() {
  if (jaxrendered) { return; }
  var script = document.createElement("script");
  script.type = "text/javascript";
  // Loaded over https: a plain http script is blocked as mixed content when
  // the page itself is served over https.
  // NOTE(review): cdn.mathjax.org was announced as retired in 2017 - confirm
  // this host still serves (or redirects) the file, or move to a maintained CDN.
  script.src = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  document.getElementsByTagName("head")[0].appendChild(script);
  jaxrendered = true;
}
// Rebuild the #apples_and_poison list: one <li> per agent showing its apples
// and poison counters.
function updateStats() {
  var rows = [];
  var total = w.agents.length;
  for (var idx = 0; idx < total; idx++) {
    var player = w.agents[idx];
    rows.push("<li>Player " + (idx + 1) + ": " + player.apples + " apples, " + player.poison + " poison</li>");
  }
  $("#apples_and_poison").html("<ul>" + rows.join("") + "</ul>");
}
</script>
<style type="text/css">
canvas { border: 1px solid white; }
</style>
</head>
<body onload="start();">
<a href="https://github.com/karpathy/reinforcejs"><img style="position: absolute; top: 0; right: 0; border: 0;" src="https://camo.githubusercontent.com/38ef81f8aca64bb9a64448d0d70f1308ef5341ab/68747470733a2f2f73332e616d617a6f6e6177732e636f6d2f6769746875622f726962626f6e732f666f726b6d655f72696768745f6461726b626c75655f3132313632312e706e67" alt="Fork me on GitHub" data-canonical-src="https://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png"></a>
<div id="wrap">
<div id="mynav" style="border-bottom:1px solid #999; padding-bottom: 10px; margin-bottom:50px;">
<div>
<img src="loop.svg" style="width:50px;height:50px;float:left;">
<h1 style="font-size:50px;">REINFORCE<span style="color:#058;">js</span></h1>
</div>
<ul class="nav nav-pills">
<li role="presentation"><a href="index.html">About</a></li>
<li role="presentation"><a href="gridworld_dp.html">GridWorld: DP</a></li>
<li role="presentation"><a href="gridworld_td.html">GridWorld: TD</a></li>
<li role="presentation"><a href="puckworld.html">PuckWorld: DQN</a></li>
<li role="presentation" class="active"><a href="waterworld.html">WaterWorld: DQN</a></li>
</ul>
</div>
<textarea id="agentspec" style="width:100%;height:250px;">
// agent parameter spec to play with (this gets eval()'d on Agent reset)
var spec = {}
spec.update = 'qlearn'; // qlearn | sarsa
spec.gamma = 0.9; // discount factor, [0, 1)
spec.epsilon = 0.2; // initial epsilon for epsilon-greedy policy, [0, 1)
spec.alpha = 0.005; // value function learning rate
spec.experience_add_every = 5; // number of time steps before we add another experience to replay memory
spec.experience_size = 10000; // size of experience
spec.learning_steps_per_iteration = 5;
spec.tderror_clamp = 1.0; // for robustness
spec.num_hidden_units = 100 // number of neurons in hidden layer
</textarea>
<div style="text-align:center;">
<button class="btn btn-danger" onclick="resetAgent()" style="width:150px;height:50px;margin-bottom:5px;">Reinit agent</button>
<button class="btn btn-success" onclick="goveryfast()" style="width:150px;height:50px;margin-bottom:5px;">Go very fast</button>
<button class="btn btn-success" onclick="gofast()" style="width:150px;height:50px;margin-bottom:5px;">Go fast</button>
<button class="btn btn-success" onclick="gonormal()" style="width:150px;height:50px;margin-bottom:5px;">Go normal</button>
<button class="btn btn-success" onclick="goslow()" style="width:150px;height:50px;margin-bottom:5px;">Go slow</button>
<button class="btn btn-danger" onclick="toggleAgentView()" style="width:150px;height:50px;margin-bottom:5px;">Toggle Agent View</button>
<button class="btn btn-danger" onclick="enableHuman()" style="width:150px;height:50px;margin-bottom:5px;">Start playing (use arrow keys)</button>
<canvas id="canvas" width="700" height="500"></canvas>
</div>
<div id="apples_and_poison"></div>
<div id="brain_info_div"></div>
<button class="btn btn-primary" onclick="loadAgent()" style="width:200px;height:35px;margin-bottom:5px;margin-right:20px;">Load a Pretrained Agent</button>
<br>
Exploration epsilon: <span id="eps">0.15</span> <div id="slider"></div>
<br>
<div id="expi"></div>
<div id="tde"></div>
<div id="flotreward" style="width:800px; height: 400px;"></div>
<textarea id="mysterybox" style="width:100%;display:none;">mystery text box</textarea>
<div id="exp" class="md">
### Setup
This is another Deep Q Learning demo with a more realistic and larger setup:
- The **state space** is even larger and continuous: The agent has 30 eye sensors pointing in all directions, and in each direction it observes 5 variables: the range, the type of sensed object (green, red), and the velocity of the sensed object. The agent's proprioception includes two additional sensors for its own speed in both x and y directions. This gives a 152-dimensional state space in total.
- There are 4 **actions** available to the agent: To apply thrusters to the left, right, up and down. This gives the agent control over its velocity.
- The **dynamics** integrate the velocity of the agent to change its position. The green and red targets bounce around.
- The **reward** awarded to the agent is +1 for making contact with any red target (these are apples) and -1 for making contact with any green target (this is poison).
The optimal strategy of the agent is to cruise around, run away from green targets and eat red targets. What's interesting about this demo is that the state space is so high-dimensional, and also that the sensed variables are agent-relative. They're not just toy x,y coordinates of some fixed number of targets as in the previous demo.
</div>
<br><br><br><br>
</div>
</body>
</html>
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
JavaScript
1
https://gitee.com/zhang-zhiyang/reinforcejs.git
git@gitee.com:zhang-zhiyang/reinforcejs.git
zhang-zhiyang
reinforcejs
reinforcejs
master

搜索帮助