macop.policies.reinforcement.html 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. <!DOCTYPE html>
  2. <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
  3. <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
  4. <head>
  5. <meta charset="utf-8">
  6. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  7. <title>macop.policies.reinforcement &mdash; macop v1.0.5 documentation</title>
  8. <script type="text/javascript" src="../_static/js/modernizr.min.js"></script>
  9. <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
  10. <script type="text/javascript" src="../_static/jquery.js"></script>
  11. <script type="text/javascript" src="../_static/underscore.js"></script>
  12. <script type="text/javascript" src="../_static/doctools.js"></script>
  13. <script type="text/javascript" src="../_static/language_data.js"></script>
  14. <script type="text/javascript" src="../_static/js/theme.js"></script>
  15. <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  16. <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
  17. <link rel="stylesheet" href="../_static/css/custom.css" type="text/css" />
  18. <link rel="index" title="Index" href="../genindex.html" />
  19. <link rel="search" title="Search" href="../search.html" />
  20. <link rel="next" title="macop.solutions.base" href="macop.solutions.base.html" />
  21. <link rel="prev" title="macop.policies.classicals" href="macop.policies.classicals.html" />
  22. </head>
  23. <body class="wy-body-for-nav">
  24. <div class="wy-grid-for-nav">
  25. <nav data-toggle="wy-nav-shift" class="wy-nav-side">
  26. <div class="wy-side-scroll">
  27. <div class="wy-side-nav-search" >
  28. <a href="../index.html" class="icon icon-home"> macop
  29. </a>
  30. <div class="version">
  31. 1.0.5
  32. </div>
  33. <div role="search">
  34. <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
  35. <input type="text" name="q" placeholder="Search docs" />
  36. <input type="hidden" name="check_keywords" value="yes" />
  37. <input type="hidden" name="area" value="default" />
  38. </form>
  39. </div>
  40. </div>
  41. <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
  42. <p class="caption"><span class="caption-text">Contents:</span></p>
  43. <ul class="current">
  44. <li class="toctree-l1"><a class="reference internal" href="../description.html">Description</a></li>
  45. <li class="toctree-l1 current"><a class="reference internal" href="../api.html">API</a><ul class="current">
  46. <li class="toctree-l2"><a class="reference internal" href="../api.html#macop">macop</a></li>
  47. <li class="toctree-l2"><a class="reference internal" href="../api.html#macop-algorithms">macop.algorithms</a></li>
  48. <li class="toctree-l2"><a class="reference internal" href="../api.html#macop-callbacks">macop.callbacks</a></li>
  49. <li class="toctree-l2"><a class="reference internal" href="../api.html#macop-evaluators">macop.evaluators</a></li>
  50. <li class="toctree-l2"><a class="reference internal" href="../api.html#macop-operators">macop.operators</a></li>
  51. <li class="toctree-l2 current"><a class="reference internal" href="../api.html#macop-policies">macop.policies</a><ul class="current">
  52. <li class="toctree-l3"><a class="reference internal" href="macop.policies.base.html">macop.policies.base</a></li>
  53. <li class="toctree-l3"><a class="reference internal" href="macop.policies.classicals.html">macop.policies.classicals</a></li>
  54. <li class="toctree-l3 current"><a class="current reference internal" href="#">macop.policies.reinforcement</a></li>
  55. </ul>
  56. </li>
  57. <li class="toctree-l2"><a class="reference internal" href="../api.html#macop-solution">macop.solution</a></li>
  58. <li class="toctree-l2"><a class="reference internal" href="../api.html#macop-utils">macop.utils</a></li>
  59. </ul>
  60. </li>
  61. <li class="toctree-l1"><a class="reference internal" href="../documentations/index.html">Documentation</a></li>
  62. <li class="toctree-l1"><a class="reference internal" href="../examples.html">Some examples</a></li>
  63. <li class="toctree-l1"><a class="reference internal" href="../contributing.html">Contributing</a></li>
  64. </ul>
  65. </div>
  66. </div>
  67. </nav>
  68. <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
  69. <nav class="wy-nav-top" aria-label="top navigation">
  70. <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  71. <a href="../index.html">macop</a>
  72. </nav>
  73. <div class="wy-nav-content">
  74. <div class="rst-content">
  75. <div role="navigation" aria-label="breadcrumbs navigation">
  76. <ul class="wy-breadcrumbs">
  77. <li><a href="../index.html">Docs</a> &raquo;</li>
  78. <li><a href="../api.html">API</a> &raquo;</li>
  79. <li>macop.policies.reinforcement</li>
  80. <li class="wy-breadcrumbs-aside">
  81. <a href="https://github.com/prise-3d/macop/blob/master/docs/source/macop/macop.policies.reinforcement.rst" class="fa fa-github"> Edit on GitHub</a>
  82. </li>
  83. </ul>
  84. <hr/>
  85. </div>
  86. <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
  87. <div itemprop="articleBody">
  88. <div class="section" id="module-macop.policies.reinforcement">
  89. <span id="macop-policies-reinforcement"></span><h1>macop.policies.reinforcement<a class="headerlink" href="#module-macop.policies.reinforcement" title="Permalink to this headline">¶</a></h1>
  90. <p>Reinforcement learning policy class implementations for the Operator Selection Strategy</p>
  91. <p class="rubric">Classes</p>
  92. <table class="longtable docutils align-default">
  93. <colgroup>
  94. <col style="width: 10%" />
  95. <col style="width: 90%" />
  96. </colgroup>
  97. <tbody>
  98. <tr class="row-odd"><td><p><a class="reference internal" href="#macop.policies.reinforcement.UCBPolicy" title="macop.policies.reinforcement.UCBPolicy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">UCBPolicy</span></code></a>(operators[, C, exp_rate])</p></td>
  99. <td><p>Upper Confidence Bound (UCB) policy class, used to apply the UCB strategy when selecting and applying an operator</p></td>
  100. </tr>
  101. </tbody>
  102. </table>
  103. <dl class="class">
  104. <dt id="macop.policies.reinforcement.UCBPolicy">
  105. <em class="property">class </em><code class="sig-prename descclassname">macop.policies.reinforcement.</code><code class="sig-name descname">UCBPolicy</code><span class="sig-paren">(</span><em class="sig-param">operators</em>, <em class="sig-param">C=100.0</em>, <em class="sig-param">exp_rate=0.5</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/macop/policies/reinforcement.html#UCBPolicy"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#macop.policies.reinforcement.UCBPolicy" title="Permalink to this definition">¶</a></dt>
  106. <dd><p>Upper Confidence Bound (UCB) policy class, used to apply the UCB strategy when selecting and applying an operator</p>
  107. <p>Rather than performing exploration by simply selecting an arbitrary action, chosen with a probability that remains constant,
  108. the UCB algorithm changes its exploration-exploitation balance as it gathers more knowledge of the environment.
  109. It moves from being primarily focused on exploration, when actions that have been tried the least are preferred,
  110. to instead concentrate on exploitation, selecting the action with the highest estimated reward.</p>
  111. <ul class="simple">
  112. <li><p>Resource link: <a class="reference external" href="https://banditalgs.com/2016/09/18/the-upper-confidence-bound-algorithm/">https://banditalgs.com/2016/09/18/the-upper-confidence-bound-algorithm/</a></p></li>
  113. </ul>
  114. <dl class="attribute">
  115. <dt id="macop.policies.reinforcement.UCBPolicy.operators">
  116. <code class="sig-name descname">operators</code><a class="headerlink" href="#macop.policies.reinforcement.UCBPolicy.operators" title="Permalink to this definition">¶</a></dt>
  117. <dd><p>{[Operator]} – list of selected operators for the algorithm</p>
  118. </dd></dl>
  119. <dl class="attribute">
  120. <dt id="macop.policies.reinforcement.UCBPolicy.C">
  121. <code class="sig-name descname">C</code><a class="headerlink" href="#macop.policies.reinforcement.UCBPolicy.C" title="Permalink to this definition">¶</a></dt>
  122. <dd><p>{float} – The second half of the UCB equation adds exploration, with the degree of exploration being controlled by the hyper-parameter <cite>C</cite>.</p>
  123. </dd></dl>
  124. <dl class="attribute">
  125. <dt id="macop.policies.reinforcement.UCBPolicy.exp_rate">
  126. <code class="sig-name descname">exp_rate</code><a class="headerlink" href="#macop.policies.reinforcement.UCBPolicy.exp_rate" title="Permalink to this definition">¶</a></dt>
  127. <dd><p>{float} – exploration rate (probability of choosing the next operator at random)</p>
  128. </dd></dl>
  129. <dl class="attribute">
  130. <dt id="macop.policies.reinforcement.UCBPolicy.rewards">
  131. <code class="sig-name descname">rewards</code><a class="headerlink" href="#macop.policies.reinforcement.UCBPolicy.rewards" title="Permalink to this definition">¶</a></dt>
  132. <dd><p>{[float]} – list of summed rewards obtained for each operator</p>
  133. </dd></dl>
  134. <dl class="attribute">
  135. <dt id="macop.policies.reinforcement.UCBPolicy.occurrences">
  136. <code class="sig-name descname">occurrences</code><a class="headerlink" href="#macop.policies.reinforcement.UCBPolicy.occurrences" title="Permalink to this definition">¶</a></dt>
  137. <dd><p>{[int]} – number of times each operator has been used (selected)</p>
  138. </dd></dl>
  139. <p>Example:</p>
  140. <div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="c1"># operators import</span>
  141. <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">macop.operators.discrete.crossovers</span> <span class="kn">import</span> <span class="n">SimpleCrossover</span>
  142. <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">macop.operators.discrete.mutators</span> <span class="kn">import</span> <span class="n">SimpleMutation</span>
  143. <span class="gp">&gt;&gt;&gt; </span><span class="c1"># policy import</span>
  144. <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">macop.policies.reinforcement</span> <span class="kn">import</span> <span class="n">UCBPolicy</span>
  145. <span class="gp">&gt;&gt;&gt; </span><span class="c1"># solution and algorithm</span>
  146. <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">macop.solutions.discrete</span> <span class="kn">import</span> <span class="n">BinarySolution</span>
  147. <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">macop.algorithms.mono</span> <span class="kn">import</span> <span class="n">IteratedLocalSearch</span>
  148. <span class="gp">&gt;&gt;&gt; </span><span class="c1"># evaluator import</span>
  149. <span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">macop.evaluators.discrete.mono</span> <span class="kn">import</span> <span class="n">KnapsackEvaluator</span>
  150. <span class="gp">&gt;&gt;&gt; </span><span class="c1"># evaluator initialization (worth of each object passed in as data)</span>
  151. <span class="gp">&gt;&gt;&gt; </span><span class="n">worths</span> <span class="o">=</span> <span class="p">[</span> <span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">20</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">20</span><span class="p">)</span> <span class="p">]</span>
  152. <span class="gp">&gt;&gt;&gt; </span><span class="n">evaluator</span> <span class="o">=</span> <span class="n">KnapsackEvaluator</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;worths&#39;</span><span class="p">:</span> <span class="n">worths</span><span class="p">})</span>
  153. <span class="gp">&gt;&gt;&gt; </span><span class="c1"># validator specification (based on the weight of each object)</span>
  154. <span class="gp">&gt;&gt;&gt; </span><span class="n">weights</span> <span class="o">=</span> <span class="p">[</span> <span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">30</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">20</span><span class="p">)</span> <span class="p">]</span>
  155. <span class="gp">&gt;&gt;&gt; </span><span class="n">validator</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">solution</span><span class="p">:</span> <span class="kc">True</span> <span class="k">if</span> <span class="nb">sum</span><span class="p">([</span><span class="n">weights</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">solution</span><span class="o">.</span><span class="n">_data</span><span class="p">)</span> <span class="k">if</span> <span class="n">value</span> <span class="o">==</span> <span class="mi">1</span><span class="p">])</span> <span class="o">&lt;</span> <span class="mi">200</span> <span class="k">else</span> <span class="kc">False</span>
  156. <span class="gp">&gt;&gt;&gt; </span><span class="c1"># initializer function with lambda function</span>
  157. <span class="gp">&gt;&gt;&gt; </span><span class="n">initializer</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="o">=</span><span class="mi">20</span><span class="p">:</span> <span class="n">BinarySolution</span><span class="o">.</span><span class="n">random</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">validator</span><span class="p">)</span>
  158. <span class="gp">&gt;&gt;&gt; </span><span class="c1"># operators list with crossover and mutation</span>
  159. <span class="gp">&gt;&gt;&gt; </span><span class="n">operators</span> <span class="o">=</span> <span class="p">[</span><span class="n">SimpleCrossover</span><span class="p">(),</span> <span class="n">SimpleMutation</span><span class="p">()]</span>
  160. <span class="gp">&gt;&gt;&gt; </span><span class="n">policy</span> <span class="o">=</span> <span class="n">UCBPolicy</span><span class="p">(</span><span class="n">operators</span><span class="p">)</span>
  161. <span class="gp">&gt;&gt;&gt; </span><span class="n">algo</span> <span class="o">=</span> <span class="n">IteratedLocalSearch</span><span class="p">(</span><span class="n">initializer</span><span class="p">,</span> <span class="n">evaluator</span><span class="p">,</span> <span class="n">operators</span><span class="p">,</span> <span class="n">policy</span><span class="p">,</span> <span class="n">validator</span><span class="p">,</span> <span class="n">maximise</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
  162. <span class="gp">&gt;&gt;&gt; </span><span class="n">policy</span><span class="o">.</span><span class="n">_occurences</span>
  163. <span class="go">[0, 0]</span>
  164. <span class="gp">&gt;&gt;&gt; </span><span class="n">solution</span> <span class="o">=</span> <span class="n">algo</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
  165. <span class="gp">&gt;&gt;&gt; </span><span class="nb">type</span><span class="p">(</span><span class="n">solution</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span>
  166. <span class="go">&#39;BinarySolution&#39;</span>
  167. <span class="gp">&gt;&gt;&gt; </span><span class="n">policy</span><span class="o">.</span><span class="n">_occurences</span> <span class="c1"># one more due to first evaluation</span>
  168. <span class="go">[51, 53]</span>
  169. </pre></div>
  170. </div>
  171. <dl class="method">
  172. <dt id="macop.policies.reinforcement.UCBPolicy.apply">
  173. <code class="sig-name descname">apply</code><span class="sig-paren">(</span><em class="sig-param">solution</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/macop/policies/reinforcement.html#UCBPolicy.apply"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#macop.policies.reinforcement.UCBPolicy.apply" title="Permalink to this definition">¶</a></dt>
  174. <dd><p>Apply the chosen operator to create a new solution, compute its fitness and return the new solution</p>
  175. <ul class="simple">
  176. <li><p>fitness improvement is saved as rewards</p></li>
  177. <li><p>selected operator occurrence is also increased</p></li>
  178. </ul>
  179. <dl class="field-list simple">
  180. <dt class="field-odd">Parameters</dt>
  181. <dd class="field-odd"><p><strong>solution</strong> – {Solution} – the solution used to generate a new solution</p>
  182. </dd>
  183. <dt class="field-even">Returns</dt>
  184. <dd class="field-even"><p>{Solution} – new generated solution</p>
  185. </dd>
  186. </dl>
  187. </dd></dl>
  188. <dl class="method">
  189. <dt id="macop.policies.reinforcement.UCBPolicy.select">
  190. <code class="sig-name descname">select</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/macop/policies/reinforcement.html#UCBPolicy.select"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#macop.policies.reinforcement.UCBPolicy.select" title="Permalink to this definition">¶</a></dt>
  191. <dd><p>Select the next operator to use with the Upper Confidence Bound strategy (based on acquired rewards)</p>
  192. <dl class="field-list simple">
  193. <dt class="field-odd">Returns</dt>
  194. <dd class="field-odd"><p>the selected operator</p>
  195. </dd>
  196. <dt class="field-even">Return type</dt>
  197. <dd class="field-even"><p>{Operator}</p>
  198. </dd>
  199. </dl>
  200. </dd></dl>
  201. </dd></dl>
  202. </div>
  203. </div>
  204. </div>
  205. <footer>
  206. <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
  207. <a href="macop.solutions.base.html" class="btn btn-neutral float-right" title="macop.solutions.base" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
  208. <a href="macop.policies.classicals.html" class="btn btn-neutral float-left" title="macop.policies.classicals" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
  209. </div>
  210. <hr/>
  211. <div role="contentinfo">
  212. <p>
  213. &copy; Copyright 2020, Jérôme BUISINE
  214. </p>
  215. </div>
  216. Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
  217. </footer>
  218. </div>
  219. </div>
  220. </section>
  221. </div>
  222. <script type="text/javascript">
  223. jQuery(function () {
  224. SphinxRtdTheme.Navigation.enable(true);
  225. });
  226. </script>
  227. </body>
  228. </html>