<?xml version="1.0" encoding="UTF-8"?><marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim">
  <marc:record>
    <marc:leader>00000nam  2200000zi 4500</marc:leader>
    <marc:controlfield tag="001">9.895997</marc:controlfield>
    <marc:controlfield tag="003">CaOODSP</marc:controlfield>
    <marc:controlfield tag="005">20221107174206</marc:controlfield>
    <marc:controlfield tag="006">m     o  d f      </marc:controlfield>
    <marc:controlfield tag="007">cr cn|||||||||</marc:controlfield>
    <marc:controlfield tag="008">210201e20210201oncd    ob   f000 0 eng d</marc:controlfield>
    <marc:datafield tag="040" ind1=" " ind2=" ">
      <marc:subfield code="a">CaOODSP</marc:subfield>
      <marc:subfield code="b">eng</marc:subfield>
      <marc:subfield code="e">rda</marc:subfield>
      <marc:subfield code="c">CaOODSP</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="086" ind1="1" ind2=" ">
      <marc:subfield code="a">FB3-5/2021-7E-PDF</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="100" ind1="1" ind2=" ">
      <marc:subfield code="a">Castro, Pablo S., </marc:subfield>
      <marc:subfield code="e">author.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="245" ind1="1" ind2="0">
      <marc:subfield code="a">Estimating policy functions in payments systems using reinforcement learning / </marc:subfield>
      <marc:subfield code="c">by Pablo S. Castro, Ajit Desai, Han Du, Rodney Garratt and Francisco Rivadeneyra.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="264" ind1=" " ind2="1">
      <marc:subfield code="a">Ottawa, Ontario, Canada : </marc:subfield>
      <marc:subfield code="b">Bank of Canada = Banque du Canada, </marc:subfield>
      <marc:subfield code="c">February 1, 2021.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="264" ind1=" " ind2="4">
      <marc:subfield code="c">©2021</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="300" ind1=" " ind2=" ">
      <marc:subfield code="a">1 online resource (ii, 40 pages) : </marc:subfield>
      <marc:subfield code="b">colour graphs</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="336" ind1=" " ind2=" ">
      <marc:subfield code="a">text</marc:subfield>
      <marc:subfield code="b">txt</marc:subfield>
      <marc:subfield code="2">rdacontent</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="337" ind1=" " ind2=" ">
      <marc:subfield code="a">computer</marc:subfield>
      <marc:subfield code="b">c</marc:subfield>
      <marc:subfield code="2">rdamedia</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="338" ind1=" " ind2=" ">
      <marc:subfield code="a">online resource</marc:subfield>
      <marc:subfield code="b">cr</marc:subfield>
      <marc:subfield code="2">rdacarrier</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="490" ind1="1" ind2=" ">
      <marc:subfield code="a">Staff working paper = </marc:subfield>
      <marc:subfield code="a">Document de travail du personnel, </marc:subfield>
      <marc:subfield code="x">1701-9397 ; </marc:subfield>
      <marc:subfield code="v">2021-7</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="504" ind1=" " ind2=" ">
      <marc:subfield code="a">Includes bibliographical references (pages 20-21).</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="520" ind1="3" ind2=" ">
      <marc:subfield code="a">"This paper uses reinforcement learning (RL) to approximate the policy rules of banks participating in a high-value payments system. The objective of the agents is to learn a policy function for the choice of amount of liquidity provided to the system at the beginning of the day. Individual choices have complex strategic effects precluding a closed form solution of the optimal policy, except in simple cases. We show that in a simplified two-agent setting, agents using reinforcement learning do learn the optimal policy that minimizes the cost of processing their individual payments. We also show that in more complex settings, both agents learn to reduce their liquidity costs. Our results show the applicability of RL to estimate best-response functions in real-world strategic games"--Abstract, page ii.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="0">
      <marc:subfield code="a">Banks and banking.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="0">
      <marc:subfield code="a">Reinforcement learning.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="0">
      <marc:subfield code="a">Payment.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="6">
      <marc:subfield code="a">Banques.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="6">
      <marc:subfield code="a">Apprentissage par renforcement (Intelligence artificielle)</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="6">
      <marc:subfield code="a">Paiement.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="710" ind1="2" ind2=" ">
      <marc:subfield code="a">Bank of Canada, </marc:subfield>
      <marc:subfield code="e">issuing body.</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="830" ind1=" " ind2="0">
      <marc:subfield code="a">Staff working paper (Bank of Canada)</marc:subfield>
      <marc:subfield code="v">2021-7.</marc:subfield>
      <marc:subfield code="w">(CaOODSP)9.806221</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="856" ind1="4" ind2="0">
      <marc:subfield code="q">PDF</marc:subfield>
      <marc:subfield code="s">1.36 MB</marc:subfield>
      <marc:subfield code="u">https://publications.gc.ca/collections/collection_2021/banque-bank-canada/FB3-5-2021-7-eng.pdf</marc:subfield>
    </marc:datafield>
  </marc:record>
</marc:collection>
