{"entities":{"Q1870310":{"pageid":1881052,"ns":120,"title":"Item:Q1870310","lastrevid":48095239,"modified":"2026-01-03T15:11:55Z","type":"item","id":"Q1870310","labels":{"en":{"language":"en","value":"Least squares policy evaluation algorithms with linear function approximation"}},"descriptions":{"en":{"language":"en","value":"scientific article; zbMATH DE number 1908598"}},"aliases":{},"claims":{"P31":[{"mainsnak":{"snaktype":"value","property":"P31","hash":"fd5912e4dab4b881a8eb0eb27e7893fef55176ad","datavalue":{"value":{"entity-type":"item","numeric-id":56887,"id":"Q56887"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870310$E62C9995-A9F2-4E73-B2C6-04C2EA301503","rank":"normal"}],"P159":[{"mainsnak":{"snaktype":"value","property":"P159","hash":"20a66936bb4cc7e0564b13c409b8660e801d090e","datavalue":{"value":{"text":"Least squares policy evaluation algorithms with linear function approximation","language":"en"},"type":"monolingualtext"},"datatype":"monolingualtext"},"type":"statement","id":"Q1870310$5C8450CA-2E5B-42C2-8587-75A25640D0C3","rank":"normal"}],"P225":[{"mainsnak":{"snaktype":"value","property":"P225","hash":"1a2f6d90578df26278cec4cf53e9264a69b783bd","datavalue":{"value":"1030.93061","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870310$738A01A2-14C9-44E3-86B2-175FECFC6403","rank":"normal"}],"P27":[{"mainsnak":{"snaktype":"value","property":"P27","hash":"9a2860ebc4ae50247d1e3911f323d0209d0c8026","datavalue":{"value":"10.1023/A:1022192903948","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870310$5F32EB63-A06C-476B-AC25-B8BCCF7622B5","rank":"normal"}],"P16":[{"mainsnak":{"snaktype":"value","property":"P16","hash":"5f0fbf40b7982001332b202f83ec435af56cf0ea","datavalue":{"value":{"entity-type":"item","numeric-id":378730,"id":"Q378730"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870310$EF9A42A1-A1E2-4835-8127-133E80216DC1","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P16","hash":"f3c01a52afec20dad1cdc36d7621a85e8894bddb","datavalue":{"value":{"entity-type":"item","numeric-id":445031,"id":"Q445031"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870310$E4127FAB-4F31-4D56-B261-7673B290C773","rank":"normal"}],"P200":[{"mainsnak":{"snaktype":"value","property":"P200","hash":"0f7c04b6b94cf13c193d472c226068c28e7c8f04","datavalue":{"value":{"entity-type":"item","numeric-id":169421,"id":"Q169421"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870310$8C3CFCE8-3E28-465A-BD8F-91CBDF0D7984","rank":"normal"}],"P28":[{"mainsnak":{"snaktype":"value","property":"P28","hash":"947c49c6dc991ad4ccf38c0cba49a099db39b9c3","datavalue":{"value":{"time":"+2003-05-11T00:00:00Z","timezone":0,"before":0,"after":0,"precision":11,"calendarmodel":"http://www.wikidata.org/entity/Q1985727"},"type":"time"},"datatype":"time"},"type":"statement","id":"Q1870310$86EF38C2-9C93-4F29-974A-47E602738B00","rank":"normal"}],"P1448":[{"mainsnak":{"snaktype":"value","property":"P1448","hash":"6430f9ef994021d79cdc1181da2348bbd40ccac8","datavalue":{"value":"This paper deals with policy evaluation algorithms within the framework of infinite-horizon dynamic programming problems with discounted cost. The authors consider the discrete-time stationary Markov chain with state space \\(\\{1,2,\\dots, n\\}\\) and the cost vector \\(J\\), given by  \\[ J(i)= E\\Biggl[\\sum^\\infty_{t=0} \\alpha^t g(i_t, i_{t+ 1})/i_0= i\\Biggr], \\]  \\(i= 1,2,\\dots,n\\), where \\(i_t\\) denotes the state of the system at time \\(t\\). The authors discuss two methods to calculate \\(J\\) approximately, using simulation, temporal difference and linear cost function approximation. The first method is a new algorithm, including the use of least square subproblems that are solved recursively and with a diminishing stepsize. The second method is the \\(\\text{LSTD}(\\lambda)\\) algorithm, proposed by Boyan (to appear in Machine Learning). They state convergence results for these two algorithms.","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$32CB6006-99A1-4131-8327-C2A2C84F52CC","rank":"normal"}],"P226":[{"mainsnak":{"snaktype":"value","property":"P226","hash":"8507c5cb252c5fa8df88e8def65b15a4ca8f94f3","datavalue":{"value":"93E24","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870310$1A735299-B8E0-4046-B8A6-AAA05F0FFD60","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P226","hash":"e3bc6c7cda8279d6ad2b578e7a7e4967447c3cc4","datavalue":{"value":"93E20","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870310$F977F9BE-2EE2-4E00-8664-73EF891FCD89","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P226","hash":"1cdf15533e26fc0c4c2e22d28e655c364dfe77a6","datavalue":{"value":"60J10","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870310$8D5EC032-C174-4AEE-8F84-70D877C5DB49","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P226","hash":"e134fa16ab2f5a6350ceac50979452dda7b0aefa","datavalue":{"value":"93B18","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870310$CE366ACC-7454-4032-9E5E-E7BA147CDB04","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P226","hash":"39a0025ac480893a6c3723330b06364463bbd01f","datavalue":{"value":"93C55","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870310$D1B73DB6-8C47-4CB5-A85B-8FD2E859E0C6","rank":"normal"}],"P1451":[{"mainsnak":{"snaktype":"value","property":"P1451","hash":"aa29ae319ce41dc302aeaad29ce263a46dd391c2","datavalue":{"value":"1908598","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870310$0F022ED7-AAA0-4C76-BD7B-491AC6691E37","rank":"normal"}],"P1450":[{"mainsnak":{"snaktype":"value","property":"P1450","hash":"164f1b47f29c66b3e983a2464d5221c265bbdaa8","datavalue":{"value":"linear function approximation","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$E41C1AA2-DB26-4D44-B040-79470171CD1D","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"8303a89e5e979e44e65aa210688e1071093c7c30","datavalue":{"value":"martingale","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$23DA7120-20C8-4529-AA03-F844665646D0","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"26048f6fe1e07cc4223826747c55d4ef1cd7970b","datavalue":{"value":"least-square methods","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$06A6191E-4E61-4C76-BBBA-6806BB25BEA4","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"291b553d3d2f27e1808d7266f3983ba1bdeee092","datavalue":{"value":"policy evaluation algorithms","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$0A4859E4-41F3-45F1-953F-AD8670B0B856","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"4fe8dd8c9ce8cea53b788d0b2e9e5b367079519d","datavalue":{"value":"infinite-horizon dynamic programming","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$6DB46BBC-41D4-4FC5-9E1B-8D4A11DBC4FA","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"093fd34412008b9cd918a736fcd755319f66a668","datavalue":{"value":"discrete-time stationary Markov chain","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$BEAAD2CF-2A8F-4E7B-9573-EFF9DF9D98C3","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"f828563fee5a57e5f16da87a79378ac587cea3ac","datavalue":{"value":"simulation","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$1975CA16-65D0-4CC3-98F0-F82E545D2AED","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"586d8ae342cc91fbbe50b01c60efed8832b560bb","datavalue":{"value":"temporal difference","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$044B5EAD-3F1A-43E6-A6EC-EB26B12FE3DB","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"c509a25654dbf50da068f9e591fdd1df1660c9bb","datavalue":{"value":"stepsize","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$5A486998-0AF3-44B7-9390-8F7D45B27C84","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"fe9401d13373367577364f9d3727ea4cc1291d19","datavalue":{"value":"\\(\\text{LSTD}(\\lambda)\\) algorithm","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$29CDE576-7631-4EDE-B69A-3DD450D84712","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"d41bda66975993acb69d80899284d68af46b0d9b","datavalue":{"value":"convergence results","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870310$9C0B22A1-A348-4DBF-8BB0-C170943CB3A3","rank":"normal"}],"P1447":[{"mainsnak":{"snaktype":"value","property":"P1447","hash":"3d7fb3f5b4e42c0f325edfa03b0c2c63351ebc67","datavalue":{"value":{"entity-type":"item","numeric-id":731708,"id":"Q731708"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870310$B92CE732-8DBF-4560-8F5F-9E838692B730","rank":"normal"}],"P1460":[{"mainsnak":{"snaktype":"value","property":"P1460","hash":"57f7fea50d2ce1b39b695c4a1313582eed405e38","datavalue":{"value":{"entity-type":"item","numeric-id":5976449,"id":"Q5976449"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870310$0AAEADFE-AB9F-4EAB-B259-C1BE3CC719E3","rank":"normal"}],"P1643":[{"mainsnak":{"snaktype":"value","property":"P1643","hash":"81a62751b92f8cd596b8a3c8d08fc874b299d030","datavalue":{"value":{"entity-type":"item","numeric-id":4910565,"id":"Q4910565"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"ab6eba6409b482362c27ab467398d0631338f4c3","datavalue":{"value":{"amount":"+0.8827800154685974","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q1870310$263E2736-1CA9-4750-8286-892595237DF7","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"3a60bdcb44a98b6cea45f34c1088d0c9c6561ea8","datavalue":{"value":{"entity-type":"item","numeric-id":1959511,"id":"Q1959511"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"bfc42d62a728cc6a3fdc863520530c2d2a886bc9","datavalue":{"value":{"amount":"+0.8659302592277527","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q1870310$EADA4DDD-2B06-4D12-A484-56DFF8D4FACA","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"3e0d3ef632e83b0f7e49cddae362ec492d4338cc","datavalue":{"value":{"entity-type":"item","numeric-id":2887629,"id":"Q2887629"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"9d0d64b9d2504aafe5bbaf083f0b6356d04e7a34","datavalue":{"value":{"amount":"+0.8442625999450684","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q1870310$0943138A-616E-4EF8-AD0D-3D3E7BACE411","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"ec011472c27d6802a471b877fc3b24aa609d6deb","datavalue":{"value":{"entity-type":"item","numeric-id":1911340,"id":"Q1911340"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"28a27e83fbc5653affe4f3719e7592f1a30c114b","datavalue":{"value":{"amount":"+0.8218328952789307","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q1870310$4F714C73-9779-4183-98FF-C158385C9678","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"8b00dad17fae37735e2b2e938beb40198fa0b8b2","datavalue":{"value":{"entity-type":"item","numeric-id":5477859,"id":"Q5477859"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"2c35e457fca80ae67bc69152fbe852e6c18adea7","datavalue":{"value":{"amount":"+0.8212753534317017","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q1870310$485C8000-F7B8-4398-BAA2-94074A133F1F","rank":"normal"}]},"sitelinks":{"mardi":{"site":"mardi","title":"Publication:1870310","badges":[],"url":"https://portal.mardi4nfdi.de/wiki/Publication:1870310"}}}}}