{"entities":{"Q1870312":{"pageid":1881054,"ns":120,"title":"Item:Q1870312","lastrevid":73823079,"modified":"2026-04-14T17:23:20Z","type":"item","id":"Q1870312","labels":{"en":{"language":"en","value":"Approximate gradient methods in policy-space optimization of Markov reward processes"}},"descriptions":{"en":{"language":"en","value":"scientific article; zbMATH DE number 1908599"}},"aliases":{},"claims":{"P31":[{"mainsnak":{"snaktype":"value","property":"P31","hash":"fd5912e4dab4b881a8eb0eb27e7893fef55176ad","datavalue":{"value":{"entity-type":"item","numeric-id":56887,"id":"Q56887"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870312$378FC028-F3E8-4274-9649-84E314392330","rank":"normal"}],"P159":[{"mainsnak":{"snaktype":"value","property":"P159","hash":"1bf0aeeb91c92a9846d4a017b5002f994c8d1aec","datavalue":{"value":{"text":"Approximate gradient methods in policy-space optimization of Markov reward processes","language":"en"},"type":"monolingualtext"},"datatype":"monolingualtext"},"type":"statement","id":"Q1870312$A90EA2BE-6FE3-4457-A26D-F3963BBFB57E","rank":"normal"}],"P225":[{"mainsnak":{"snaktype":"value","property":"P225","hash":"00698eb187ec27b20a3f313fff5de03f89f9029e","datavalue":{"value":"1042.93061","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870312$C7648A8E-F613-4DC2-ABCC-8C1862796D3C","rank":"normal"}],"P27":[{"mainsnak":{"snaktype":"value","property":"P27","hash":"b3b6d0eeb01e8c643abecceeaed45296429dd155","datavalue":{"value":"10.1023/A:1022145020786","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870312$7A17D4DE-ADB3-40F0-A49C-537773C5959A","rank":"normal"}],"P16":[{"mainsnak":{"snaktype":"value","property":"P16","hash":"ee3e61dd2f1d3b7d226cba6544862c27c260acbd","datavalue":{"value":{"entity-type":"item","numeric-id":1870311,"id":"Q1870311"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870312$8F8B9A6E-6BA0-46AB-8098-139CFF1C8562","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P16","hash":"17d5fe570bd427f5c09bf9920260ff71ebaf3dbd","datavalue":{"value":{"entity-type":"item","numeric-id":290261,"id":"Q290261"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870312$AD0F1DC8-0E40-4B07-B7ED-B816C8B072BF","rank":"normal"}],"P200":[{"mainsnak":{"snaktype":"value","property":"P200","hash":"0f7c04b6b94cf13c193d472c226068c28e7c8f04","datavalue":{"value":{"entity-type":"item","numeric-id":169421,"id":"Q169421"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870312$54D2E0DB-FE8D-44F3-8D06-DA813E2DF2EE","rank":"normal"}],"P28":[{"mainsnak":{"snaktype":"value","property":"P28","hash":"947c49c6dc991ad4ccf38c0cba49a099db39b9c3","datavalue":{"value":{"time":"+2003-05-11T00:00:00Z","timezone":0,"before":0,"after":0,"precision":11,"calendarmodel":"http://www.wikidata.org/entity/Q1985727"},"type":"time"},"datatype":"time"},"type":"statement","id":"Q1870312$98E6328A-2BCE-4D66-90A9-8550675F1B93","rank":"normal"}],"P1448":[{"mainsnak":{"snaktype":"value","property":"P1448","hash":"23d08130d02be60c80f13b51c76945402d63d661","datavalue":{"value":"This paper considers a discrete time, finite state Markov reward process depending on a set of parameters. After a brief review of stochastic gradient descend methods, the resulting algorithms can be implemented online, and have the property that the gradient of the average reward converges to zero with probability one. The updates can have a high variance resulting in a slow convergence. Two approaches reduce the variance and depend on approximate gradient formulas. Bounds for the resulting bias terms are derived. The methodology is applied to Markov reward processes.","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870312$805602D4-0558-4B02-AEF8-7D3607C6E9C7","rank":"normal"}],"P1447":[{"mainsnak":{"snaktype":"value","property":"P1447","hash":"69cf90e6b5a4c49e3b66b4d143069a6dc477eea1","datavalue":{"value":{"entity-type":"item","numeric-id":588270,"id":"Q588270"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870312$5FA9E53B-7D2E-4306-8445-D9797DAF53E3","rank":"normal"}],"P226":[{"mainsnak":{"snaktype":"value","property":"P226","hash":"e3bc6c7cda8279d6ad2b578e7a7e4967447c3cc4","datavalue":{"value":"93E20","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870312$CC65AB1C-3A7F-4642-8CEB-DC2B1B25CB3F","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P226","hash":"39a0025ac480893a6c3723330b06364463bbd01f","datavalue":{"value":"93C55","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870312$30FA6C3F-6A3B-4514-BD24-59D88FE581A4","rank":"normal"}],"P1451":[{"mainsnak":{"snaktype":"value","property":"P1451","hash":"ba5b698a11cf38cbb9b0e89bbaa598a6f51a834f","datavalue":{"value":"1908599","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870312$B489DA69-1970-4035-9549-DE331D58D5DE","rank":"normal"}],"P1450":[{"mainsnak":{"snaktype":"value","property":"P1450","hash":"4cbbc041e4b7868cae592d23cc37ee2fcaaa64a0","datavalue":{"value":"Markov reward processes","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870312$A7C754D9-C0DE-414E-B474-45D088E16524","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"e1a200bb1153be637fb7bd1a3381fb5973eca58a","datavalue":{"value":"simulation-based optimization","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870312$FF137040-17B9-4F5E-B182-A8B348954844","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"027b25edd768a3dad9836f5cc36b32a0b6f823e8","datavalue":{"value":"policy-space optimization","type":"string"},"datatype":"string"},"type":"statement","id":"Q1870312$B360342B-EB7B-4F24-A242-E8F5446D254D","rank":"normal"}],"P1460":[{"mainsnak":{"snaktype":"value","property":"P1460","hash":"57f7fea50d2ce1b39b695c4a1313582eed405e38","datavalue":{"value":{"entity-type":"item","numeric-id":5976449,"id":"Q5976449"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q1870312$73049F3C-DC16-48CB-9D91-CD5F08656268","rank":"normal"}],"P205":[{"mainsnak":{"snaktype":"value","property":"P205","hash":"b6e408d5db6ede8b49c598b82be2ad6ef74622db","datavalue":{"value":"https://doi.org/10.1023/a:1022145020786","type":"string"},"datatype":"url"},"type":"statement","id":"Q1870312$8C5D7D15-EBCE-4B19-8293-D4CC88C523BD","rank":"normal"}],"P388":[{"mainsnak":{"snaktype":"value","property":"P388","hash":"4f830c37b8b5ffcbee81ae8e6b48d3f06fe255c6","datavalue":{"value":"W1554366315","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q1870312$641E8718-B3B0-48F8-9675-2FD5F908A181","rank":"normal"}],"P1643":[{"mainsnak":{"snaktype":"value","property":"P1643","hash":"c6075bcc036229389076ef0a370e99eb7ef5db64","datavalue":{"value":{"entity-type":"item","numeric-id":4925757,"id":"Q4925757"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"6ea5d1bd1ff283e1463d201fb22a4f7ecc661658","datavalue":{"value":{"amount":"+0.8240536451339722","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":
"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q1870312$DCB9F85C-6A8A-4865-86F0-8D945351A4AC","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"3e3b31fbe75ce2213038a5b034182e7f074a0a60","datavalue":{"value":{"entity-type":"item","numeric-id":3772003,"id":"Q3772003"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"5c51e6ebf47e066f5dc58093d784931b6f33c962","datavalue":{"value":{"amount":"+0.793571412563324","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q1870312$C3C30601-9E94-43FA-B1D1-A0E65B1F66C5","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"26eb69c1c374653a579144d09e1020002c44c67b","datavalue":{"value":{"entity-type":"item","numeric-id":4540300,"id":"Q4540300"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"f77a8188fccab1cc573644873e96a112228739de","datavalue":{"value":{"amount":"+0.7930338978767395","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q1870312$70D87D28-95C9-4812-BCC0-C6AF07D981BC","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"377ae0926e5599d0a89873a149ff75f8595aec56","datavalue":{"value":{"entity-type":"item","numeric-id":3093352,"id":"Q3093352"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"a72a7606cba0729f523571a90d0c7fab8dbc4bfa","datavalue":{"value":{"amount":"+0.7794969081878662","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q1870312$9D0BBA46-9186-4581-929B-C9BA9F1E66E8","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"740084bc5efaa56cf3cfde88cbb26fa4d3c0f602","datavalue":{"value":{"entity-type":"item","numeric-id":1745941,"id":"Q1745941"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"f3725c185df974e449df41554ea3e79f5bc10d42","datavalue":{"value":{"amount":"+0.7698516845703125","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q1870312$89979812-B727-4F50-963A-1EADD1AD0D8F","rank":"normal"}]},"sitelinks":{"mardi":{"site":"mardi","title":"Approximate gradient methods in policy-space optimization of Markov reward processes","badges":[],"url":"https://portal.mardi4nfdi.de/wiki/Approximate_gradient_methods_in_policy-space_optimization_of_Markov_reward_processes"}}}}}