{"entities":{"Q2834442":{"pageid":2845172,"ns":120,"title":"Item:Q2834442","lastrevid":58017450,"modified":"2026-04-03T11:32:06Z","type":"item","id":"Q2834442","labels":{"en":{"language":"en","value":"Dual control for approximate Bayesian reinforcement learning"}},"descriptions":{"en":{"language":"en","value":"scientific article; zbMATH DE number 6655043"}},"aliases":{},"claims":{"P31":[{"mainsnak":{"snaktype":"value","property":"P31","hash":"fd5912e4dab4b881a8eb0eb27e7893fef55176ad","datavalue":{"value":{"entity-type":"item","numeric-id":56887,"id":"Q56887"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q2834442$2FAADAF0-7CA1-40F9-8EA3-599BD01F8AD7","rank":"normal"}],"P225":[{"mainsnak":{"snaktype":"value","property":"P225","hash":"6b6aa5b577f709f468c1c9409cfee13b67778a68","datavalue":{"value":"1359.68251","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q2834442$A313EAC8-E530-4208-AC2F-FEF4E2E126A3","rank":"normal"}],"P16":[{"mainsnak":{"snaktype":"value","property":"P16","hash":"cd7e36dc7100b0bfeff65e18dc40934a0c803613","datavalue":{"value":{"entity-type":"item","numeric-id":2834441,"id":"Q2834441"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q2834442$05716325-756A-457F-834A-2BDF6E54B326","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P16","hash":"c0df7243022d5444911b506eea7644c7df8f919d","datavalue":{"value":{"entity-type":"item","numeric-id":516560,"id":"Q516560"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q2834442$D32AB8D4-6C5A-4037-982C-20F23B1BAB03","rank":"normal"}],"P28":[{"mainsnak":{"snaktype":"value","property":"P28","hash":"a18793aeb6a89d8a2af4c11b79598ffabdcf227c","datavalue":{"value":{"time":"+2016-11-22T00:00:00Z","timezone":0,"before":0,"after":0,"precision":11,"calendarmodel":"http://www.wikidata.org/entity/Q1985727"},"type":"time"},"datatype":"time"},"type":"statement","id":"Q2834442$ED138C4E-D72C-4204-A3EA-F1C2396D55E3","rank":"normal"}],"P205":[{"mainsnak":{"snaktype":"value","property":"P205","hash":"5a8807e181c7f24ac5dbcc6009fd4e401d38b8d7","datavalue":{"value":"https://arxiv.org/abs/1510.03591","type":"string"},"datatype":"url"},"type":"statement","id":"Q2834442$B390D1DE-BDEB-4DB7-B6B6-0B351D8ED608","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P205","hash":"a4390fb68d89be42ed574d4b21cf4bc90c01f183","datavalue":{"value":"http://jmlr.csail.mit.edu/papers/v17/15-162.html","type":"string"},"datatype":"url"},"type":"statement","id":"Q2834442$17A0B554-A1FC-4CB6-94C5-DE74E11DBA37","rank":"normal"}],"P226":[{"mainsnak":{"snaktype":"value","property":"P226","hash":"cfe779e91fe9c53ee133568259955801965765ae","datavalue":{"value":"68T05","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q2834442$69B3B3B0-2198-406F-95EA-5B7AF5B64AF9","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P226","hash":"2474c99ceba943e04dda4ddfffedfe45f5fdfd62","datavalue":{"value":"62F15","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q2834442$97E5011B-8ED7-4DBF-87BB-170B17D12838","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P226","hash":"e3bc6c7cda8279d6ad2b578e7a7e4967447c3cc4","datavalue":{"value":"93E20","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q2834442$3717F121-D1C9-4E93-8C41-78A471755338","rank":"normal"}],"P1451":[{"mainsnak":{"snaktype":"value","property":"P1451","hash":"283be10d6a16de5c26e4cd5963bf8821dce2d30b","datavalue":{"value":"6655043","type":"string"},"datatype":"external-id"},"type":"statement","id":"Q2834442$A068F445-1D43-4285-AB31-660FEE57601D","rank":"normal"}],"P1450":[{"mainsnak":{"snaktype":"value","property":"P1450","hash":"ab31482eee8f34fa99c5284980862acac4a0b6b4","datavalue":{"value":"reinforcement learning","type":"string"},"datatype":"string"},"type":"statement","id":"Q2834442$4915CD60-C4DE-4D31-8D47-5376878629D5","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"235d9f1ea5bfa248691c100bdc7ea768eaf09bf9","datavalue":{"value":"control","type":"string"},"datatype":"string"},"type":"statement","id":"Q2834442$80F8C620-8BCA-42A5-B97B-5352A0892AD2","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"966db26594221093ae08bf0043ea4d3322f3db91","datavalue":{"value":"Gaussian processes","type":"string"},"datatype":"string"},"type":"statement","id":"Q2834442$8455E64A-DF55-4BD6-9473-B53BBCFA5886","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"9d21ec545fb33c97c8e47c301264a2ba6289ddbf","datavalue":{"value":"filtering","type":"string"},"datatype":"string"},"type":"statement","id":"Q2834442$32C904FB-D202-4593-8C41-C3577D7E115F","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1450","hash":"8f091f8059c950552814fbf1d9159bf350956a70","datavalue":{"value":"Bayesian inference","type":"string"},"datatype":"string"},"type":"statement","id":"Q2834442$AEB8F0F4-08EC-4067-B335-47AECF799C61","rank":"normal"}],"P1460":[{"mainsnak":{"snaktype":"value","property":"P1460","hash":"57f7fea50d2ce1b39b695c4a1313582eed405e38","datavalue":{"value":{"entity-type":"item","numeric-id":5976449,"id":"Q5976449"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q2834442$31508F0E-5BB8-4751-9ADE-38D6772A6114","rank":"normal"}],"P159":[{"mainsnak":{"snaktype":"value","property":"P159","hash":"a39f38a1ec1d2a3fe98cb30ef056709ecbb6eeaf","datavalue":{"value":{"text":"Dual control for approximate Bayesian reinforcement learning","language":"en"},"type":"monolingualtext"},"datatype":"monolingualtext"},"type":"statement","id":"Q2834442$F4C40CB0-712A-479E-A914-49A4C4227E95","rank":"normal"}],"P200":[{"mainsnak":{"snaktype":"value","property":"P200","hash":"05810dfd27f2b8a246f7d0a5b34fbcc72e448b7a","datavalue":{"value":{"entity-type":"item","numeric-id":6582882,"id":"Q6582882"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q2834442$698D36C8-ADC8-4A4D-B8AC-5967BCE922A5","rank":"normal"}],"P1448":[{"mainsnak":{"snaktype":"value","property":"P1448","hash":"18df67d8306b029c569532a579cf1a9dd54f3a9b","datavalue":{"value":"Bayesian reinforcement learning, or dual control, provides a principled solution to the exploration-exploitation trade-off for learning in interactive settings. This paper extends an old Bayesian reinforcement learning in control theory in the context of modern regression methods with ideas from contempary machine learning, including approximate Gaussian process regression and multi-layer networks. Experimental results and simple examples are also given to demonstrate the effectiveness of the proposed dual control framework.","type":"string"},"datatype":"string"},"type":"statement","id":"Q2834442$CF3E3233-9C21-4F6F-8161-58FAA2A8ECFC","rank":"normal"}],"P1447":[{"mainsnak":{"snaktype":"value","property":"P1447","hash":"561316b0a0bbf19e4f98f997f89b76eb49e6a18d","datavalue":{"value":{"entity-type":"item","numeric-id":461187,"id":"Q461187"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","id":"Q2834442$F72F3010-1F1F-4F90-A37B-1FA0106ED3A8","rank":"normal"}],"P1643":[{"mainsnak":{"snaktype":"value","property":"P1643","hash":"19a8254bd344a492d69513ddf3ec8323c2c9f9c5","datavalue":{"value":{"entity-type":"item","numeric-id":4971589,"id":"Q4971589"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"d1d883423345548c826d2093ff3e98106239480e","datavalue":{"value":{"amount":"+0.7686507701873779","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q2834442$B4948ADF-B42D-4B60-A0EF-94DDFA5CBE70","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"9c98fad0014eb0a3f6fc0a31f3aca58efb2e85cb","datavalue":{"value":{"entity-type":"item","numeric-id":2938731,"id":"Q2938731"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"35c8902b5806db2b8976384a8cfc6d83e9a56684","datavalue":{"value":{"amount":"+0.751600980758667","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q2834442$28115F2E-3E9C-47AF-84C1-21CF306BE398","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"d4b8b9562782cd4ccbc857c784aadea9580b4dc5","datavalue":{"value":{"entity-type":"item","numeric-id":1729066,"id":"Q1729066"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"9b741a1f4169510d7f41c8b0d0a261282397636d","datavalue":{"value":{"amount":"+0.7511633634567261","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q2834442$EBC527E0-56D3-4815-9AFF-7BB6EF44212C","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"2e71a87ad922054aec4c317c36f76e07b353112c","datavalue":{"value":{"entity-type":"item","numeric-id":5396657,"id":"Q5396657"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"22d28fe1258fcd07848a5f5ea393ec47bc9dfbaa","datavalue":{"value":{"amount":"+0.75022292137146","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q2834442$62DC094A-4356-4B43-A8DF-4D9DD27E25C5","rank":"normal"},{"mainsnak":{"snaktype":"value","property":"P1643","hash":"0c2e6465e41268a57e5db24a271575d60fc04a1a","datavalue":{"value":{"entity-type":"item","numeric-id":2425228,"id":"Q2425228"},"type":"wikibase-entityid"},"datatype":"wikibase-item"},"type":"statement","qualifiers":{"P1659":[{"snaktype":"value","property":"P1659","hash":"ead21524b528df6d14c7c5542cee2a1522c825fb","datavalue":{"value":{"amount":"+0.7195886373519897","unit":"1"},"type":"quantity"},"datatype":"quantity"}],"P1660":[{"snaktype":"value","property":"P1660","hash":"a327a09ea0305e98d5cf33bd4036320e19f2aed0","datavalue":{"value":{"entity-type":"item","numeric-id":6821328,"id":"Q6821328"},"type":"wikibase-entityid"},"datatype":"wikibase-item"}]},"qualifiers-order":["P1659","P1660"],"id":"Q2834442$89489096-15B6-400E-B26A-FF92C822F2F1","rank":"normal"}]},"sitelinks":{"mardi":{"site":"mardi","title":"Publication:2834442","badges":[],"url":"https://portal.mardi4nfdi.de/wiki/Publication:2834442"}}}}}