scientific article; zbMATH DE number 1321699

From MaRDI portal
Publication:4257216

zbMath: 0924.68163 · MaRDI QID: Q4257216

Dimitri P. Bertsekas, John N. Tsitsiklis

Publication date: 9 August 1999

Title: zbMATH Open Web Interface contents unavailable due to conflicting licenses.
Related Items

Least squares policy iteration with instrumental variables vs. direct policy search: comparison against optimal benchmarks using energy storage, Randomized Shortest-Path Problems: Two Related Models, Dimension reduction based adaptive dynamic programming for optimal control of discrete-time nonlinear control-affine systems, Model-free algorithm for consensus of discrete-time multi-agent systems using reinforcement learning method, Deep empirical risk minimization in finance: Looking into the future, A Lyapunov characterization of robust policy optimization, Adaptive optimal control of continuous-time nonlinear affine systems via hybrid iteration, Zero‐sum game optimal control for the nonlinear switched systems based on heuristic dynamic programming, Parameter estimation in a 3‐parameter p‐star random graph model, Optimal transmission strategy for multiple Markovian fading channels: existence, structure, and approximation, Optimal control of a two‐wheeled self‐balancing robot by reinforcement learning, Multi-agent off-policy actor-critic algorithm for distributed multi-task reinforcement learning, Optimal output tracking control of linear discrete-time systems with unknown dynamics by adaptive dynamic programming and output feedback, Solving nonlinear and dynamic programming equations on extended \(b\)-metric spaces with the fixed-point technique, SOS-based policy iteration for \(H_\infty\) control of polynomial systems with uncertain parameters, Solving large-scale dynamic vehicle routing problems with stochastic requests, Dynamic parcel pick-up routing problem with prioritized customers and constrained capacity via lower-bound-based rollout approach, Optimized ensemble value function approximation for dynamic programming, A reinforcement learning approach to the stochastic cutting stock problem, Certified reinforcement learning with logic guidance, Reinforcement Learning, Bit by Bit, A simple illustration of interleaved learning using Kalman filter for linear least squares, Target Network and Truncation Overcome the Deadly Triad in \(\boldsymbol{Q}\)-Learning, Entropy regularized actor-critic based multi-agent deep reinforcement learning for stochastic games, A stochastic contraction mapping theorem, Separation of learning and control for cyber-physical systems, Distributed consensus-based multi-agent temporal-difference learning, Optimal decision-making of mutual fund temporary borrowing problem via approximate dynamic programming, Convergence of gradient algorithms for nonconvex \(C^{1+ \alpha}\) cost functions, State-flipped control and Q-learning for finite horizon output tracking of Boolean control networks, Premium control with reinforcement learning, Event-triggered optimal control for discrete-time multi-player non-zero-sum games using parallel control, Improving reinforcement learning algorithms: Towards optimal learning rate policies, Primal-Dual Regression Approach for Markov Decision Processes with General State and Action Spaces, $Q$-Learning in a Stochastic Stackelberg Game between an Uninformed Leader and a Naive Follower, LQG Online Learning, Risk-Sensitive Reinforcement Learning, REINFORCEMENT LEARNING WITH GOAL-DIRECTED ELIGIBILITY TRACES, Asymptotic analysis of temporal-difference learning algorithms with constant step-sizes, Some operations research methods for analyzing protein sequences and structures, Mathematical programming for network revenue management revisited, A 
sensitivity formula for risk-sensitive cost and the actor-critic algorithm, A Relational Hierarchical Model for Decision-Theoretic Assistance, Minimising average passenger waiting time in personal rapid transit systems, Power and delay optimisation in multi-hop wireless networks, On Convergence of Value Iteration for a Class of Total Cost Markov Decision Processes, Empirical Q-Value Iteration, Incremental Quasi-Subgradient Method for Minimizing Sum of Geodesic Quasi-Convex Functions on Riemannian Manifolds with Applications, Multiply Accelerated Value Iteration for NonSymmetric Affine Fixed Point Problems and Application to Markov Decision Processes, Reinforcement learning for adaptive optimal control of unknown continuous-time nonlinear systems with input constraints, Approximation of average cost Markov decision processes using empirical distributions and concentration inequalities, Distributed Stochastic Optimization with Large Delays, Analyzing Approximate Value Iteration Algorithms, Optimization of a large-scale water reservoir network by stochastic dynamic programming with efficient state space discretization, Neural network approach to continuous-time direct adaptive optimal control for partially unknown nonlinear systems, A constrained optimization perspective on actor-critic algorithms and application to network routing, Potential-based least-squares policy iteration for a parameterized feedback control system, Adaptive importance sampling for control and inference, The factored policy-gradient planner, Practical solution techniques for first-order MDPs, Approximate dynamic programming for stochastic linear control problems on compact state spaces, Solving average cost Markov decision processes by means of a two-phase time aggregation algorithm, Computable approximations for continuous-time Markov decision processes on Borel spaces based on empirical measures, Output-feedback adaptive optimal control of interconnected systems based on robust adaptive dynamic programming, Multiscale Q-learning with linear function approximation, Value iteration and adaptive dynamic programming for data-driven adaptive optimal control design, Strategy optimization for controlled Markov process with descriptive complexity constraint, Modeling and optimization control of a demand-driven, conveyor-serviced production station, Solving stochastic resource-constrained project scheduling problems by closed-loop approximate dynamic programming, On solving the Lagrangian dual of integer programs via an incremental approach, New approximate dynamic programming algorithms for large-scale undiscounted Markov decision processes and their application to optimize a production and distribution system, Neural network and regression spline value function approximations for stochastic dynamic programming, Approximate dynamic programming for the dispatch of military medical evacuation assets, A perturbation approach to a class of discounted approximate value iteration algorithms with Borel spaces, Perspectives of approximate dynamic programming, Low-discrepancy sampling for approximate dynamic programming with local approximators, Efficient model-based reinforcement learning for approximate online optimal control, Planning for multiple measurement channels in a continuous-state POMDP, An approximate dynamic programming framework for modeling global climate policy under decision-dependent uncertainty, Q-learning and policy iteration algorithms for stochastic shortest path problems, Model-free \(H_{\infty }\) control 
design for unknown linear discrete-time systems via Q-learning with LMI, General time consistent discounting, Suboptimal solutions to dynamic optimization problems via approximations of the policy functions, Performance evaluation of direct heuristic dynamic programming using control-theoretic measures, Robust adaptive dynamic programming for linear and nonlinear systems: an overview, An actor-critic algorithm with function approximation for discounted cost constrained Markov decision processes, Minimax PAC bounds on the sample complexity of reinforcement learning with a generative model, Moneybarl: exploiting pitcher decision-making using reinforcement learning, A comparison of global and semi-local approximation in \(T\)-stage stochastic optimization, Ranking policies in discrete Markov decision processes, Minimum and worst-case performance ratios of rollout algorithms, The optimal unbiased value estimator and its relation to LSTD, TD and MC, Model selection in reinforcement learning, Finding optimal memoryless policies of POMDPs under the expected average reward criterion, Optimal control as a graphical model inference problem, The optimal control of just-in-time-based production and distribution systems and performance comparisons with optimized pull systems, Network revenue management with inventory-sensitive bid prices and customer choice, Minimizing total tardiness in a stochastic single machine scheduling problem using approximate dynamic programming, A framework and a mean-field algorithm for the local control of spatial processes, Potentials based optimization with embedded Markov chain for stochastic constrained system, An online actor-critic algorithm with function approximation for constrained Markov decision processes, Approximate dynamic programming for capacity allocation in the service industry, Performance optimization of queueing systems with perturbation realization, Fitting piecewise linear continuous functions, Multi-player non-zero-sum games: online adaptive learning solution of coupled Hamilton-Jacobi equations, Asymptotic analysis of value prediction by well-specified and misspecified models, Incremental proximal methods for large scale convex optimization, Iterative methods for the solution of a singular control formulation of a GMWB pricing problem, A dynamic programming strategy to balance exploration and exploitation in the bandit problem, Optimal tracking control of nonlinear partially-unknown constrained-input systems using integral reinforcement learning, Value set iteration for Markov decision processes, Decentralized MDPs with sparse interactions, Depth-based short-sighted stochastic shortest path problems, A tutorial on event-based optimization -- a new optimization framework, Control: a perspective, Integral reinforcement learning and experience replay for adaptive optimal control of partially-unknown constrained-input continuous-time systems, Reinforcement \(Q\)-learning for optimal tracking control of linear discrete-time systems with unknown dynamics, A sparse collocation method for solving time-dependent HJB equations using multivariate \(B\)-splines, Temporal difference-based policy iteration for optimal control of stochastic systems, Newton-based stochastic optimization using \(q\)-Gaussian smoothed functional algorithms, Adaptive dynamic programming and optimal control of nonlinear nonaffine systems, Influence of temporal aggregation on strategic forest management under risk of wind damage, Approximation of Markov decision processes with general 
state space, Resource allocation in congested queueing systems with time-varying demand: an application to airport operations, Generalized decision rule approximations for stochastic programming via liftings, Data-based analysis of discrete-time linear systems in noisy environment: controllability and observability, Optimal energy allocation for linear control with packet loss under energy harvesting constraints, Model-free event-triggered control algorithm for continuous-time linear systems with optimal performance, Q-learning for continuous-time linear systems: A model-free infinite horizon optimal control approach, Optimal switching with minimum dwell time constraint, Control of multistability, \(\mathrm{H}_\infty\) control of linear discrete-time systems: off-policy reinforcement learning, Iteration complexity analysis of block coordinate descent methods, Four encounters with system identification, Integral \(Q\)-learning and explorized policy iteration for adaptive optimal control of continuous-time linear systems, Hessian matrix distribution for Bayesian policy gradient reinforcement learning, Sampled fictitious play for approximate dynamic programming, Maximizing the probability of attaining a target prior to extinction, Convergence analysis of online gradient method for BP neural networks, The Borkar-Meyn theorem for asynchronous stochastic approximations, A generic architecture for adaptive agents based on reinforcement learning, Management of water resource systems in the presence of uncertainties by nonlinear approximation techniques and deterministic sampling, Approximate dynamic programming via direct search in the space of value function approximations, Robust inversion, dimensionality reduction, and randomized sampling, A frequentist approach to mapping under uncertainty, Proximal algorithms and temporal difference methods for solving fixed point problems, Stochastic decomposition applied to large-scale hydro valleys management, Discrete-time gradient flows and law of large numbers in Alexandrov spaces, Learning with policy prediction in continuous state-action multi-agent decision processes, Strong law of large numbers for the \(L^1\)-Karcher mean, Parallelization strategies for rollout algorithms, Optimal cost almost-sure reachability in POMDPs, Solving factored MDPs using non-homogeneous partitions, A time aggregation approach to Markov decision processes, Ambiguous partially observable Markov decision processes: structural results and applications, An algorithmic approach to optimal asset liquidation problems, Joint optimization of ordering and maintenance with condition monitoring data, Active network management for electrical distribution systems: problem formulation, benchmark, and approximate solution, Adaptive stepsizes for recursive estimation with applications in approximate dynamic programming, Heterarchical reinforcement-learning model for integration of multiple cortico-striatal loops: fMRI examination in stimulus-action-reward association learning, Approximate policy optimization and adaptive control in regression models, Actor-critic algorithms for hierarchical Markov decision processes, Faster rollout search for the vehicle routing problem with stochastic demands and restocking, A policy gradient method for semi-Markov decision processes with application to call admission control, Envelope condition method with an application to default risk models, Evaluation of counterparty risk for derivatives with early-exercise features, An integrated data-driven Markov 
parameters sequence identification and adaptive dynamic programming method to design fault-tolerant optimal tracking control for completely unknown model systems, General value iteration based single network approach for constrained optimal controller design of partially-unknown continuous-time nonlinear systems, Open problems in universal induction & intelligence, Symmetric approximate linear programming for factored MDPs with application to constrained problems, The emergence of goals in a self-organizing network: a non-mentalist model of intentional actions, Model-free \(Q\)-learning designs for linear discrete-time zero-sum games with application to \(H^\infty\) control, Reinforcement learning-based control of drug dosing for cancer chemotherapy treatment, New stochastic approximation algorithms with adaptive step sizes, Solving the dynamic ambulance relocation and dispatching problem using approximate dynamic programming, Extremum seeking of dynamical systems via gradient descent and stochastic approximation methods, Energy contracts management by stochastic programming techniques, Immediate return preference emerged from a synaptic learning rule for return maximization, Variance-constrained actor-critic algorithms for discounted and average reward MDPs, Complete stability analysis of a heuristic approximate dynamic programming control design, Approximate stochastic annealing for online control of infinite horizon Markov decision processes, A reinforcement learning approach to convoy scheduling on a contested transportation network, Non-zero sum Nash Q-learning for unknown deterministic continuous-time linear systems, Real-time dynamic programming for Markov decision processes with imprecise probabilities, A rollout algorithm framework for heuristic solutions to finite-horizon stochastic dynamic programs, Reinforcement learning algorithms with function approximation: recent advances and applications, Totally model-free actor-critic recurrent neural-network reinforcement learning in non-Markovian domains, Asymptotic bias of stochastic gradient search, Dynamic programming and value-function approximation in sequential decision problems: error analysis and numerical results, Approximate receding horizon approach for Markov decision processes: average reward case, Online stochastic optimization under time constraints, Modeling and optimization of M/G/1-type queueing networks: an efficient sensitivity analysis approach, Training parsers by inverse reinforcement learning, On finding global optima for the hinge fitting problem., A unified framework for stochastic optimization, Reinforcement learning for long-run average cost., Approximate dynamic programming for link scheduling in wireless mesh networks, Convergent multiple-timescales reinforcement learning algorithms in normal form games, Sensitivity-based nested partitions for solving finite-horizon Markov decision processes, Linear programming formulation for non-stationary, finite-horizon Markov decision process models, Distributed adaptive dynamic programming for data-driven optimal control, Shape constraints in economics and operations research, Approximation of discounted minimax Markov control problems and zero-sum Markov games using Hausdorff and Wasserstein distances, Finite-horizon optimal control of discrete-time linear systems with completely unknown dynamics using Q-learning, Variance minimization of parameterized Markov decision processes, Planning horizons based proactive rescheduling for stochastic resource-constrained 
project scheduling problems, Dynamic pricing for vehicle ferries: using packing and simulation to optimize revenues, Optimal distributed synchronization control for continuous-time heterogeneous multi-agent differential graphical games, Approximate dynamic programming for missile defense interceptor fire control, Deep reinforcement learning with temporal logics, Discovering hidden structure in factored MDPs, Learning output reference model tracking for higher-order nonlinear systems with unknown dynamics, Solving variational inequality and fixed point problems by line searches and potential optimization, Incremental quasi-subgradient methods for minimizing the sum of quasi-convex functions, Synergies of operations research and data mining, Optimally maintaining a Markovian deteriorating system with limited imperfect repairs, Heterogeneous trading strategies with adaptive fuzzy actor-critic reinforcement learning: a behavioral approach, Stochastic dynamic programming applied to hydrothermal power systems operation planning based on the convex hull algorithm, Approximate dynamic programming with a fuzzy parameterization, Online actor-critic algorithm to solve the continuous-time infinite horizon optimal control problem, On solving integral equations using Markov chain Monte Carlo methods, Distributed resource allocation with binary decisions via Newton-like neural network dynamics, Adaptive importance sampling for value function approximation in off-policy reinforcement learning, Modified policy iteration algorithms are not strongly polynomial for discounted dynamic programming, Applications of stochastic modeling in air traffic management: methods, challenges and opportunities for solving air traffic problems under uncertainty, Optimal DoS attack scheduling for multi-sensor remote state estimation over interference channels, TT-QI: faster value iteration in tensor train format for stochastic optimal control, Off-policy temporal difference learning with distribution adaptation in fast mixing chains, Multi-period portfolio optimization with linear control policies, A formal framework and extensions for function approximation in learning classifier systems, Learning near-optimal policies with Bellman-residual minimization based fitted policy iteration and a single sample path, Convergence analysis of batch gradient algorithm for three classes of sigma-pi neural networks, Projected equation methods for approximate solution of large linear systems, Adaptive optimal control for continuous-time linear systems based on policy iteration, Analysis of a class of dynamic programming models for multi-stage uncertain systems, A stochastic gradient type algorithm for closed-loop problems, Concentration bounds for temporal difference learning with linear function approximation: the case of batch data and uniform sampling, An approximate dynamic programming approach for the vehicle routing problem with stochastic demands, Bond management and max-min optimal control., Application of orthogonal arrays and MARS to inventory forecasting stochastic dynamic programs., Reinforcement distribution in fuzzy Q-learning, Pricing substitutable flights in airline revenue management, Resource-constrained management of heterogeneous assets with stochastic deterioration, Theoretical tools for understanding and aiding dynamic decision making, Reinforcement learning in the brain, Comparing neuro-dynamic programming algorithms for the vehicle routing problem with stochastic demands, Limitations of learning in automata-based 
systems, A maxmin policy for bond management, Stochastic dynamic programming with factored representations, Bounded-parameter Markov decision processes, Natural actor-critic algorithms, Exploiting structure in adaptive dynamic programming algorithms for a stochastic batch service problem, Monte Carlo \(TD(\lambda)\)-methods for the optimal control of discrete-time Markovian jump linear systems, Variable demand and multi-commodity flow in Markovian network equilibrium, Feasible methods for nonconvex nonsmooth problems with applications in green communications, Stochastic quasi-subgradient method for stochastic quasi-convex feasibility problems, On integral generalized policy iteration for continuous-time linear quadratic regulations, A stochastic games framework for verification and control of discrete time stochastic hybrid systems, A unified DC programming framework and efficient DCA based approaches for large scale batch reinforcement learning, Guiding exploration by pre-existing knowledge without modifying reward, Restricted gradient-descent algorithm for value-function approximation in reinforcement learning, Multi-agent differential graphical games: online adaptive learning solution for synchronization with optimality, Optimal control of unknown nonaffine nonlinear discrete-time systems based on adaptive dynamic programming, Stability-constrained Markov decision processes using MPC, Model-free finite-horizon optimal tracking control of discrete-time linear systems, Neuro-optimal tracking control for a class of discrete-time nonlinear systems via generalized value iteration adaptive dynamic programming approach, Testing facility location and dynamic capacity planning for pandemics with demand uncertainty, Event-triggered constrained control with DHP implementation for nonaffine discrete-time systems, Complexity bounds for approximately solving discounted MDPs by value iterations, When control and state variations increase uncertainty: modeling and stochastic control in discrete time, Stochastic quasi-Newton with line-search regularisation, Efficient approximate dynamic programming based on design and analysis of computer experiments for infinite-horizon optimization, Symblicit algorithms for mean-payoff and shortest path in monotonic Markov decision processes, On the convergence of reinforcement learning with Monte Carlo exploring starts, On learning and branching: a survey, Subgradient averaging for multi-agent optimisation with different constraint sets, A stability criterion for two timescale stochastic approximation schemes, Time-optimal control of large-scale systems of systems using compositional optimization, Efficient algorithms of pathwise dynamic programming for decision optimization in mining operations, Self-learning robust optimal control for continuous-time nonlinear systems with mismatched disturbances, Numerically tractable optimistic bilevel problems, Neural circuits for learning context-dependent associations of stimuli, Meso-parametric value function approximation for dynamic customer acceptances in delivery routing, Dynamic focus programming: a new approach to sequential decision problems under uncertainty, A data-driven neural network approach to optimal asset allocation for target based defined contribution pension plans, Improved value iteration for neural-network-based stochastic optimal control design, Robust min-max optimal control design for systems with uncertain models: a neural dynamic programming approach, Asymptotic optimality and rates of convergence of 
quantized stationary policies in continuous-time Markov decision processes, Reinforcement learning for adaptive optimal control of continuous-time linear periodic systems, Dynamic parameters in sequential decision making, Perturbed proximal primal-dual algorithm for nonconvex nonsmooth optimization, Optimization of a special case of continuous-time Markov decision processes with compact action set, Dynamic optimization over infinite-time horizon: web-building strategy in an orb-weaving spider as a case study, Dynamic pricing and inventory control: robust vs. stochastic uncertainty models - a computational study, Blood platelet production: optimization by dynamic programming and simulation, Neuro-dynamic trading methods, Integrated condition-based maintenance and multi-item lot-sizing with stochastic demand, Convergence analysis of the deep neural networks based globalized dual heuristic programming, New algorithms of the Q-learning type, Water reservoir control under economic, social and environmental constraints, Computational bounds for elevator control policies by large scale linear programming, Dynamic speed scaling minimizing expected energy consumption for real-time tasks, Controlled sequential Monte Carlo, A conservative index heuristic for routing problems with multiple heterogeneous service facilities, From model-based control to data-driven control: survey, classification and perspective, PageRank optimization by edge selection, Approximate dynamic programming for stochastic \(N\)-stage optimization with application to optimal consumption under uncertainty, Converging marriage in honey-bees optimization and application to stochastic dynamic programming, Application of reinforcement learning to the game of Othello, Adaptive stepsize selection for tracking in a regime-switching environment, Smoothed functional-based gradient algorithms for off-policy reinforcement learning: a non-asymptotic viewpoint, Asynchronous Lagrangian scenario decomposition, An approximate dynamic programming approach to project scheduling with uncertain resource availabilities, Efficient sampling in approximate dynamic programming algorithms, Motion planning in uncertain environments with vision-like sensors, Dynamic multi-appointment patient scheduling for radiation therapy, Dynamic modeling and control of supply chain systems: A review, A tutorial on the cross-entropy method, Basis function adaptation in temporal difference reinforcement learning, Approximate dynamic programming-based approaches for input-output data-driven control of nonlinear processes, Convergence property of gradient-type methods with non-monotone line search in the presence of perturbations, Linear stochastic approximation driven by slowly varying Markov chains, An actor-critic algorithm for constrained Markov decision processes, Boundedness of iterates in \(Q\)-learning, Learning dynamic prices in electronic retail markets with customer segmentation, Comparing heuristics for the product allocation problem in multi-level warehouses under compatibility constraints, Dynamic programming and suboptimal control: a survey from ADP to MPC, Kernel dynamic policy programming: applicable reinforcement learning to robot systems with high dimensional states, A note on linear function approximation using random projections, An approximate dynamic programming approach for comparing firing policies in a networked air defense environment, Computational aspects of optimal strategic network diffusion, MASAGE: model-agnostic sequential and adaptive 
game estimation, Data-driven optimal control with a relaxed linear program, Dynamic pricing with Bayesian demand learning and reference price effect, Deep reinforcement learning for inventory control: a roadmap, Stochastic dynamic vehicle routing in the light of prescriptive analytics: a review, An aggregation-based approximate dynamic programming approach for the periodic review model with random yield, An improvement of single-network adaptive critic design for nonlinear systems with asymmetry constraints, Dynamic pricing models for electronic business, Monte Carlo methods for pricing financial options, Reinforcement learning for distributed control and multi-player games, From reinforcement learning to optimal control: a unified framework for sequential decisions, Reinforcement learning: an industrial perspective, The role of systems biology, neuroscience, and thermodynamics in network control and learning, Stochastic approximations of constrained discounted Markov decision processes, Stochastic iterative dynamic programming: a Monte Carlo approach to dual control, Nearly optimal control laws for nonlinear systems with saturating actuators using a neural network HJB approach, Discrete-time online learning control for a class of unknown nonaffine nonlinear systems using reinforcement learning, Finite-sample analysis of nonlinear stochastic approximation with applications in reinforcement learning, Multi-agent discrete-time graphical games and reinforcement learning solutions, Performance optimization for a class of generalized stochastic Petri nets, Whittle index based Q-learning for restless bandits with average reward, Dynamic decision making for graphical models applied to oil exploration, Some Limit Properties of Markov Chains Induced by Recursive Stochastic Algorithms, Approximate policy iteration: a survey and some new methods, A review of stochastic algorithms with continuous value function approximation and some new approximate policy iteration algorithms for multidimensional continuous applications, Generalized maximum entropy estimation, Algorithms for Optimal Control of Stochastic Switching Systems, ExpertRNA: A New Framework for RNA Secondary Structure Prediction, Actor-Critic–Like Stochastic Adaptive Search for Continuous Simulation Optimization, Scalable Reinforcement Learning for Multiagent Networked Systems, Stochastic Learning Approach for Binary Optimization: Application to Bayesian Optimal Design of Experiments, Discrete-time dynamic graphical games: model-free reinforcement learning solution, Computational Benefits of Intermediate Rewards for Goal-Reaching Policy Learning, From Infinite to Finite Programs: Explicit Error Bounds with Applications to Approximate Dynamic Programming, Asymptotics of Reinforcement Learning with Neural Networks, Markov Reward Models and Markov Decision Processes in Discrete and Continuous Time: Performance Evaluation and Optimization, Multiple-sets split quasi-convex feasibility problems: Adaptive subgradient methods with convergence guarantee, Automated Reinforcement Learning (AutoRL): A Survey and Open Problems, Flexible FOND Planning with Explicit Fairness Assumptions, Risk-Sensitive Reinforcement Learning via Policy Gradient Search, Dynamic Stochastic Matching Under Limited Time, Ordinary Differential Equation Methods for Markov Decision Processes and Application to Kullback-Leibler Control Cost, A novel optimal tracking control scheme for a class of 
discrete-time nonlinear systems using generalised policy iteration adaptive dynamic programming algorithm, Mean field Markov decision processes, Stochastic switching for partially observable dynamics and optimal asset allocation, Experience replay–based output feedback Q‐learning scheme for optimal output tracking control of discrete‐time linear systems, On the Taylor Expansion of Value Functions, Benchmarking a Scalable Approximate Dynamic Programming Algorithm for Stochastic Control of Grid-Level Energy Storage, Rectified deep neural networks overcome the curse of dimensionality for nonsmooth value functions in zero-sum games of nonlinear stiff systems, Spare Parts Inventory Management with Substitution-Dependent Reliability, Optimal empty vehicle redistribution for hub‐and‐spoke transportation systems, Deceptive Reinforcement Learning Under Adversarial Manipulations on Cost Signals, Parallel Optimization Techniques for Machine Learning, On Generalized Bellman Equations and Temporal-Difference Learning, An Overview for Markov Decision Processes in Queues and Networks, Towards Min Max Generalization in Reinforcement Learning, Deep Neural Networks Algorithms for Stochastic Control Problems on Finite Horizon: Convergence Analysis, Decomposition Methods for Computing Directional Stationary Solutions of a Class of Nonsmooth Nonconvex Optimization Problems, Q-Learning for Distributionally Robust Markov Decision Processes, Adaptive Learning Algorithm Convergence in Passive and Reactive Environments, Adaptive Robust Control in Continuous Time, Robust Optimizers for Nonlinear Programming in Approximate Dynamic Programming, Control of chaotic systems by deep reinforcement learning, Reward-Modulated Hebbian Learning of Decision Making, Is Temporal Difference Learning Optimal? 
An Instance-Dependent Analysis, Risk-Constrained Reinforcement Learning with Percentile Risk Criteria, Variance-penalized Markov decision processes: dynamic programming and reinforcement learning techniques, Finite horizon optimal control of non-linear discrete-time switched systems using adaptive dynamic programming with ε-error bound, Optimal Dynamic Treatment Regimes, Derivatives of Logarithmic Stationary Distributions for Policy Gradient Reinforcement Learning, Decomposition of large-scale stochastic optimal control problems, Computable approximations for average Markov decision processes in continuous time, Stability and monotone convergence of generalised policy iteration for discrete-time linear quadratic regulations, Convergence of the standard RLS method and \(UDU^T\) factorisation of covariance matrix for solving the algebraic Riccati equation of the DLQR via heuristic approximate dynamic programming, A rollout algorithm for the resource constrained elementary shortest path problem, A Spiking Neural Network Model of an Actor-Critic Learning Agent, Continuous-Time Robust Dynamic Programming, Opportunistic Transmission over Randomly Varying Channels, Simultaneous Optimal Control and Discrete Stochastic Sensor Selection, Value and Policy Function Approximations in Infinite-Horizon Optimization Problems, Hebbian Versus Gradient Training of ESN Actors in Closed-Loop ACD, Regularity and Stability of Feedback Relaxed Controls, New Rollout Algorithms for Combinatorial Optimization Problems, Risk-Averse Approximate Dynamic Programming with Quantile-Based Risk Measures, Bayesian Exploration for Approximate Dynamic Programming, Transient-State Natural Gas Transmission in Gunbarrel Pipeline Networks, A Block Successive Upper-Bound Minimization Method of Multipliers for Linearly Constrained Convex Optimization, Output‐feedback \(H_\infty\) quadratic tracking control of linear systems using reinforcement learning, Challenges in Enterprise Wide Optimization for the Process Industries, Projection algorithms with dynamic stepsize for constrained composite minimization, A perturbation approach to approximate value iteration for average cost Markov decision processes with Borel spaces and bounded costs, Suboptimal Policies for Stochastic \(N\)-Stage Optimization: Accuracy Analysis and a Case Study from Optimal Consumption, A sequential updating scheme of the Lagrange multiplier for separable convex programming, On the structure of value functions for threshold policies in queueing models, On the Computational Complexity of Minimum-Concave-Cost Flow in a Two-Dimensional Grid, Bounds for Multistage Stochastic Programs Using Supervised Learning Strategies, Q(\(\lambda\)) with Off-Policy Corrections, Finite-Time Performance of Distributed Temporal-Difference Learning with Linear Function Approximation, Robust shortest path planning and semicontractive dynamic programming, Optimal control of a class of nonlinear stochastic systems, Adaptive dynamic programming for model‐free tracking of trajectories with time‐varying parameters, Approximate dynamic programming methods for an inventory allocation problem under uncertainty, QUANTUM COMPUTATION FOR ACTION SELECTION USING REINFORCEMENT LEARNING, Finite-Time Analysis and Restarting Scheme for Linear Two-Time-Scale Stochastic Approximation, Convergence Rates and Decoupling in Linear Stochastic Approximation Algorithms, Stable Optimal Control and Semicontractive Dynamic Programming, FLOW SHOP SCHEDULING WITH REINFORCEMENT LEARNING, 
Neural-network-observer-based optimal control for unknown nonlinear systems using adaptive dynamic programming, Finite-horizon optimal control for continuous-time uncertain nonlinear systems using reinforcement learning