% BibTeX of Marcus Hutter
% Navigation: [home] [search] [contact] [up]

## %-------------Publications-of-Marcus-Hutter-2021--------------%

% Journal article with a DOI but without final volume/issue numbers.
% Details that are still placeholders live in x-prefixed fields, which styles
% ignore (same convention as the xvolume/xpages fields used elsewhere in this
% file); the original "2021/2022" year string is preserved in xyear.
@Article{Hutter:21clogames,
  author    = {Mikael B{\"o}{\"o}rs and Tobias W{\"a}ngberg and Tom Everitt and Marcus Hutter},
  title     = {Classification by Decomposition: A Novel Approach to Classification of Symmetric 2 x 2 Games},
  journal   = {Theory and Decision},
  xvolume   = {TBA},
  xnumber   = {TBA},
  pages     = {1--46},
  publisher = {Springer},
  _month    = dec,
  year      = {2021},
  xyear     = {2021/2022},
  bibtex    = {http://www.hutter1.net/official/bib.htm#clogames},
  xurl      = {http://arxiv.org/abs/2112.none},
  pdf       = {http://www.hutter1.net/publ/clogames.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#agents},
  issn      = {1573-7187},
  doi       = {10.1007/s11238-021-09850-z},
  keywords  = {Classification; Symmetric games; 2×2 Games; Decomposition; Cooperation and conflict; Simplicity},
  abstract  = {In this paper, we provide a detailed review of previous classifications of 2×2 games and suggest a mathematically simple way to classify the symmetric 2×2 games based on a decomposition of the payoff matrix into a cooperative and a zero-sum part. We argue that differences in the interaction between the parts is what makes games interesting in different ways. Our claim is supported by evolutionary computer experiments and findings in previous literature. In addition, we provide a method for using a stereographic projection to create a compact 2-d representation of the game space.},
}

% DeepMind technical report distributed via arXiv.
% The report number carries the arXiv identifier (not a raw URL, which styles
% would print verbatim after "Technical Report"); the link itself stays in
% url, and eprint/archiveprefix expose the identifier to eprint-aware styles.
@TechReport{Hutter:21isotuning,
  author        = {Laurent Orseau and Marcus Hutter},
  title         = {Isotuning with Applications to Scale-Free Online Learning},
  institution   = {DeepMind},
  address       = {London},
  number        = {arXiv:2112.14586},
  eprint        = {2112.14586},
  archiveprefix = {arXiv},
  pages         = {1--32},
  _month        = dec,
  year          = {2021},
  bibtex        = {http://www.hutter1.net/official/bib.htm#isotuning},
  url           = {http://arxiv.org/abs/2112.14586},
  pdf           = {http://www.hutter1.net/publ/isotuning.pdf},
  project       = {http://www.hutter1.net/official/projects.htm#mixed},
  keywords      = {online learning; convex optimization; regret analysis; adaptive learning rate; scale-free; anytime; unbounded loss; unbounded domain},
  abstract      = {We extend and combine several tools of the literature to design fast, adaptive, anytime and scale-free online learning algorithms. Scale-free regret bounds must scale linearly with the maximum loss, both toward large losses and toward very small losses. Adaptive regret bounds demonstrate that an algorithm can take advantage of easy data and potentially have constant regret. We seek to develop fast algorithms that depend on as few parameters as possible, in particular they should be anytime and thus not depend on the time horizon. Our first and main tool, isotuning, is a generalization of the idea of balancing the trade-off of the regret. We develop a set of tools to design and analyze such learning rates easily and show that they adapt automatically to the rate of the regret (whether constant, O(log T), O(√T), etc.) within a factor 2 of the optimal learning rate in hindsight for the same observed quantities. The second tool is an online correction, which allows us to obtain centered bounds for many algorithms, to prevent the regret bounds from being vacuous when the domain is overly large or only partially constrained. The last tool, null updates, prevents the algorithm from performing overly large updates, which could result in unbounded regret, or even invalid updates.
We develop a general theory using these tools and apply it to several standard algorithms. In particular, we (almost entirely) restore the adaptivity to small losses of FTRL for unbounded domains, design and prove scale-free adaptive guarantees for a variant of Mirror Descent (at least when the Bregman divergence is convex in its second argument), extend Adapt-ML-Prod to scale-free guarantees, and provide several other minor contributions about Prod, AdaHedge, BOA and Soft-Bayes.},
}

% AGI'21 conference paper.
% Bibliographic details that are not yet known (volume, pages, ISBN, DOI) are
% parked in x-prefixed fields so that no style prints or links a placeholder.
@InProceedings{Hutter:21symintel,
  author    = {Samuel Allen Alexander and Marcus Hutter},
  title     = {Reward-Punishment Symmetric Universal Intelligence},
  booktitle = {Proc. 14th Conf. on Artificial General Intelligence ({AGI'21})},
  address   = {San Francisco, USA},
  series    = {LNAI},
  xvolume   = {??},
  xpages    = {??},
  _editor   = {Ben Goertzel and Matthew Iklé and Alexey Potapov},
  publisher = {Springer},
  _month    = oct,
  year      = {2021},
  bibtex    = {http://www.hutter1.net/official/bib.htm#symintel},
  url       = {http://arxiv.org/abs/2110.02450},
  pdf       = {http://www.hutter1.net/publ/symintel.pdf},
  slides    = {http://www.hutter1.net/publ/ssymintel.pdf},
  video     = {http://youtu.be/CnsqHSCBgX0?t=30250},
  qanda     = {http://youtu.be/CnsqHSCBgX0?t=32165},
  project   = {http://www.hutter1.net/official/projects.htm#uai},
  xdoi      = {TBA},
  issn      = {0302-9743},
  xisbn     = {??},
  keywords  = {Universal intelligence; Intelligence measures; Reinforcement learning.},
  abstract  = {Can an agent's intelligence level be negative? We extend the Legg-Hutter agent-environment framework to include punishments and argue for an affirmative answer to that question. We show that if the background encodings and Universal Turing Machine (UTM) admit certain Kolmogorov complexity symmetries, then the resulting Legg-Hutter intelligence measure is symmetric about the origin. In particular, this implies reward-ignoring agents have Legg-Hutter intelligence 0 according to such UTMs.},
}

% DeepMind technical report distributed via arXiv.
% Authors are separated by " and " (BibTeX treats commas as parts of a single
% name), and the quotation inside the abstract uses LaTeX ``...'' quotes so it
% cannot terminate the field early.
@TechReport{Hutter:21causalseq,
  author        = {Pedro A. Ortega and Markus Kunesch and Gr{\'e}goire Del{\'e}tang and Tim Genewein and Jordi Grau-Moya and Joel Veness and Jonas Buchli and Jonas Degrave and Bilal Piot and Julien Perolat and Tom Everitt and Corentin Tallec and Emilio Parisotto and Tom Erez and Yutian Chen and Scott Reed and Marcus Hutter and Nando de Freitas and Shane Legg},
  title         = {Shaking the Foundations: Delusions in Sequence Models for Interaction and Control},
  institution   = {DeepMind},
  address       = {London},
  number        = {arXiv:2110.10819},
  eprint        = {2110.10819},
  archiveprefix = {arXiv},
  pages         = {1--16},
  _month        = oct,
  year          = {2021},
  bibtex        = {http://www.hutter1.net/official/bib.htm#causalseq},
  url           = {http://arxiv.org/abs/2110.10819},
  pdf           = {http://www.hutter1.net/publ/causalseq.pdf},
  project       = {http://www.hutter1.net/official/projects.htm#mixed},
  keywords      = {sequence models; sequential prediction; reinforcement learning; causality; self-delusion},
  abstract      = {The recent phenomenal success of language models has reinvigorated machine learning research, and large sequence models such as transformers are being applied to a variety of domains. One important problem class that has remained relatively elusive however is purposeful adaptive behavior. Currently there is a common perception that sequence models ``lack the understanding of the cause and effect of their actions'' leading them to draw incorrect inferences due to auto-suggestive delusions. In this report we explain where this mismatch originates, and show that it can be resolved by treating actions as causal interventions. Finally, we show that in supervised learning, one can teach a system to condition or intervene on data by training with factual and counterfactual error signals respectively.},
}

% arXiv preprint recorded as an article.
% journal is kept because @Article requires it; eprint/archiveprefix add the
% machine-readable arXiv identifier for eprint-aware styles.
@Article{Hutter:21compcon,
  author        = {Elliot Catt and Marcus Hutter and Joel Veness},
  title         = {Reinforcement Learning with Information-Theoretic Actuation},
  journal       = {arXiv},
  eprint        = {2109.15147},
  archiveprefix = {arXiv},
  pages         = {1--11},
  _month        = sep,
  year          = {2021},
  bibtex        = {http://www.hutter1.net/official/bib.htm#compcon},
  url           = {http://arxiv.org/abs/2109.15147},
  pdf           = {http://www.hutter1.net/publ/compcon.pdf},
  project       = {http://www.hutter1.net/official/projects.htm#uai},
  keywords      = {Reinforcement Learning; large action spaces; compression; coding; internal actions; sampling.},
  abstract      = {Reinforcement Learning formalises an embodied agent's interaction with the environment through observations, rewards and actions. But where do the actions come from? Actions are often considered to represent something external, such as the movement of a limb, a chess piece, or more generally, the output of an actuator. In this work we explore and formalize a contrasting view, namely that actions are best thought of as the output of a sequence of internal choices with respect to an action model. This view is particularly well-suited for leveraging the recent advances in large sequence models as prior knowledge for multi-task reinforcement learning problems. Our main contribution in this work is to show how to augment the standard MDP formalism with a sequential notion of internal action using information-theoretic techniques, and that this leads to self-consistent definitions of both internal and external action value functions.},
  support       = {ARC grant DP150104590},
}

% Journal article in Applied System Innovation (MDPI), volume 4, issue 2.
@Article{Hutter:21ai4hum,
  author    = {Reinhard Hutter and Marcus Hutter},
  title     = {Chances and Risks of Artificial Intelligence — A Concept of Developing and Exploiting Machine Intelligence for Future Societies},
  journal   = {Applied System Innovation},
  volume    = {4},
  number    = {2},
  pages     = {1--19},
  publisher = {MDPI},
  _month    = jun,
  year      = {2021},
  bibtex    = {http://www.hutter1.net/official/bib.htm#ai4hum},
  http      = {https://www.mdpi.com/2571-5577/4/2/37},
  xurl      = {http://arxiv.org/abs/2106.none},
  pdf       = {http://www.hutter1.net/publ/ai4hum.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#safe},
  issn      = {2571-5577},
  doi       = {10.3390/asi4020037},
  keywords  = {artificial and human intelligence; security; risks and risk management; quality of life; common welfare; socio-political assessment},
  abstract  = {Artificial Intelligence (AI): Boon or Bane for societies? AI technologies and solutions—as most revolutionary technologies have done in the past—offer negative implications on the one hand and considerable positive potential on the other. Avoiding the former and fostering the latter will require substantial investments in future societal concepts, research and development, and control of AI-based solutions in AI security while avoiding abuse. Preparation for the future role of AI in societies should strive towards the implementation of related methods and tools for risk management, models of complementary human–machine cooperation, strategies for the optimization of production and administration, and innovative concepts for the distribution of the economic value created.
Two extreme possible “end states” of AI impact (if there is ever an end state) that are being discussed at present may manifest as (a) uncontrolled substitution by AI of major aspects of production, services, and administrative and decision-making processes, leading to unprecedented risks such as high unemployment, and devaluation and the underpayment of people in paid work, resulting in inequality in the distribution of wealth and employment, diminishing social peace, social cohesion, solidarity, security, etc., or, on the contrary, (b) the freeing of people from routine labor through increased automation in production, administration and services, and changing the constitution of politics and societies into constituencies with high ethical standards, personal self-determination, and the general dominance of humane principles, as opposed to pure materialism. Any mix of these two extremes could develop, and these combinations may vary among different societies and political systems.},
}

% Journal article in Synthese; volume, issue and pages are still placeholders
% held in x-prefixed fields.
@Article{Hutter:21alignx,
  author    = {Tom Everitt and Marcus Hutter and Ramana Kumar and Victoria Krakovna},
  title     = {Reward Tampering Problems and Solutions in Reinforcement Learning: A Causal Influence Diagram Perspective},
  journal   = {Synthese},
  xvolume   = {??},
  xnumber   = {??},
  xpages    = {??-??},
  publisher = {Springer},
  _month    = may,
  year      = {2021},
  bibtex    = {http://www.hutter1.net/official/bib.htm#alignx},
  url       = {http://arxiv.org/abs/1908.04734},
  pdf       = {http://www.hutter1.net/publ/alignx.pdf},
  slides    = {http://www.hutter1.net/publ/salign.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#safe},
  code      = {http://www.hutter1.net/publ/align.cpp},
  issn      = {0039-7857},
  doi       = {10.1007/s11229-021-03141-4},
  keywords  = {AI safety, reinforcement learning, Bayesian learning, causal graphs},
  abstract  = {Can humans get arbitrarily capable reinforcement learning (RL) agents to do their bidding? Or will sufficiently capable RL agents always find ways to bypass their intended objectives by shortcutting their reward signal? This question impacts how far RL can be scaled, and whether alternative paradigms must be developed in order to build safe artificial general intelligence. In this paper, we study when an RL agent has an instrumental goal to tamper with its reward process, and describe design principles that prevent instrumental goals for two different types of reward tampering (reward function tampering and RF-input tampering). Combined, the design principles can prevent both types of reward tampering from being instrumental goals. The analysis benefits from causal influence diagrams to provide intuitive yet precise formalizations.},
  support   = {ARC grant DP150104590},
  for       = {080101(60%),220312(20%),080198(20%)},
  seo       = {970108(80%),970117(20%)},
}

% ICML 2021 paper recorded as an article in the JMLR W&CP series, volume 139.
% Authors are separated by " and "; a comma-separated list is parsed by BibTeX
% as one malformed name ("too many commas").
@Article{Hutter:21ccamfrl,
  author   = {Thomas Mesnard and Th{\'e}ophane Weber and Fabio Viola and Shantanu Thakoor and Alaa Saade and Anna Harutyunyan and Will Dabney and Tom Stepleton and Nicolas Heess and Arthur Guez and Marcus Hutter and Lars Buesing and R{\'e}mi Munos},
  title    = {Counterfactual Credit Assignment in Model-Free Reinforcement Learning},
  journal  = {Journal of Machine Learning Research, W\&CP: ICML},
  volume   = {139},
  pages    = {7654--7664},
  _editor  = {Marina Meila and Tong Zhang},
  _month   = jul,
  year     = {2021},
  bibtex   = {http://www.hutter1.net/official/bib.htm#ccamfrl},
  url      = {http://arxiv.org/abs/2011.09464},
  pdf      = {http://www.hutter1.net/publ/ccamfrl.pdf},
  slides   = {https://icml.cc/virtual/2021/poster/9795},
  project  = {http://www.hutter1.net/official/projects.htm#rl},
  keywords = {Reinforcement Learning; Planning; Deep RL},
  abstract = {Credit assignment in reinforcement learning is the problem of measuring an action’s influence on future rewards. In particular, this requires separating skill from luck, i.e. disentangling the effect of an action on rewards from that of external factors and subsequent actions. To achieve this, we adapt the notion of counterfactuals from causality theory to a model-free RL setup. The key idea is to condition value functions on future events, by learning to extract relevant information from a trajectory. We formulate a family of policy gradient algorithms that use these future-conditional value functions as baselines or critics, and show that they are provably low variance. To avoid the potential bias from conditioning on future information, we constrain the hindsight information to not contain information about the agent's actions. We demonstrate the efficacy and validity of our algorithm on a number of illustrative and challenging problems.},
  znote    = {Acceptance rate: 1184/5513 = 21\%},
}

% Journal article in IEEE Journal on Selected Areas in Information Theory,
% volume 2, issue 2.
@Article{Hutter:21ckillcat,
  author    = {Michael K. Cohen and Marcus Hutter and Elliot Catt},
  title     = {Curiosity Killed or Incapacitated the Cat and the Asymptotically Optimal Agent},
  journal   = {IEEE Journal on Selected Areas in Information Theory},
  volume    = {2},
  number    = {2},
  pages     = {665--677},
  publisher = {IEEE},
  _month    = may,
  year      = {2021},
  bibtex    = {http://www.hutter1.net/official/bib.htm#ckillcat},
  url       = {http://arxiv.org/abs/2006.03357},
  pdf       = {http://www.hutter1.net/publ/ckillcat.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#safe},
  issn      = {2641-8770},
  doi       = {10.1109/JSAIT.2021.3079722},
  keywords  = {Artificial intelligence; learning; autonomous agents; Bayes methods; information theory; inference algorithms; history; reinforcement learning; Markov processes},
  abstract  = {Reinforcement learners are agents that learn to pick actions that lead to high reward. Ideally, the value of a reinforcement learner’s policy approaches optimality—where the optimal informed policy is the one which maximizes reward. Unfortunately, we show that if an agent is guaranteed to be “asymptotically optimal” in any (stochastically computable) environment, then subject to an assumption about the true environment, this agent will be either “destroyed” or “incapacitated” with probability 1. Much work in reinforcement learning uses an ergodicity assumption to avoid this problem. Often, doing theoretical research under simplifying assumptions prepares us to provide practical solutions even in the absence of those assumptions, but the ergodicity assumption in reinforcement learning may have led us entirely astray in preparing safe and effective exploration strategies for agents in dangerous environments. Rather than assuming away the problem, we present an agent, Mentee, with the modest guarantee of approaching the performance of a mentor, doing safe exploration instead of reckless exploration.
Critically, Mentee’s exploration probability depends on the expected information gain from exploring. In a simple non-ergodic environment with a weak mentor, we find Mentee outperforms existing asymptotically optimal agents and its mentor.},
  support   = {ARC grant DP150104590},
}

% Patent record: publication number in number, application number in
% appl_number; the underscore-prefixed fields are extra bookkeeping.
@patent{Hutter:21glcbpatent,
  author          = {Eren Sezener and Joel Veness and Marcus Hutter and Jianan Wang and David Budden},
  title           = {Gated Linear Contextual Bandits},
  _month          = apr,
  year            = {2021},
  number          = {WO2021069574A1},
  appl_number     = {PCT/EP2020/078259},
  _another_number = {45288-0091 WO1},
  journal         = {DeepMind},
  url             = {https://worldwide.espacenet.com/patent/search?q=pn%3DWO2021069574A1},
  pdf             = {http://www.hutter1.net/publ/glcbpatent.pdf},
  type            = {patent},
  abstract        = {Methods, systems, and apparatus, including computer programs encoded on computer storage media, for selecting actions in response to each context in a sequence of context inputs. One of the methods includes maintaining data specifying a respective gated linear network corresponding to each of the plurality of actions; for each context in the sequence of contexts: for each action, processing the context using the gated linear network corresponding to the action to generate a predicted probability; for each action, generating an action score for the action from at least the predicted probability; and selecting the action to be performed in response to the context based on the action scores.},
}

% Journal article in IEEE Journal on Selected Areas in Information Theory,
% volume 2, issue 2.
% Text-extraction damage in keywords/abstract is repaired: "existential",
% "information-theoretic", "world-model", and a stray footnote digit removed.
@Article{Hutter:21bomaix,
  author    = {Michael K. Cohen and Badri Vellambi and Marcus Hutter},
  title     = {Intelligence and Unambitiousness Using Algorithmic Information Theory},
  journal   = {IEEE Journal on Selected Areas in Information Theory},
  volume    = {2},
  number    = {2},
  pages     = {678--690},
  publisher = {IEEE},
  _month    = apr,
  year      = {2021},
  bibtex    = {http://www.hutter1.net/official/bib.htm#bomaix},
  url       = {http://arxiv.org/abs/2105.06268},
  pdf       = {http://www.hutter1.net/publ/bomaix.pdf},
  slides    = {http://www.hutter1.net/publ/sbomai.pdf},
  poster    = {http://www.hutter1.net/publ/pbomai.pdf},
  press     = {http://medium.com/analytics-vidhya/paper-summary-asymptotically-unambitious-artificial-general-intelligence-cohen-et-al-a5d091d501db},
  project   = {http://www.hutter1.net/official/projects.htm#safe},
  issn      = {2641-8770},
  doi       = {10.1109/JSAIT.2021.3073844},
  keywords  = {information theory; task analysis; computational modeling; history; schedules; Bayes methods; artificial general intelligence; existential threat; alignment problem; power; instrumental goal; reinforcement learning; inference algorithms; autonomous agents; learning},
  abstract  = {Algorithmic Information Theory has inspired intractable constructions of general intelligence (AGI), and undiscovered tractable approximations are likely feasible. Reinforcement Learning (RL), the dominant paradigm by which an agent might learn to solve arbitrary solvable problems, gives an agent a dangerous incentive: to gain arbitrary “power” in order to intervene in the provision of their own reward. We review the arguments that generally intelligent algorithmic-information-theoretic reinforcement learners such as Hutter’s AIXI would seek arbitrary power, including over us. Then, using an information-theoretic exploration schedule, and a setup inspired by causal influence theory, we present a variant of AIXI which learns to not seek arbitrary power; we call it “unambitious”.
We show that our agent learns to accrue reward at least as well as a human mentor, while relying on that mentor with diminishing probability. And given a formal assumption that we probe empirically, we show that eventually, the agent’s world-model incorporates the following true fact: intervening in the “outside world” will have no effect on reward acquisition; hence, it has no incentive to shape the outside world.},
  support   = {ARC grant DP150104590},
}

% DeepMind technical report distributed via arXiv.
% The abstract is LaTeX source, so Greek letters inside $...$ are written as
% \alpha / \beta macros (raw Unicode letters are not valid in math mode under
% classic LaTeX). The report number carries the arXiv identifier, not a URL.
@TechReport{Hutter:21scaling,
  author        = {Marcus Hutter},
  title         = {Learning Curve Theory},
  institution   = {DeepMind},
  address       = {London},
  number        = {arXiv:2102.04074},
  eprint        = {2102.04074},
  archiveprefix = {arXiv},
  _month        = feb,
  year          = {2021},
  bibtex        = {http://www.hutter1.net/official/bib.htm#scaling},
  url           = {http://arxiv.org/abs/2102.04074},
  pdf           = {http://www.hutter1.net/publ/scaling.pdf},
  slides        = {http://www.hutter1.net/publ/sscaling.pdf},
  video1        = {http://youtu.be/q5YhJ8QDkMQ},
  video2        = {http://media.mis.mpg.de/mml/2021-03-04},
  project       = {http://www.hutter1.net/official/projects.htm#mixed},
  keywords      = {Power Law, Scaling, Learning Curve, Theory, Data Size, Error, Loss, Zipf},
  abstract      = {Recently a number of empirical ``universal'' scaling law papers have been published, most notably by OpenAI. `Scaling laws' refers to power-law decreases of training or test error w.r.t.\ more data, larger neural networks, and/or more compute. In this work we focus on scaling w.r.t.\ data size $n$. Theoretical understanding of this phenomenon is in its infancy, except in finite-dimensional models for which error typically decreases with $n^{-1/2}$ or $n^{-1}$, where $n$ is the sample size. We develop and theoretically analyse the simplest possible (toy) model that can exhibit $n^{-\beta}$ learning curves for arbitrary power $\beta>0$, and determine to which extent power laws are universal or depend on the data distribution or loss function: Roughly, learning curves exhibit a power law with $\beta=\frac{\alpha}{1+\alpha}$ for Zipf-distributed data with exponent $1+\alpha$, independent of the choice of loss. Furthermore, noise rapidly deteriorates/improves in instantaneous/time-averaged learning curves for increasing $n$, suggesting that model selection should better be based on cumulative (AUC) or time-averaged error, not final test error.},
}

% AAAI'21 conference paper (proceedings volume 35).
@InProceedings{Hutter:21binesa,
  author    = {Sultan Javed Majeed and Marcus Hutter},
  title     = {Exact Reduction of Huge Action Spaces in General Reinforcement Learning},
  booktitle = {Proc. 35th {AAAI} Conference on Artificial Intelligence ({AAAI'21})},
  address   = {Virtual, Earth},
  volume    = {35},
  publisher = {AAAI Press},
  _month    = feb,
  year      = {2021},
  bibtex    = {http://www.hutter1.net/official/bib.htm#binesa},
  url       = {http://arxiv.org/abs/2012.10200},
  pdf       = {http://www.hutter1.net/publ/binesa.pdf},
  slides    = {http://www.hutter1.net/publ/sbinesa.pdf},
  poster    = {http://www.hutter1.net/publ/pbinesa.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#frl},
  keywords  = {reinforcement learning; extreme state aggregation; action binarization; non-Markov},
  abstract  = {The reinforcement learning (RL) framework formalizes the notion of learning with interactions. Many real-world problems have large state-spaces and/or action-spaces such as in Go, StarCraft, protein folding, and robotics or are non-Markovian, which cause significant challenges to RL algorithms. In this work we address the large action-space problem by sequentializing actions, which can reduce the action-space size significantly, even down to two actions at the expense of an increased planning horizon. We provide explicit and exact constructions and equivalence proofs for all quantities of interest for arbitrary history-based processes. In the case of MDPs, this could help RL algorithms that bootstrap. In this work we show how action-binarization in the non-MDP case can significantly improve Extreme State Aggregation (ESA) bounds. ESA allows casting any (non-MDP, non-ergodic, history-based) RL problem into a fixed-sized non-Markovian state-space with the help of a surrogate Markovian process. On the upside, ESA enjoys similar optimality guarantees as Markovian models do. But a downside is that the size of the aggregated state-space becomes exponential in the size of the action-space.
In this work, we patch this issue by binarizing the action-space. We provide an upper bound on the number of states of this binarized ESA that is logarithmic in the original action-space size, a double-exponential improvement.},
  support   = {ARC grant DP150104590},
  znote     = {Acceptance rate: 1692/7911=21\%},
}

% AAAI'21 conference paper (proceedings volume 35).
% Authors are separated by " and "; a comma-separated list is parsed by BibTeX
% as one malformed name ("too many commas").
@InProceedings{Hutter:21shortgln,
  author    = {Joel Veness and Tor Lattimore and David Budden and Avishkar Bhoopchand and Christopher Mattern and Agnieszka Grabska-Barwinska and Eren Sezener and Jianan Wang and Peter Toth and Simon Schmitt and Marcus Hutter},
  title     = {Gated Linear Networks},
  booktitle = {Proc. 35th {AAAI} Conference on Artificial Intelligence ({AAAI'21})},
  address   = {Virtual, Earth},
  volume    = {35},
  publisher = {AAAI Press},
  _month    = feb,
  year      = {2021},
  bibtex    = {http://www.hutter1.net/official/bib.htm#shortgln},
  url       = {http://arxiv.org/abs/1910.01526},
  pdf       = {http://www.hutter1.net/publ/shortgln.pdf},
  slides    = {http://www.hutter1.net/publ/sshortgln.pdf},
  poster    = {http://www.hutter1.net/publ/pshortgln.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#nn},
  press     = {http://www.reddit.com/r/MachineLearning/comments/hx0q69/r_deepminds_gated_linear_networks_paper_and_code/},
  code      = {http://github.com/aiwabdn/pygln},
  keywords  = {gating; linear; network; geometric mixing; capacity; backpropagation-free; online convex optimization; resilience to catastrophic forgetting; density estimation; empirical evaluation; UCI; MNIST},
  abstract  = {This paper presents a new family of backpropagation-free neural architectures, Gated Linear Networks (GLNs). What distinguishes GLNs from contemporary neural networks is the distributed and local nature of their credit assignment mechanism; each neuron directly predicts the target, forgoing the ability to learn feature representations in favor of rapid online learning. Individual neurons can model nonlinear functions via the use of data-dependent gating in conjunction with online convex optimization. We show that this architecture gives rise to universal learning capabilities in the limit, with effective model capacity increasing as a function of network size in a manner comparable with deep ReLU networks.
Furthermore, we demonstrate that the GLN learning mechanism possesses extraordinary resilience to catastrophic forgetting, performing comparably to a MLP with dropout and Elastic Weight Consolidation on standard benchmarks. These desirable theoretical and empirical properties position GLNs as a complementary technique to contemporary offline deep learning methods.},
  znote     = {Acceptance rate: 1692/7911=21\%},
}

% HICSS'21 conference paper (proceedings volume 54).
% The keyword list originally named "evaluation" twice; it is listed once.
@InProceedings{Hutter:21outman,
  author    = {Len Du and Marcus Hutter},
  title     = {How Useful are Hand-crafted Data? Making Cases for Anomaly Detection Methods},
  booktitle = {54th Hawaii International Conference on System Sciences ({HICSS'21})},
  address   = {Maui, Hawaii, USA},
  volume    = {54},
  pages     = {847--856},
  publisher = {ScholarSpace},
  _month    = jan,
  year      = {2021},
  bibtex    = {http://www.hutter1.net/official/bib.htm#outman},
  http      = {http://hdl.handle.net/10125/70716},
  pdf       = {http://www.hutter1.net/publ/outman.pdf},
  slides    = {http://www.hutter1.net/publ/soutman.pdf},
  project   = {http://www.hutter1.net/official/projects.htm#mixed},
  isbn      = {978-0-9981331-4-0},
  keywords  = {accountability; evaluation; obscurity of AI algorithms; anomaly detection; explainability; small data; testing AI},
  abstract  = {While the importance of small data has been admitted in principle, they have not been widely adopted as a necessity in current machine learning or data mining research. Most predominantly, machine learning methods were typically evaluated under a “bigger is better” presumption. The more (and the more complex) data we could pour at a method, the better we thought we were at estimating its performance. We deem this mindset detrimental to interpretability, explainability, and the sustained development of the field. For example, despite that new outlier detection methods were often inspired by small, low dimensional samples, their performance has been exclusively evaluated by large, high-dimensional datasets resembling real-world use cases. With these “big data” we miss the chance to gain insights from close looks at how exactly the algorithms perform, as we mere humans cannot really comprehend the samples. In this work, we explore in the exactly opposite direction. We run several classical anomaly detection methods against small, mindfully crafted cases on which the results can be examined in detail.
In addition to better understanding of these classical algorithms, our exploration has actually led to the discovery of some novel uses of classical anomaly detection methods to our surprise.},
  note      = {Nominated for best paper award: http://hicss.hawaii.edu/best-papers/},
  znote     = {Acceptance rate: 710/1449=49\%},
}

## %-------------Publications-of-Marcus-Hutter-2020--------------%

% NeurIPS'20 conference paper (proceedings volume 33).
@InProceedings{Hutter:20nnprune,
  author    = {Laurent Orseau and Marcus Hutter and Omar Rivasplata},
  title     = {Logarithmic Pruning is All You Need},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS'20})},
  volume    = {33},
  pages     = {2925--2934},
  _editor   = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
  publisher = {Curran Associates},
  address   = {Cambridge, MA, USA},
  _month    = dec,
  year      = {2020},
  bibtex    = {http://www.hutter1.net/official/bib.htm#nnprune},
  http      = {http://papers.nips.cc/paper/2020/hash/1e9491470749d5b0e361ce4f0b24d037-Abstract.html},
  url       = {http://arxiv.org/abs/2006.12156},
  pdf       = {http://www.hutter1.net/publ/nnprune.pdf},
  poster    = {http://www.hutter1.net/publ/pnnprune.pdf},
  video     = {http://nips.cc/virtual/2020/public/poster_1e9491470749d5b0e361ce4f0b24d037.html},
  project   = {http://www.hutter1.net/official/projects.htm#nn},
  keywords  = {subnetwork; lottery ticket hypothesis; pruning; mixture of weights},
  abstract  = {The Lottery Ticket Hypothesis is a conjecture that every large neural network contains a subnetwork that, when trained in isolation, achieves comparable performance to the large network. An even stronger conjecture has been proven recently: Every sufficiently overparameterized network contains a subnetwork that, even without training, achieves comparable accuracy to the trained large network. This theorem, however, relies on a number of strong assumptions and provides a loose polynomial factor on the size of the large network compared to the target function. In this work, we remove the most limiting assumptions of this previous work while providing significantly tighter bounds: the overparameterized network only needs to be a logarithmic factor in the accuracy larger than the target subnetwork.},
  znote     = {Acceptance rate: 1900/9454=20\%. Spotlight: 280/9454=3\%},
}

@InProceedings{Hutter:20:nctlfmn,
  author      = "Jianan Wang and Eren Sezener and David Budden and Marcus Hutter and Joel Veness",
  title       = "A Combinatorial Perspective on Transfer Learning",
  booktitle   = "Advances in Neural Information Processing Systems ({NeurIPS'20})",
  volume      = "33",
  pages       = "918--929",
  _editor     = "H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin",
  publisher   = "Curran Associates",
  address     = "Cambridge, MA, USA",
  _month      = dec,
  year        = "2020",
  bibtex      = "http://www.hutter1.net/official/bib.htm#nctlfmn",
  http        = "http://papers.nips.cc/paper/2020/hash/0a3b6f64f0523984e51323fe53b8c504-Abstract.html",
  url         = "http://arxiv.org/abs/2010.12268",
  pdf         = "http://www.hutter1.net/publ/nctlfmn.pdf",
  poster      = "http://www.hutter1.net/publ/pnctlfmn.pdf",
  video       = "http://nips.cc/virtual/2020/public/poster_0a3b6f64f0523984e51323fe53b8c504.html",
  project     = "http://www.hutter1.net/official/projects.htm#nn",
  code        = "http://github.com/aiwabdn/pygln",
  keywords    = "gating; linear; network; transfer learning; online convex optimization; geometric mixing; resilience to catastrophic forgetting; forget me not process; empirical evaluation; MNIST",
  abstract    = "Human intelligence is characterized not only by the capacity to learn complex skills, but the ability to rapidly adapt and acquire new skills within an ever-changing environment. In this work we study how the learning of modular solutions can allow for effective generalization to both unseen and potentially differently distributed data. Our main postulate is that the combination of task segmentation, modular learning and memory-based ensembling can give rise to generalization on an exponentially growing number of unseen tasks. We provide a concrete instantiation of this idea using a combination of: (1) the Forget-Me-Not Process, for task segmentation and memory based ensembling; and (2) Gated Linear Networks, which in contrast to contemporary deep learning techniques use a modular and local learning mechanism.
We demonstrate that this system exhibits a number of desirable continual learning properties: robustness to catastrophic forgetting, no negative transfer and increasing levels of positive transfer as more tasks are seen. We show competitive performance against both offline and online methods on standard continual learning benchmarks.",
  znote       = "Acceptance rate: 1900/9454=20\%",
}

@InProceedings{Hutter:20banditgln,
  author      = "Eren Sezener and Marcus Hutter and David Budden and Jianan Wang and Joel Veness",
  title       = "Online Learning in Contextual Bandits using Gated Linear Networks",
  booktitle   = "Advances in Neural Information Processing Systems ({NeurIPS'20})",
  volume      = "33",
  pages       = "19467--19477",
  _editor     = "H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin",
  publisher   = "Curran Associates",
  address     = "Cambridge, MA, USA",
  _month      = dec,
  year        = "2020",
  bibtex      = "http://www.hutter1.net/official/bib.htm#banditgln",
  http        = "http://papers.nips.cc/paper/2020/hash/e287f0b2e730059c55d97fa92649f4f2-Abstract.html",
  url         = "http://arxiv.org/abs/2002.11611",
  pdf         = "http://www.hutter1.net/publ/banditgln.pdf",
  poster      = "http://www.hutter1.net/publ/pbanditgln.pdf",
  video       = "http://nips.cc/virtual/2020/public/poster_e287f0b2e730059c55d97fa92649f4f2.html",
  project     = "http://www.hutter1.net/official/projects.htm#nn",
  code        = "http://github.com/aiwabdn/pygln",
  keywords    = "gating; linear; network; contextual bandits; online convex optimization; geometric mixing",
  abstract    = "We introduce a new and completely online contextual bandit algorithm called Gated Linear Contextual Bandits (GLCB). This algorithm is based on Gated Linear Networks (GLNs), a recently introduced deep learning architecture with properties well-suited to the online setting. Leveraging data-dependent gating properties of the GLN we are able to estimate prediction uncertainty with effectively zero algorithmic overhead. We empirically evaluate GLCB compared to 9 state-of-the-art algorithms that leverage deep neural networks, on a standard benchmark suite of discrete and continuous contextual bandit problems. GLCB obtains mean first-place despite being the only online method, and we further support these results with a theoretical study of its convergence properties.",
  znote       = "Acceptance rate: 1900/9454=20\%",
}

@TechReport{Hutter:20causalrl,
  author      = "Thomas Mesnard and Théophane Weber and Fabio Viola and Shantanu Thakoor and Alaa Saade and Anna Harutyunyan and Will Dabney and Tom Stepleton and Nicolas Heess and Arthur Guez and Marcus Hutter and Lars Buesing and Rémi Munos",
  title       = "Counterfactual Credit Assignment in Model-Free Reinforcement Learning",
  institution = "DeepMind",
  address     = "London, UK",
  number      = "arXiv:2011.09464",
  _month      = nov,
  year        = "2020",
  bibtex      = "http://www.hutter1.net/official/bib.htm#causalrl",
  url         = "http://arxiv.org/abs/2011.09464",
  pdf         = "http://www.hutter1.net/publ/causalrl.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#rl",
  abstract    = "Credit assignment in reinforcement learning is the problem of measuring an action’s influence on future rewards. In particular, this requires separating skill from luck, ie. disentangling the effect of an action on rewards from that of external factors and subsequent actions. To achieve this, we adapt the notion of counterfactuals from causality theory to a model-free RL setup. The key idea is to condition value functions on future events, by learning to extract relevant information from a trajectory. We then propose to use these as future-conditional baselines and critics in policy gradient algorithms and we develop a valid, practical variant with provably lower variance, while achieving unbiasedness by constraining the hindsight information not to contain information about the agent’s actions. We demonstrate the efficacy and validity of our algorithm on a number of illustrative problems.",
}

@Article{Hutter:20gpt3agi,
  author      = "Marcus Hutter",
  title       = "{GPT-3} and {AGI}",
  publisher   = "Trusted Autonomous Systems",
  _month      = aug,
  year        = "2020",
  bibtex      = "http://www.hutter1.net/official/bib.htm#gpt3agi",
  http        = "http://www.eventbrite.com.au/e/a-discussion-on-gpt-3-and-artificial-general-intelligence-tickets-116673544713",
  slides      = "http://www.hutter1.net/publ/sgpt3agi.pdf",
  video       = "http://youtu.be/E25Uk8WpYQE",
  project     = "http://www.hutter1.net/official/projects.htm#agi",
  discussion  = "http://youtu.be/aDFLp4A1EmY",
  keywords    = "Deep Learning; Generative Pre-Trained Transformer; Inner Working; Artificial General Intelligence; Achievements; Limitations; Philosophy; Outlook",
  abstract    = "GPT-3 stands for Generative Pre-trained Transformer 3. It is a gargantuan artificial Neural Network (NN) around the size of a mouse brain, trained on essentially the whole internet and millions of books. GPT-3 has demonstrated impressive performance on a wide range of language tasks. Most discussions focus on GPT-3’s performance. In this talk I will give a glimpse of how GPT-3 actually works, and ask and tentatively answer the question of whether it is a step towards creating Artificial General Intelligence (AGI). The talk has been given as a primer in a panel discussion on this topic.",
}

@InProceedings{Hutter:20aixipess,
  author      = "Michael Cohen and Marcus Hutter",
  title       = "Pessimism About Unknown Unknowns Inspires Conservatism",
  booktitle   = "33rd Conference on Learning Theory ({COLT'20})",
  address     = "Virtual / Graz, Austria",
  volume      = "125",
  series      = "Proceedings of Machine Learning Research",
  pages       = "1344--1373",
  _editor     = "Jacob Abernethy and Shivani Agarwal",
  publisher   = "PMLR",
  _month      = jul,
  year        = "2020",
  bibtex      = "http://www.hutter1.net/official/bib.htm#aixipess",
  http        = "http://proceedings.mlr.press/v125/cohen20a.html",
  url         = "http://arxiv.org/abs/2006.08753",
  pdf         = "http://www.hutter1.net/publ/aixipess.pdf",
  slides      = "http://www.hutter1.net/publ/saixipess.pdf",
  video       = "http://www.colt2020.org/virtual/papers/paper_221.html",
  project     = "http://www.hutter1.net/official/projects.htm#safe",
  issn        = "1532-4435",
  keywords    = "",
  abstract    = "If we could define the set of all bad outcomes, we could hard-code an agent which avoids them; however, in sufficiently complex environments, this is infeasible. We do not know of any general-purpose approaches in the literature to avoiding novel failure modes. Motivated by this, we define an idealized Bayesian reinforcement learner which follows a policy that maximizes the worst-case expected reward over a set of world-models. We call this agent pessimistic, since it optimizes assuming the worst case. A scalar parameter tunes the agent's pessimism by changing the size of the set of world-models taken into account. Our first main contribution is: given an assumption about the agent's model class, a sufficiently pessimistic agent does not cause ``unprecedented events'' with probability $1-\delta$, whether or not designers know how to precisely specify those precedents they are concerned with. Since pessimism discourages exploration, at each timestep, the agent may defer to a mentor, who may be a human or some known-safe policy we would like to improve.
Our other main contribution is that the agent's policy's value approaches at least that of the mentor, while the probability of deferring to the mentor goes to 0. In high-stakes environments, we might like advanced artificial agents to pursue goals cautiously, which is a non-trivial problem even if the agent were allowed arbitrary computing power; we present a formal solution.",
  support     = "ARC grant DP150104590",
  znote       = "Acceptance rate: 119/388 = 31\%",
}

@TechReport{Hutter:20asymnn,
  author      = "Marcus Hutter",
  title       = "On Representing (Anti)Symmetric Functions",
  institution = "DeepMind",
  address     = "London, UK",
  number      = "arXiv:2007.15298",
  _month      = jun,
  year        = "2020",
  bibtex      = "http://www.hutter1.net/official/bib.htm#asymnn",
  url         = "http://arxiv.org/abs/2007.15298",
  pdf         = "http://www.hutter1.net/publ/asymnn.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#nn",
  keywords    = "Neural network, approximation, universality, Slater determinant, Vandermonde matrix, equivariance, symmetry, anti-symmetry, symmetric polynomials, polarized basis, multilayer perceptron, continuity, smoothness",
  abstract    = "Permutation-invariant, -equivariant, and -covariant functions and anti-symmetric functions are important in quantum physics, computer vision, and other disciplines. Applications often require most or all of the following properties: (a) a large class of such functions can be approximated, e.g. all continuous function, (b) only the (anti)symmetric functions can be represented, (c) a fast algorithm for computing the approximation, (d) the representation itself is continuous or differentiable, (e) the architecture is suitable for learning the function from data. (Anti)symmetric neural networks have recently been developed and applied with great success. A few theoretical approximation results have been proven, but many questions are still open, especially for particles in more than one dimension and the anti-symmetric case, which this work focusses on. More concretely, we derive natural polynomial approximations in the symmetric case, and approximations based on a single generalized Slater determinant in the anti-symmetric case. Unlike some previous super-exponential and discontinuous approximations, these seem a more promising basis for future tighter bounds. We provide a complete and explicit universality proof of the Equivariant MultiLayer Perceptron, which implies universality of symmetric MLPs and the FermiNet.",
}

@TechReport{Hutter:20qcsol,
  author      = "Elliot Catt and Marcus Hutter",
  title       = "A Gentle Introduction to Quantum Computing Algorithms with Applications to Universal Prediction",
  institution = "Australian National University",
  address     = "Canberra, Australia",
  number      = "arXiv:2005.03137",
  _month      = may,
  year        = "2020",
  bibtex      = "http://www.hutter1.net/official/bib.htm#qcsol",
  url         = "http://arxiv.org/abs/2005.03137",
  pdf         = "http://www.hutter1.net/publ/qcsol.pdf",
  slides      = "http://www.hutter1.net/publ/sqcsol.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#ait",
  abstract    = "In this technical report we give an elementary introduction to Quantum Computing for non-physicists. In this introduction we describe in detail some of the foundational Quantum Algorithms including: the Deutsch-Jozsa Algorithm, Shor’s Algorithm, Grover Search, and Quantum Counting Algorithm and briefly the Harrow-Lloyd Algorithm. Additionally we give an introduction to Solomonoff Induction, a theoretically optimal method for prediction. We then attempt to use Quantum computing to find better algorithms for the approximation of Solomonoff Induction. This is done by using techniques from other Quantum computing algorithms to achieve a speedup in computing the speed prior, which is an approximation of Solomonoff’s prior, a key part of Solomonoff Induction. The major limiting factors are that the probabilities being computed are often so small that without a sufficient (often large) amount of trials, the error may be larger than the result. If a substantial speedup in the computation of an approximation of Solomonoff Induction can be achieved through quantum computing, then this can be applied to the field of intelligent agents as a key part of an approximation of the agent AIXI.",
}

@InProceedings{Hutter:20bomai,
  author      = "Michael Cohen and Badri Vellambi and Marcus Hutter",
  title       = "Asymptotically Unambitious Artificial General Intelligence",
  booktitle   = "Proc. 34th {AAAI} Conference on Artificial Intelligence ({AAAI'20})",
  address     = "New York, USA",
  _editor     = "F. Rossi and V. Conitzer and F. Sha",
  volume      = "34",
  number      = "3",
  pages       = "2467--2476",
  publisher   = "AAAI Press",
  _month      = feb,
  year        = "2020",
  bibtex      = "http://www.hutter1.net/official/bib.htm#bomai",
  url         = "http://arxiv.org/abs/1905.12186",
  pdf         = "http://www.hutter1.net/publ/bomai.pdf",
  slides      = "http://www.hutter1.net/publ/sbomai.pdf",
  poster      = "http://www.hutter1.net/publ/pbomai.pdf",
  press       = "http://medium.com/analytics-vidhya/paper-summary-asymptotically-unambitious-artificial-general-intelligence-cohen-et-al-a5d091d501db",
  project     = "http://www.hutter1.net/official/projects.htm#safe",
  issn        = "2159-5399",
  isbn        = "978-1-57735-835-0",
  doi         = "10.1609/aaai.v34i03.5628",
  keywords    = "artificial general intelligence; history; schedules; Bayes methods; existential threat; alignment problem; power; instrumental goal; reinforcement learning.",
  abstract    = "General intelligence, the ability to solve arbitrary solvable problems, is supposed by many to be artificially constructible. Narrow intelligence, the ability to solve a given particularly difficult problem, has seen impressive recent development. Notable examples include self-driving cars, Go engines, image classifiers, and translators. Artificial General Intelligence (AGI) presents dangers that narrow intelligence does not: if something smarter than us across every domain were indifferent to our concerns, it would be an existential threat to humanity, just as we threaten many species despite no ill will. Even the theory of how to maintain the alignment of an AGI’s goals with our own has proven highly elusive.
We present the first algorithm we are aware of for asymptotically unambitious AGI, where “unambitiousness” includes not seeking arbitrary power. Thus, we identify an exception to the Instrumental Convergence Thesis, which is roughly that by default, an AGI would seek power, including over us.",
  support     = "ARC grant DP150104590",
  for         = "080101(30%),010404(30%),220312(20%),080198(20%)",
  seo         = "970108(80%),970117(20%)",
  znote       = "Acceptance rate: 1591/7737=21\%",
}

## %-------------Publications-of-Marcus-Hutter-2019--------------%

@Article{Hutter:19aligns,
  author      = "Tom Everitt and Ramana Kumar and Marcus Hutter",
  title       = "Designing Agent Incentives to Avoid Reward Tampering",
  journal     = "Medium",
  volume      = "8",
  number      = "14",
  _month      = aug,
  year        = "2019",
  bibtex      = "http://www.hutter1.net/official/bib.htm#aligns",
  url         = "http://medium.com/@deepmindsafetyresearch/designing-agent-incentives-to-avoid-reward-tampering-4380c1bb6cd",
  pdf         = "http://www.hutter1.net/publ/aligns.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#safe",
  keywords    = "AI safety, reinforcement learning, Bayesian learning, causal graphs",
  abstract    = "From an AI safety perspective, having a clear design principle and a crisp characterization of what problem it solves means that we don’t have to guess which agents are safe. In this post and paper we describe how a design principle called current-RF optimization avoids the reward function tampering problem.",
  for         = "080101(60%),220312(20%),080198(20%)",
  seo         = "970108(80%),970117(20%)",
}

@InProceedings{Hutter:19rlwlinfa,
  author      = "Marcus Hutter and Samuel Yang-Zhao and Sultan Javed Majeed",
  title       = "Conditions on Features for Temporal Difference-Like Methods to Converge",
  booktitle   = "Proc. 28th International Joint Conf. on Artificial Intelligence ({IJCAI'19})",
  address     = "Macao, China",
  _editor     = "Sarit Kraus",
  _publisher  = "IJCAI",
  pages       = "2570--2577",
  _month      = aug,
  year        = "2019",
  bibtex      = "http://www.hutter1.net/official/bib.htm#rlwlinfa",
  url         = "http://arxiv.org/abs/1905.11702",
  pdf         = "http://www.hutter1.net/publ/rlwlinfa.pdf",
  slides      = "http://www.hutter1.net/publ/srlwlinfa.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#frl",
  isbn        = "978-0-9992411-4-1",
  doi         = "10.24963/ijcai.2019/357",
  keywords    = "reinforcement learning; temporal difference learning; Bellman equation; unique solution; linear function approximation; convergence; negative result; wrong solution; natural algorithm",
  abstract    = "The convergence of many reinforcement learning (RL) algorithms with linear function approximation has been investigated extensively but most proofs assume that these methods converge to a unique solution. In this paper, we provide a complete characterization of non-uniqueness issues for a large class of reinforcement learning algorithms, simultaneously unifying many counter-examples to convergence in a theoretical framework. We achieve this by proving a new condition on features that can determine whether the convergence assumptions are valid or non-uniqueness holds. We consider a general class of RL methods, which we call natural algorithms, whose solutions are characterized as the fixed point of a projected Bellman equation. Our main result proves that natural algorithms converge to the correct solution if and only if all the value functions in the approximation space satisfy a certain shape.
This implies that natural algorithms are, in general, inherently prone to converge to the wrong solution for most feature choices even if the value function can be represented exactly. Given our results, we show that state aggregation-based features are a safe choice for natural algorithms and also provide a condition for finding convergent algorithms under other feature constructions.",
  support     = "ARC grant DP150104590",
  for         = "080101(60%),010404(40%)",
  seo         = "970108(100%)",
  znote       = "Acceptance rate: 850/4752=35\%",
}

@InProceedings{Hutter:19ksasao,
  author      = "Michael Cohen and Elliot Catt and Marcus Hutter",
  title       = "A Strongly Asymptotically Optimal Agent in General Environments",
  booktitle   = "Proc. 28th International Joint Conf. on Artificial Intelligence ({IJCAI'19})",
  address     = "Macao, China",
  _editor     = "Sarit Kraus",
  _publisher  = "IJCAI",
  pages       = "2179--2186",
  _month      = aug,
  year        = "2019",
  bibtex      = "http://www.hutter1.net/official/bib.htm#ksasao",
  url         = "http://arxiv.org/abs/1903.01021",
  pdf         = "http://www.hutter1.net/publ/ksasao.pdf",
  slides      = "http://www.hutter1.net/publ/sksasao.pdf",
  poster      = "http://www.hutter1.net/publ/pksasao.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#uai",
  isbn        = "978-0-9992411-4-1",
  doi         = "10.24963/ijcai.2019/302",
  keywords    = "reinforcement learning; model-based reasoning; sequential decision making; probabilistic inference; AIXI",
  abstract    = "Reinforcement Learning agents are expected to eventually perform well. Typically, this takes the form of a guarantee about the asymptotic behavior of an algorithm given some assumptions about the environment. We present an algorithm for a policy whose value approaches the optimal value with probability 1 in all computable probabilistic environments, provided the agent has a bounded horizon. This is known as strong asymptotic optimality, and it was previously unknown whether it was possible for a policy to be strongly asymptotically optimal in the class of all computable probabilistic environments. Our agent, Inquisitive Reinforcement Learner (Inq), is more likely to explore the more it expects an exploratory action to reduce its uncertainty about which environment it is in, hence the term inquisitive. Exploring inquisitively is a strategy that can be applied generally; for more manageable environment classes, inquisitiveness is tractable.
We conducted experiments in ``grid-worlds'' to compare the Inquisitive Reinforcement Learner to other weakly asymptotically optimal agents.",
  support     = "ARC grant DP150104590",
  for         = "080101(60%),010404(40%)",
  seo         = "970108(100%)",
  znote       = "Acceptance rate: 850/4752=35\%",
}

@TechReport{Hutter:19fair,
  author      = "Marcus Hutter",
  title       = "Fairness without Regret",
  institution = "DeepMind \& ANU",
  _month      = jul,
  year        = "2019",
  bibtex      = "http://www.hutter1.net/official/bib.htm#fair",
  url         = "http://arxiv.org/abs/1907.05159",
  pdf         = "http://www.hutter1.net/publ/fair.pdf",
  latex       = "http://www.hutter1.net/publ/fair.tex",
  project     = "http://www.hutter1.net/official/projects.htm#mixed",
  keywords    = "utility; objective; optimal; fair/equitable/just; cost/regret; uncertainty.",
  abstract    = "A popular approach of achieving fairness in optimization problems is by constraining the solution space to ``fair'' solutions, which unfortunately typically reduces solution quality. In practice, the ultimate goal is often an aggregate of sub-goals without a unique or best way of combining them or which is otherwise only partially known. I turn this problem into a feature and suggest to use a parametrized objective and vary the parameters within reasonable ranges to get a {\em set} of optimal solutions, which can then be optimized using secondary criteria such as fairness without compromising the primary objective, i.e.\ without regret (societal cost).",
  for         = "220104(70%),010303(30%)",
  seo         = "940401(70%),970108(30%)",
}

@InProceedings{Hutter:19actagg,
  author      = "Sultan Javed Majeed and Marcus Hutter",
  title       = "Performance Guarantees for Homomorphisms beyond {M}arkov Decision Processes",
  booktitle   = "Proc. 33rd {AAAI} Conference on Artificial Intelligence ({AAAI'19})",
  address     = "Honolulu, USA",
  volume      = "33",
  pages       = "7659--7666",
  publisher   = "AAAI Press",
  _month      = jan,
  year        = "2019",
  bibtex      = "http://www.hutter1.net/official/bib.htm#actagg",
  url         = "http://arxiv.org/abs/1811.03895",
  pdf         = "http://www.hutter1.net/publ/actagg.pdf",
  poster      = "http://www.hutter1.net/publ/sactagg.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#frl",
  issn        = "2159-5399",
  isbn        = "978-1-57735-809-1",
  doi         = "10.1609/aaai.v33i01.33017659",
  keywords    = "homomorphism; state aggregation; non-MDP; action-value aggregation; reinforcement learning.",
  abstract    = "Most real-world problems have huge state and/or action spaces. Therefore, a naive application of existing tabular solution methods is not tractable on such problems. Nonetheless, these solution methods are quite useful if an agent has access to a relatively small state-action space homomorphism of the true environment and near-optimal performance is guaranteed by the map. A plethora of research is focused on the case when the homomorphism is a Markovian representation of the underlying process. However, we show that near-optimal performance is sometimes guaranteed even if the homomorphism is non-Markovian.",
  support     = "ARC grant DP150104590",
  for         = "080101(50%),080198(50%)",
  seo         = "970108(100%)",
  znote       = "Acceptance rate: 1150/7095=16\%",
}

## %-------------Publications-of-Marcus-Hutter-2018--------------%

@InProceedings{Hutter:18agisafe,
  author      = "Tom Everitt and Gary Lea and Marcus Hutter",
  title       = "{AGI} Safety Literature Review",
  booktitle   = "Proc. 27th International Joint Conf. on Artificial Intelligence ({IJCAI'18})",
  address     = "Stockholm, Sweden",
  _editor     = "Jérôme Lang",
  _publisher  = "IJCAI",
  pages       = "5441--5449",
  _month      = jul,
  year        = "2018",
  bibtex      = "http://www.hutter1.net/official/bib.htm#agisafe",
  url         = "http://arxiv.org/abs/1805.01109",
  pdf         = "http://www.hutter1.net/publ/agisafe.pdf",
  slides      = "http://www.hutter1.net/publ/sagisafe.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#safe",
  isbn        = "978-0-9992411-2-7",
  doi         = "10.24963/ijcai.2018/768",
  keywords    = "reinforcement learning; philosophical and ethical issues; artificial general intelligence; AGI safety; public policy; survey; future AGI.",
  abstract    = "The development of Artificial General Intelligence (AGI) promises to be a major event. Along with its many potential benefits, it also raises serious safety concerns. The intention of this paper is to provide an easily accessible and up-to-date collection of references for the emerging field of AGI safety. A significant number of safety problems for AGI have been identified. We list these, and survey recent research on solving them. We also cover works on how best to think of AGI from the limited knowledge we have today, predictions for when AGI will first be created, and what will happen after its creation. Finally, we review the current public policy on AGI.",
  note        = "IJCAI Review Track",
  support     = "ARC grant DP150104590",
  for         = "080101(60%),220312(20%),080198(20%)",
  seo         = "970108(80%),970117(20%)",
  znote       = "Acceptance rate: 15/43=35\%",
}

@InProceedings{Hutter:18qnonmdp,
  author      = "Sultan Javed Majeed and Marcus Hutter",
  title       = "On {Q}-learning Convergence for Non-{M}arkov Decision Processes",
  booktitle   = "Proc. 27th International Joint Conf. on Artificial Intelligence ({IJCAI'18})",
  address     = "Stockholm, Sweden",
  _editor     = "Jérôme Lang",
  _publisher  = "IJCAI",
  pages       = "2546--2552",
  _month      = jul,
  year        = "2018",
  bibtex      = "http://www.hutter1.net/official/bib.htm#qnonmdp",
  xurl        = "http://arxiv.org/abs/1807.none",
  pdf         = "http://www.hutter1.net/publ/qnonmdp.pdf",
  slides      = "http://www.hutter1.net/publ/sqnonmdp.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#frl",
  isbn        = "978-0-9992411-2-7",
  doi         = "10.24963/ijcai.2018/353",
  keywords    = "reinforcement learning; TD-learning; Q-learning; non-MDP; non-ergodic; convergence; abstractions; state-uniformity.",
  abstract    = "Temporal-difference (TD) learning is an attractive, computationally efficient framework for model-free reinforcement learning. Q-learning is one of the most widely used TD learning technique that enables an agent to learn the optimal action-value function, i.e. Q-value function. Contrary to its widespread use, Q-learning has only been proven to converge on Markov Decision Processes (MDPs) and Q-uniform abstractions of finite-state MDPs. On the other hand, most real-world problems are inherently non-Markovian: the full true state of the environment is not revealed by recent observations. In this paper, we investigate the behavior of Q-learning when applied to non-MDP and non-ergodic domains which may have infinitely many underlying states. We prove that the convergence guarantee of Q-learning can be extended to a class of such non-MDP problems, in particular, to some non-stationary domains.
We show that state-uniformity of the optimal Q-value function is a necessary and sufficient condition for Q-learning to converge even in the case of infinitely many internal states.",
  for         = "080101(50%),080198(50%)",
  seo         = "970108(100%)",
  znote       = "Acceptance rate: 710/3470=21\%",
}

@Article{Hutter:18off2onx,
  author      = "Marcus Hutter",
  title       = "Tractability of Batch to Sequential Conversion",
  journal     = "Theoretical Computer Science",
  volume      = "733",
  pages       = "71--82",
  publisher   = "Elsevier",
  _month      = jul,
  year        = "2018",
  bibtex      = "http://www.hutter1.net/official/bib.htm#off2onx",
  url         = "http://arxiv.org/abs/1407.3334",
  pdf         = "http://www.hutter1.net/publ/off2onx.pdf",
  latex       = "http://www.hutter1.net/publ/off2onx.tex",
  slides      = "http://www.hutter1.net/publ/soff2on.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#infoth",
  issn        = "0304-3975",
  doi         = "10.1016/j.tcs.2018.04.037",
  keywords    = "offline; online; batch; sequential; probability; estimation; prediction; time-consistency; normalization; tractable; regret; combinatorics; Bayes; Laplace; Ristad; Good-Turing.",
  abstract    = "We consider the problem of converting batch estimators into a sequential predictor or estimator with small extra regret. Formally this is the problem of merging a collection of probability measures over strings of length 1,2,3,... into a single probability measure over infinite sequences. We describe various approaches and their pros and cons on various examples. As a side-result we give an elementary non-heuristic purely combinatoric derivation of Turing's famous estimator. Our main technical contribution is to determine the computational complexity of sequential estimators with good guarantees in general. We conclude with an open problem on how to derive tractable sequential from batch estimators with good guarantees in general.",
  for         = "080401(30%),080201(30%),010405(40%)",
  seo         = "970108(100%)",
}

@InProceedings{Hutter:18convbinctw,
  author      = "Badri N. Vellambi and Marcus Hutter",
  title       = "Convergence of Binarized Context-tree Weighting for Estimating Distributions of Stationary Sources",
  booktitle   = "Proc. {IEEE} International Symposium on Information Theory ({ISIT'18})",
  address     = "Vail, USA",
  pages       = "731--735",
  _editor     = "R. L. Urbanke and M. K. Varanasi",
  publisher   = "IEEE",
  _month      = jun,
  year        = "2018",
  bibtex      = "http://www.hutter1.net/official/bib.htm#convbinctw",
  pdf         = "http://www.hutter1.net/publ/convbinctw.pdf",
  slides      = "http://www.hutter1.net/publ/sconvbinctw.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#compress",
  issn        = "2157-8117",
  isbn        = "978-1-5386-4780-6",
  doi         = "10.1109/ISIT.2018.8437737",
  keywords    = "Context-tree weighting; KT estimator; frequency estimator; binarization; stationary distribution; tree source; stationary ergodic source; convergence rate; worst-case bounds.",
  abstract    = "This work investigates the convergence rate of learning the stationary distribution of finite-alphabet stationary ergodic sources using a binarized context-tree weighting approach. The binarized context-tree weighting (CTW) algorithm estimates the stationary distribution of a symbol as a product of conditional distributions of each component bit, which are determined in a sequential manner using the well known binary context-tree weighting method. We establish that CTW algorithm is a consistent estimator of the stationary distribution, and that the worst-case $L_1$-prediction error between the CTW and frequency estimates using $n$ source symbols each of which when binarized consists of $k>1$ bits decays as $\Theta(\sqrt{2^k\log(n)/n})$.",
  support     = "ARC grants DP120100950 and DP150104590",
  for         = "080401(100%)",
  seo         = "970108(80%),890205(20%)",
}

@Article{Hutter:18align,
  author     = "Tom Everitt and Marcus Hutter",
  title      = "The Alignment Problem for History-Based {B}ayesian Reinforcement Learners",
  journal    = "submitted",
  _month     = jun,
  year       = "2018",
  bibtex     = "http://www.hutter1.net/official/bib.htm#align",
  http       = "http://www.tomeveritt.se/papers/alignment.pdf",
  pdf        = "http://www.hutter1.net/publ/align.pdf",
  slides     = "http://www.hutter1.net/publ/salign.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#safe",
  keywords   = "AI safety; reinforcement learning; Bayesian learning; causal graphs",
  abstract   = "Value alignment is often considered a critical component of safe artificial intelligence. Meanwhile, reinforcement learning is often criticized as being inherently unsafe and misaligned, for reasons such as wireheading, delusionboxes, misspecified reward functions and distributional shifts. In this paper, we categorize sources of misalignment for reinforcement learning agents, illustrating each type with numerous examples. For each type of problem, we also describe ways to remove the source of misalignment. Combined, the suggestions form high-level blueprints for how to design value aligned RL agents.",
  support    = "ARC grant DP150104590",
  for        = "080101(60%),220312(20%),080198(20%)",
  seo        = "970108(80%),970117(20%)",
  note       = "First winner of the AI alignment prize round 2: http://www.lesswrong.com/posts/SSEyiHaACSYDHcYZz/announcement-ai-alignment-prize-round-2-winners-and-next",
}

@Article{Hutter:18aixicplexx,
  author     = "Jan Leike and Marcus Hutter",
  title      = "On the Computability of {S}olomonoff Induction and {AIXI}",
  journal    = "Theoretical Computer Science",
  volume     = "716",
  pages      = "28--49",
  publisher  = "Elsevier",
  _month     = mar,
  year       = "2018",
  bibtex     = "http://www.hutter1.net/official/bib.htm#aixicplexx",
  pdf        = "http://www.hutter1.net/publ/aixicplexx.pdf",
  slides     = "http://www.hutter1.net/publ/saixicplex.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#uai",
  issn       = "0304-3975",
  doi        = "10.1016/j.tcs.2017.11.020",
  keywords   = "Solomonoff induction; AIXI; General reinforcement learning; Knowledge-seeking agents; Computability; Arithmetical hierarchy.",
  abstract   = "How could we solve the machine learning and the artificial intelligence problem if we had infinite computation? Solomonoff induction and the reinforcement learning agent AIXI are proposed answers to this question. Both are known to be incomputable. We quantify this using the arithmetical hierarchy, and prove upper and in most cases corresponding lower bounds for incomputability. Moreover, we show that AIXI is not limit computable, thus it cannot be approximated using finite computation. However there are limit computable epsilon-optimal approximations to AIXI. We also derive computability bounds for knowledge-seeking agents, and give a limit computable weakly asymptotically optimal reinforcement learning agent.",
  support    = "ARC grant DP150104590",
  for        = "080101(50%),080201(50%)",
  seo        = "970108(100%)",
}

@InProceedings{Hutter:18piidkkt,
  author     = "Badri N. Vellambi and Owen Cameron and Marcus Hutter",
  title      = "Universal Compression of Piecewise i.i.d. Sources",
  booktitle  = "Proc. Data Compression Conference ({DCC'18})",
  pages      = "267--276",
  _editor    = "Ali Bilgin and Michael W. Marcellin and Joan Serra{-}Sagrist{\`{a}} and James A. Storer",
  publisher  = "IEEE Computer Society",
  address    = "Snowbird, Utah, USA",
  _address   = "Los Alamitos, CA (publisher)",
  _month     = mar,
  year       = "2018",
  bibtex     = "http://www.hutter1.net/official/bib.htm#piidkkt",
  pdf        = "http://www.hutter1.net/publ/piidkkt.pdf",
  slides     = "http://www.hutter1.net/publ/spiidkkt.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#compress",
  doi        = "10.1109/DCC.2018.00035",
  issn       = "2375-0359",
  isbn       = "978-1-5386-4884-1",
  keywords   = "switching data compression; universal code; prediction; Context Tree Weighting (CTW) algorithm.",
  abstract   = "We study the problem of compressing piecewise i.i.d. sources, which models the practical application of jointly compressing multiple disparate data files. We establish that universal compression of piecewise i.i.d. data is possible by modeling the data as a Markov process whose memory grows logarithmically in the size of the data using the Krichevsky-Trofimov (KT) estimator. The memory order is chosen large enough so that the successful gleaning of the distribution of the different pieces of the data from the corresponding contexts is possible, and simultaneously small enough that this learning can occur for almost any realization of any piecewise data process.",
  support    = "ARC grants DP120100950 and DP150104590",
  for        = "080401(100%)",
  seo        = "970108(80%),890205(20%)",
}

@InCollection{Hutter:18uaitas,
  author     = "Tom Everitt and Marcus Hutter",
  title      = "Universal Artificial Intelligence: Practical Agents and Fundamental Challenges",
  booktitle  = "Foundations of Trusted Autonomy",
  _series    = "Studies in Systems, Decision and Control 117",
  chapter    = "2",
  pages      = "15--46",
  editor     = "Hussein A. Abbass and Jason Scholz and Darryn J. Reid",
  publisher  = "Springer",
  _month     = jan,
  year       = "2018",
  bibtex     = "http://www.hutter1.net/official/bib.htm#uaitas",
  xurl       = "http://arxiv.org/abs/1801.none",
  pdf        = "http://www.hutter1.net/publ/uaitas.pdf",
  slides     = "http://www.hutter1.net/publ/suaitas.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#uai",
  issn       = "2198-4182",
  isbn       = "978-3-319-64815-6",
  doi        = "10.1007/978-3-319-64816-3_2",
  keywords   = "foundations; general reinforcement learning; AI safety; Solomonoff induction; intelligent agents.",
  abstract   = "Foundational theories have contributed greatly to scientific progress in many fields. Examples include Zermelo-Fraenkel set theory in mathematics, and universal Turing machines in computer science. Universal Artificial Intelligence (UAI) is an increasingly well-studied foundational theory for artificial intelligence, based on ancient principles in the philosophy of science and modern developments in information and probability theory. Importantly, it refrains from making unrealistic Markov, ergodicity, or stationarity assumptions on the environment. UAI provides a theoretically optimal agent AIXI and principled ideas for constructing practical autonomous agents. The theory also makes it possible to establish formal results on the motivations of AI systems. Such results may greatly enhance the trustability of autonomous agents, and guide design choices towards more robust agent architectures and incentive schemes. Finally, UAI offers a deeper appreciation of fundamental problems such as the induction problem and the exploration-exploitation dilemma.",
  support    = "ARC grant DP150104590",
  for        = "080101(80%),220312(20%)",
  seo        = "970108(80%),970117(20%)",
  znote      = "68500+ downloads in 2018. Top 10 most downloaded Springer books in 2018 across all Engineering: http://www.springer.com/gp/campaigns/highlights-2018/engineering-2018",
}

## %-------------Publications-of-Marcus-Hutter-2017--------------%

@InCollection{Hutter:17unilearn,
  author     = "Marcus Hutter",
  title      = "Universal Learning Theory",
  booktitle  = "Encyclopedia of Machine Learning and Data Mining",
  pages      = "1295--1304",
  editor     = "C. Sammut and G. Webb",
  publisher  = "Springer",
  _month     = aug,
  year       = "2017",
  edition    = "2nd",
  bibtex     = "http://www.hutter1.net/official/bib.htm#unilearn",
  url        = "http://arxiv.org/abs/1102.2467",
  pdf        = "http://www.hutter1.net/publ/unilearn.pdf",
  latex      = "http://www.hutter1.net/publ/unilearn.tex",
  slides     = "http://www.hutter1.net/ai/susp.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#ait",
  doi        = "10.1007/978-1-4899-7687-1_867",
  isbn       = "978-1-4899-7686-4",
  keywords   = "Algorithmic probability; Ray Solomonoff; induction; prediction; decision; action; Turing machine; Kolmogorov complexity; universal prior; Bayes' rule.",
  abstract   = "This encyclopedic article gives a mini-introduction into the theory of universal learning, founded by Ray Solomonoff in the 1960s and significantly developed and extended in the last decade. It explains the spirit of universal learning, but necessarily glosses over technical subtleties.",
  for        = "080401(30%),010405(30%),080198(40%)",
  seo        = "970108(100%)",
}

@InProceedings{Hutter:17thompgrls,
  author     = "Jan Leike and Tor Lattimore and Laurent Orseau and Marcus Hutter",
  title      = "On {T}hompson Sampling and Asymptotic Optimality",
  booktitle  = "Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})",
  address    = "Melbourne, Australia",
  _editor    = "Carles Sierra",
  _publisher = "IJCAI",
  pages      = "4889--4893",
  _month     = aug,
  year       = "2017",
  bibtex     = "http://www.hutter1.net/official/bib.htm#thompgrls",
  url        = "http://arxiv.org/abs/1602.07905",
  pdf        = "http://www.hutter1.net/publ/thompgrls.pdf",
  slides     = "http://www.hutter1.net/publ/sthompgrl.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#uai",
  isbn       = "978-0-9992411-0-3",
  doi        = "10.24963/ijcai.2017/688",
  keywords   = "General reinforcement learning; Thompson sampling; asymptotic optimality; regret; discounting; recoverability; AIXI",
  abstract   = "We discuss some recent results on Thompson sampling for nonparametric reinforcement learning in countable classes of general stochastic environments. These environments can be non-Markovian, non-ergodic, and partially observable. We show that Thompson sampling learns the environment class in the sense that (1) asymptotically its value converges in mean to the optimal value and (2) given a recoverability assumption regret is sublinear. We conclude with a discussion about optimality in reinforcement learning.",
  support    = "ARC grant DP150104590",
  note       = "Best sister conferences paper track",
  for        = "080101(60%),010404(40%)",
  seo        = "970108(100%)",
}

@InProceedings{Hutter:17corruptrl,
  author     = "Tom Everitt and Victoria Krakovna and Laurent Orseau and Marcus Hutter and Shane Legg",
  title      = "Reinforcement Learning with Corrupted Reward Signal",
  booktitle  = "Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})",
  address    = "Melbourne, Australia",
  _editor    = "Carles Sierra",
  _publisher = "IJCAI",
  pages      = "4705--4713",
  _month     = aug,
  year       = "2017",
  bibtex     = "http://www.hutter1.net/official/bib.htm#corruptrl",
  url        = "http://arxiv.org/abs/1705.08417",
  pdf        = "http://www.hutter1.net/publ/corruptrl.pdf",
  slides     = "http://www.hutter1.net/publ/scorruptrl.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#safe",
  isbn       = "978-0-9992411-0-3",
  doi        = "10.24963/ijcai.2017/656",
  keywords   = "decoupled reinforcement learning; reward corruption; quantilisation; robustness; value learning.",
  abstract   = "No real-world reward function is perfect. Sensory errors and software bugs may result in agents getting higher (or lower) rewards than they should. For example, a reinforcement learning agent may prefer states where a sensory error gives it the maximum reward, but where the true reward is actually small. We formalise this problem as a generalised Markov Decision Problem called Corrupt Reward MDP. Traditional RL methods fare poorly in CRMDPs, even under strong simplifying assumptions and when trying to compensate for the possibly corrupt rewards. Two ways around the problem are investigated. First, by giving the agent richer data, such as in inverse reinforcement learning and semi-supervised reinforcement learning, reward corruption stemming from systematic sensory errors may sometimes be completely managed. Second, by using randomisation to blunt the agent's optimisation, reward corruption can be partially managed under some assumptions.",
  support    = "ARC grant DP150104590",
  for        = "080101(80%),220312(20%)",
  seo        = "970108(80%),970117(20%)",
  znote      = "Acceptance rate: 660/2540=26\%",
}

@InProceedings{Hutter:17cbefsrl,
  author     = "Jarryd Martin and Suraj Narayanan Sasikumar and Tom Everitt and Marcus Hutter",
  title      = "Count-Based Exploration in Feature Space for Reinforcement Learning",
  booktitle  = "Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})",
  address    = "Melbourne, Australia",
  _editor    = "Carles Sierra",
  _publisher = "IJCAI",
  pages      = "2471--2478",
  _month     = aug,
  year       = "2017",
  bibtex     = "http://www.hutter1.net/official/bib.htm#cbefsrl",
  url        = "http://arxiv.org/abs/1706.08090",
  pdf        = "http://www.hutter1.net/publ/cbefsrl.pdf",
  slides     = "http://www.hutter1.net/publ/scbefsrl.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#frl",
  isbn       = "978-0-9992411-0-3",
  doi        = "10.24963/ijcai.2017/344",
  keywords   = "reinforcement learning; Markov decision process; planning under uncertainty; sequential decision making; count-based exploration",
  abstract   = "We introduce a new count-based optimistic exploration algorithm for reinforcement learning (RL) that is feasible in environments with high-dimensional state-action spaces. The success of RL algorithms in these domains depends crucially on generalisation from limited training experience. Function approximation techniques enable RL agents to generalise in order to estimate the value of unvisited states, but at present few methods enable generalisation regarding uncertainty. This has prevented the combination of scalable RL algorithms with efficient exploration strategies that drive the agent to reduce its uncertainty. We present a new method for computing a generalised state visit-count, which allows the agent to estimate the uncertainty associated with any state. Our phi-pseudo-count achieves generalisation by exploiting the same feature representation of the state space that is used for value function approximation. States that have less frequently observed features are deemed more uncertain. The phi-Exploration-Bonus algorithm rewards the agent for exploring in feature space rather than in the untransformed state space. The method is simpler and less computationally expensive than some previous proposals, and achieves near state-of-the-art results on high-dimensional RL benchmarks.",
  support    = "ARC grant DP150104590",
  for        = "080199(50%),080101(50%)",
  seo        = "970108(100%)",
  znote      = "Acceptance rate: 660/2540=26\%. Also presented at SURL'17 http://www.surl.tirl.info/",
}

@InProceedings{Hutter:17urlsurexp,
  author     = "John Aslanides and Jan Leike and Marcus Hutter",
  title      = "Universal Reinforcement Learning Algorithms: Survey and Experiments",
  booktitle  = "Proc. 26th International Joint Conf. on Artificial Intelligence ({IJCAI'17})",
  address    = "Melbourne, Australia",
  _editor    = "Carles Sierra",
  _publisher = "IJCAI",
  pages      = "1403--1410",
  _month     = aug,
  year       = "2017",
  bibtex     = "http://www.hutter1.net/official/bib.htm#urlsurexp",
  url        = "http://arxiv.org/abs/1705.10557",
  pdf        = "http://www.hutter1.net/publ/urlsurexp.pdf",
  slides     = "http://www.hutter1.net/publ/surlsurexp.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#uai",
  demo       = "http://www.hutter1.net/aixijs/",
  code       = "http://github.com/aslanides/aixijs",
  isbn       = "978-0-9992411-0-3",
  doi        = "10.24963/ijcai.2017/194",
  keywords   = "universal reinforcement learning; multi-agent system; sequential decision making; survey; online demo; JavaScript code.",
  abstract   = "Many state-of-the-art reinforcement learning (RL) algorithms typically assume that the environment is an ergodic Markov Decision Process (MDP). In contrast, the field of universal reinforcement learning (URL) is concerned with algorithms that make as few assumptions as possible about the environment. The universal Bayesian agent AIXI and a family of related URL algorithms have been developed in this setting. While numerous theoretical optimality results have been proven for these agents, there has been no empirical investigation of their behavior to date. We present a short and accessible survey of these URL algorithms under a unified notation and framework, along with results of some experiments that qualitatively illustrate some properties of the resulting policies, and their relative performance on partially-observable grid-world environments. We also present an open-source reference implementation of the algorithms which we hope will facilitate further understanding of, and experimentation with, these ideas.",
  support    = "ARC grant DP150104590",
  for        = "080199(40%),080101(40%),010404(10%),010405(10%)",
  seo        = "970108(100%)",
  znote      = "Acceptance rate: 660/2540=26\%",
}

@InProceedings{Hutter:17offswitch,
  author     = "Tobias Wängberg and Mikael Böörs and Elliot Catt and Tom Everitt and Marcus Hutter",
  title      = "A Game-Theoretic Analysis of The Off-Switch Game",
  booktitle  = "Proc. 10th Conf. on Artificial General Intelligence ({AGI'17})",
  address    = "Melbourne, Australia",
  series     = "LNAI",
  volume     = "10414",
  pages      = "167--177",
  _editor    = "Tom Everitt and Ben Goertzel and Alexey Potapov",
  publisher  = "Springer",
  _month     = aug,
  year       = "2017",
  bibtex     = "http://www.hutter1.net/official/bib.htm#offswitch",
  url        = "http://arxiv.org/abs/1708.03871",
  pdf        = "http://www.hutter1.net/publ/offswitch.pdf",
  slides     = "http://www.hutter1.net/publ/soffswitch.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#safe",
  doi        = "10.1007/978-3-319-63703-7_16",
  issn       = "0302-9743",
  isbn       = "978-3-319-63702-0",
  keywords   = "AI safety; corrigibility; intelligent agents; game theory; uncertainty.",
  abstract   = "The off-switch game is a game theoretic model of a highly intelligent robot interacting with a human. In the original paper by Hadfield-Menell et al. (2016b), the analysis is not fully game-theoretic as the human is modelled as an irrational player, and the robot’s best action is only calculated under unrealistic normality and soft-max assumptions. In this paper, we make the analysis fully game theoretic, by modelling the human as a rational player with a random utility function. As a consequence, we are able to easily calculate the robot’s best action for arbitrary belief and irrationality assumptions.",
  for        = "080101(80%),220312(20%)",
  seo        = "970108(80%),970117(20%)",
  znote      = "Also presented at PT-AI 2017. http://www.pt-ai.org/2017/papers Acceptance rate: 28/77 = 36\% (oral presentation) [51/77=66\% incl. posters].",
}

@InProceedings{Hutter:17expdisc,
  author     = "Sean Lamont and John Aslanides and Jan Leike and Marcus Hutter",
  title      = "Generalised Discount Functions applied to a {M}onte-{C}arlo {AI}$\mu$ Implementation",
  booktitle  = "Proc. 16th Conf. on Autonomous Agents and MultiAgent Systems ({AAMAS'17})",
  pages      = "1589--1591",
  _editor    = "Sanmay Das and Ed Durfee and Kate Larson and Michael Winikoff",
  _publisher = "International Foundation for Autonomous Agents and Multiagent Systems",
  address    = "Sao Paulo, Brazil",
  _month     = may,
  year       = "2017",
  bibtex     = "http://www.hutter1.net/official/bib.htm#expdisc",
  http       = "http://dl.acm.org/citation.cfm?id=3091372",
  url        = "http://arxiv.org/abs/1703.01358",
  pdf        = "http://www.hutter1.net/publ/expdisc.pdf",
  latex      = "http://www.hutter1.net/publ/expdisc.tex",
  slides     = "http://www.hutter1.net/publ/sexpdisc.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#uai",
  demo       = "http://www.hutter1.net/aixijs/",
  code       = "http://github.com/aslanides/aixijs",
  keywords   = "Monte Carlo; discount function; reinforcement learning; time consistency",
  abstract   = "In recent years, work has been done to develop the theory of General Reinforcement Learning (GRL). However, there are no examples demonstrating the known results regarding generalised discounting. We have added to the GRL simulation platform (AIXIjs) the functionality to assign an agent arbitrary discount functions, and an environment which can be used to determine the effect of discounting on an agent's policy. Using this, we investigate how geometric, hyperbolic and power discounting affect an informed agent in a simple MDP. We experimentally reproduce a number of theoretical results, and discuss some related subtleties. It was found that the agent's behaviour followed what is expected theoretically, assuming appropriate parameters were chosen for the Monte-Carlo Tree Search (MCTS) planning algorithm.",
  support    = "ARC grant DP150104590",
  for        = "080199(40%),080101(40%),010404(10%),010405(10%)",
  seo        = "970108(100%)",
  znote      = "Acceptance rate: 276/567 = 49\%",
}

## %-------------Publications-of-Marcus-Hutter-2016--------------%

@Article{Hutter:16exsaggx,
  author     = "Marcus Hutter",
  title      = "Extreme State Aggregation beyond {M}arkov Decision Processes",
  journal    = "Theoretical Computer Science",
  volume     = "650",
  pages      = "73--91",
  publisher  = "Elsevier",
  _month     = oct,
  year       = "2016",
  bibtex     = "http://www.hutter1.net/official/bib.htm#exsaggx",
  url        = "http://arxiv.org/abs/1407.3341",
  pdf        = "http://www.hutter1.net/publ/exsaggx.pdf",
  latex      = "http://www.hutter1.net/publ/exsaggx.tex",
  slides     = "http://www.hutter1.net/publ/sexsagg.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#frl",
  issn       = "0304-3975",
  doi        = "10.1016/j.tcs.2016.07.032",
  keywords   = "State aggregation; Reinforcement learning; Non-MDP",
  abstract   = "We consider a Reinforcement Learning setup where an agent interacts with an environment in observation–reward–action cycles without any (esp. MDP) assumptions on the environment. State aggregation and more generally feature reinforcement learning is concerned with mapping histories/raw-states to reduced/aggregated states. The idea behind both is that the resulting reduced process (approximately) forms a small stationary finite-state MDP, which can then be efficiently solved or learnt. We considerably generalize existing aggregation results by showing that even if the reduced process is not an MDP, the (q-)value functions and (optimal) policies of an associated MDP with same state-space size solve the original problem, as long as the solution can approximately be represented as a function of the reduced states. This implies an upper bound on the required state space size that holds uniformly for all RL problems. It may also explain why RL algorithms designed for MDPs sometimes perform well beyond MDPs.",
  support    = "ARC grant DP120100950",
  for        = "080101(100%)",
  seo        = "970108(100%)",
}

@InProceedings{Hutter:16aixideath,
  author     = "Jarryd Martin and Tom Everitt and Marcus Hutter",
  title      = "Death and Suicide in Universal Artificial Intelligence",
  booktitle  = "Proc. 9th Conf. on Artificial General Intelligence ({AGI'16})",
  address    = "New York, USA",
  series     = "LNAI",
  volume     = "9782",
  pages      = "23--32",
  _editor    = "Bas Steunebrink and Pei Wang and Ben Goertzel",
  publisher  = "Springer",
  _month     = jul,
  year       = "2016",
  bibtex     = "http://www.hutter1.net/official/bib.htm#aixideath",
  url        = "http://arxiv.org/abs/1606.00652",
  pdf        = "http://www.hutter1.net/publ/aixideath.pdf",
  latex      = "http://www.hutter1.net/publ/aixideath.tex",
  slides     = "http://www.hutter1.net/publ/saixideath.pdf",
  video      = "http://youtu.be/c__OjDHqFs",
  project    = "http://www.hutter1.net/official/projects.htm#safe",
  doi        = "10.1007/978-3-319-41649-6_3",
  issn       = "0302-9743",
  isbn       = "978-3-319-41648-9",
  keywords   = "intelligent agents; death; suicide; aixi; reinforcement learning; semimeasure",
  abstract   = "Reinforcement learning (RL) is a general paradigm for studying intelligent behaviour, with applications ranging from artificial intelligence to psychology and economics. AIXI is a universal solution to the RL problem; it can learn any computable environment. A technical subtlety of AIXI is that it is defined using a mixture over semimeasures that need not sum to 1, rather than over proper probability measures. In this work we argue that the shortfall of a semimeasure can naturally be interpreted as the agent's estimate of the probability of its death. We formally define death for generally intelligent agents like AIXI, and prove a number of related theorems about their behaviour. Notable discoveries include that agent behaviour can change radically under positive linear transformations of the reward signal (from suicidal to dogmatically self-preserving), and that the agent's posterior belief that it will survive increases over time.",
  support    = "ARC grant DP150104590",
  for        = "080101(80%),220312(20%)",
  seo        = "970108(80%),970122(10%),970117(10%)",
  znote      = "Acceptance rate: 24/67 = 36\%",
}

@InProceedings{Hutter:16wirehead,
  author     = "Tom Everitt and Marcus Hutter",
  title      = "Avoiding Wireheading with Value Reinforcement Learning",
  booktitle  = "Proc. 9th Conf. on Artificial General Intelligence ({AGI'16})",
  address    = "New York, USA",
  series     = "LNAI",
  volume     = "9782",
  pages      = "12--22",
  _editor    = "Bas Steunebrink and Pei Wang and Ben Goertzel",
  publisher  = "Springer",
  _month     = jul,
  year       = "2016",
  bibtex     = "http://www.hutter1.net/official/bib.htm#wirehead",
  url        = "http://arxiv.org/abs/1605.03143",
  pdf        = "http://www.hutter1.net/publ/wirehead.pdf",
  latex      = "http://www.hutter1.net/publ/wirehead.tex",
  slides     = "http://www.hutter1.net/publ/swirehead.pdf",
  video      = "http://youtu.be/sqFc2-_mDCk",
  project    = "http://www.hutter1.net/official/projects.htm#safe",
  doi        = "10.1007/978-3-319-41649-6_2",
  issn       = "0302-9743",
  isbn       = "978-3-319-41648-9",
  keywords   = "intelligent agents; reinforcement learning; wireheading; value RL; utility function; safety",
  abstract   = "How can we design good goals for arbitrarily intelligent agents? Reinforcement learning (RL) is a natural approach. Unfortunately, RL does not work well for generally intelligent agents, as RL agents are incentivised to shortcut the reward sensor for maximum reward -- the so-called wireheading problem. In this paper we suggest an alternative to RL called value reinforcement learning (VRL). In VRL, agents use the reward signal to learn a utility function. The VRL setup allows us to remove the incentive to wirehead by placing a constraint on the agent's actions. The constraint is defined in terms of the agent's belief distributions, and does not require an explicit specification of which actions constitute wireheading.",
  support    = "ARC grant DP150104590",
  for        = "080101(70%),220312(30%)",
  seo        = "970108(60%),970122(20%),970117(20%)",
  znote      = "Acceptance rate: 24/67 = 36\%",
}

@InProceedings{Hutter:16selfmod,
  author     = "Tom Everitt and Daniel Filan and Mayank Daswani and Marcus Hutter",
  title      = "Self-Modification of Policy and Utility Function in Rational Agents",
  booktitle  = "Proc. 9th Conf. on Artificial General Intelligence ({AGI'16})",
  address    = "New York, USA",
  series     = "LNAI",
  volume     = "9782",
  pages      = "1--11",
  _editor    = "Bas Steunebrink and Pei Wang and Ben Goertzel",
  publisher  = "Springer",
  _month     = jul,
  year       = "2016",
  bibtex     = "http://www.hutter1.net/official/bib.htm#selfmod",
  url        = "http://arxiv.org/abs/1605.03142",
  pdf        = "http://www.hutter1.net/publ/selfmod.pdf",
  latex      = "http://www.hutter1.net/publ/selfmod.tex",
  video      = "http://youtu.be/sqFc2-_mDCk",
  award      = "http://agi-conf.org/2016/prizes/",
  project    = "http://www.hutter1.net/official/projects.htm#safe",
  doi        = "10.1007/978-3-319-41649-6_1",
  issn       = "0302-9743",
  isbn       = "978-3-319-41648-9",
  keywords   = "intelligent agents; self-modification; goal preservation; utility functions; control problem; safety",
  abstract   = "Any agent that is part of the environment it interacts with and has versatile actuators (such as arms and fingers), will in principle have the ability to self-modify -- for example by changing its own source code. As we continue to create more and more intelligent agents, chances increase that they will learn about this ability. The question is: will they want to use it? For example, highly intelligent systems may find ways to change their goals to something more easily achievable, thereby `escaping' the control of their designers. In an important paper, Omohundro (2008) argued that goal preservation is a fundamental drive of any intelligent system, since a goal is more likely to be achieved if future versions of the agent strive towards the same goal. In this paper, we formalise this argument in general reinforcement learning, and explore situations where it fails. Our conclusion is that the self-modification possibility is harmless if and only if the value function of the agent anticipates the consequences of self-modifications and use the current utility function when evaluating the future.",
  support    = "ARC grant DP150104590",
  for        = "080101(70%),220312(30%)",
  seo        = "970108(60%),970122(20%),970117(20%)",
  znote      = "Acceptance rate: 24/67 = 36\%",
  note       = "Winner of the Kurzweil Prize for Best AGI Paper",
}

@InProceedings{Hutter:16vacrecog,
  author     = "Basura Fernando and Peter Anderson and Marcus Hutter and Stephen Gould",
  title      = "Discriminative Hierarchical Rank Pooling for Activity Recognition",
  booktitle  = "Proc. IEEE Conference on Computer Vision and Pattern Recognition ({CVPR'16})",
  address    = "Las Vegas, NV, USA",
  pages      = "1924--1932",
  _editor    = "Lourdes Agapito and Tamara Berg and Jana Kosecka and Lihi Zelnik-Manor",
  publisher  = "IEEE",
  _month     = jun,
  year       = "2016",
  bibtex     = "http://www.hutter1.net/official/bib.htm#vacrecog",
  pdf        = "http://www.hutter1.net/publ/vacrecog.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#cvip",
  code       = "http://www.hutter1.net/publ/varcode.zip",
  issn       = "1063-6919",
  doi        = "10.1109/CVPR.2016.212",
  keywords   = "rank pooling; activity classification; hierarchy; video; training; convolutional neural network; nonlinear feature functions",
  abstract   = "We present hierarchical rank pooling, a video sequence encoding method for activity recognition. It consists of a network of rank pooling functions which captures the dynamics of rich convolutional neural network features within a video sequence. By stacking non-linear feature functions and rank pooling over one another, we obtain a high capacity dynamic encoding mechanism, which is used for action recognition. We present a method for jointly learning the video representation and activity classifier parameters. Our method obtains state-of-the art results on three important activity recognition benchmarks: 76.7\% on Hollywood2, 66.9\% on HMDB51 and, 91.4\% on UCF101.",
  for        = "080104(50%),080106(50%)",
  seo        = "970108(100%)",
  znote      = "Acceptance rate: 643/1865 = 30\%",
}

@InProceedings{Hutter:16thompgrl,
  author     = "Jan Leike and Tor Lattimore and Laurent Orseau and Marcus Hutter",
  title      = "Thompson Sampling is Asymptotically Optimal in General Environments",
  booktitle  = "Proc. 32nd International Conf. on Uncertainty in Artificial Intelligence ({UAI'16})",
  address    = "New Jersey, USA",
  _editor    = "Alexander Ihler and Dominik Janzing",
  publisher  = "AUAI Press",
  pages      = "417--426",
  _month     = jun,
  year       = "2016",
  bibtex     = "http://www.hutter1.net/official/bib.htm#thompgrl",
  http       = "http://auai.org/uai2016/proceedings/papers/20.pdf",
  url        = "http://arxiv.org/abs/1602.07905",
  pdf        = "http://www.hutter1.net/publ/thompgrl.pdf",
  latex      = "http://www.hutter1.net/publ/thompgrl.tex",
  slides     = "http://www.hutter1.net/publ/sthompgrl.pdf",
  award      = "http://auai.org/uai2016/program.php",
  project    = "http://www.hutter1.net/official/projects.htm#uai",
  isbn       = "978-0-9966431-1-5",
  keywords   = "General reinforcement learning; Thompson sampling; asymptotic optimality; regret; discounting; recoverability; AIXI",
  abstract   = "We discuss a variant of Thompson sampling for nonparametric reinforcement learning in countable classes of general stochastic environments. These environments can be non-Markov, nonergodic, and partially observable. We show that Thompson sampling learns the environment class in the sense that (1) asymptotically its value converges to the optimal value in mean and (2) given a recoverability assumption regret is sublinear.",
  support    = "ARC grant DP150104590",
  for        = "080101(60%),010404(40%)",
  seo        = "970108(100%)",
  note       = "Best student paper",
  znote      = "Acceptance rate: 26/275 = 9\% (oral!) [85/275 = 31\% incl. poster]",
}

@InProceedings{Hutter:16speedprior,
  author    = "Daniel Filan and Jan Leike and Marcus Hutter",
  title     = "Loss Bounds and Time Complexity for Speed Priors",
  booktitle = "Proc. 19th International Conf. on Artificial Intelligence and Statistics ({AISTATS'16})",
  address   = "Cadiz, Spain",
  volume    = "51",
  _editor   = "Arthur Gretton and Christian Robert",
  publisher = "Microtome",
  pages     = "1394--1402",
  _month    = may,
  year      = "2016",
  bibtex    = "http://www.hutter1.net/official/bib.htm#speedprior",
  http      = "http://jmlr.org/proceedings/papers/v51/",
  url       = "http://arxiv.org/abs/1604.03343",
  pdf       = "http://www.hutter1.net/publ/speedprior.pdf",
  latex     = "http://www.hutter1.net/publ/speedprior.tex",
  project   = "http://www.hutter1.net/official/projects.htm#ait",
  issn      = "1938-7228",
  keywords  = "universal distribution; speed prior; computational complexity; predictive performance; upper bounds.",
  abstract  = "This paper establishes for the first time the predictive performance of speed priors and their computational complexity. A speed prior is essentially a probability distribution that puts low probability on strings that are not efficiently computable. We propose a variant to the original speed prior (Schmidhuber, 2002), and show that our prior can predict sequences drawn from probability measures that are estimable in polynomial time. Our speed prior is computable in doubly-exponential time, but not in polynomial time. On a polynomial time computable sequence our speed prior is computable in exponential time.
We show better upper complexity bounds for Schmidhuber's speed prior under the same conditions, and that it predicts deterministic sequences that are computable in polynomial time; however, we also show that it is not computable in polynomial time, and the question of its predictive properties for stochastic sequences remains open.",
  support   = "ARC grant DP150104590",
  for       = "080101(20%),080201(30%),080401(20%),010404(30%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 165/537 = 31\%",
}

## %-------------Publications-of-Marcus-Hutter-2015--------------%

@InProceedings{Hutter:15metasearch1,
  author    = "Tom Everitt and Marcus Hutter",
  title     = "Analytical Results on the {BFS} vs. {DFS} Algorithm Selection Problem. {P}art I: {T}ree Search",
  booktitle = "Proc. 28th Australasian Joint Conference on Artificial Intelligence ({AusAI'15})",
  address   = "Canberra, Australia",
  series    = "LNAI",
  volume    = "9457",
  _editor   = "Bernhard Pfahringer and Jochen Renz",
  publisher = "Springer",
  pages     = "157--165",
  _month    = dec,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#metasearch1",
  url       = "http://arxiv.org/abs/1509.02709",
  pdf       = "http://www.hutter1.net/publ/metasearch1.pdf",
  slides    = "http://www.hutter1.net/publ/smetasearch.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#search",
  code      = "http://www.hutter1.net/publ/metasearchcode.zip",
  issn      = "0302-9743",
  isbn      = "978-3-319-26349-6",
  doi       = "10.1007/978-3-319-26350-2_14",
  keywords  = "BFS, DFS, Analytical Algorithm Selection, Average runtime, Meta-heuristics, Tree Search, Probabilistic Goal Distribution",
  abstract  = "BFS and DFS are the two most fundamental search algorithms. We derive approximations of their expected runtimes in complete trees, as a function of tree depth and probabilistic goal distribution. We also demonstrate that the analytical approximations are close to the empirical averages for most parameter settings, and that the results can be used to predict the best algorithm given the relevant problem features.",
  for       = "080199(50%),080201(50%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate (all papers): 57/102 = 56\%",
}

@InProceedings{Hutter:15metasearch2,
  author    = "Tom Everitt and Marcus Hutter",
  title     = "Analytical Results on the {BFS} vs. {DFS} Algorithm Selection Problem. {P}art II: {G}raph Search",
  booktitle = "Proc. 28th Australasian Joint Conference on Artificial Intelligence ({AusAI'15})",
  address   = "Canberra, Australia",
  series    = "LNAI",
  volume    = "9457",
  _editor   = "Bernhard Pfahringer and Jochen Renz",
  publisher = "Springer",
  pages     = "166--178",
  _month    = dec,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#metasearch2",
  url       = "http://arxiv.org/abs/1509.02709",
  pdf       = "http://www.hutter1.net/publ/metasearch2.pdf",
  slides    = "http://www.hutter1.net/publ/smetasearch.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#search",
  code      = "http://www.hutter1.net/publ/metasearchcode.zip",
  issn      = "0302-9743",
  isbn      = "978-3-319-26349-6",
  doi       = "10.1007/978-3-319-26350-2_15",
  keywords  = "BFS, DFS, Analytical Algorithm Selection, Average runtime, Meta-heuristics, Graph Search, Probabilistic Goal Distribution",
  abstract  = "The algorithm selection problem asks to select the best algorithm for a given problem. In the companion paper (Everitt and Hutter, AusAI, 2015), expected BFS and DFS tree search runtime was approximated as a function of tree depth and probabilistic goal distribution. Here we provide an analogous analysis of BFS and DFS graph search, deriving expected runtime as a function of graph structure and goal distribution. The applicability of the method is demonstrated through analysis of two different grammar problems. The approximations come surprisingly close to empirical reality.",
  for       = "080199(50%),080201(50%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate (full papers): 39/102 = 38\%",
}

@InProceedings{Hutter:15sikscplex,
  author    = "Jan Leike and Marcus Hutter",
  title     = "On the Computability of {S}olomonoff Induction and Knowledge-Seeking",
  booktitle = "Proc. 26th International Conf. on Algorithmic Learning Theory ({ALT'15})",
  address   = "Banff, Canada",
  series    = "LNAI",
  volume    = "9355",
  _editor   = "Kamalika Chaudhuri and Claudio Gentile and Sandra Zilles",
  publisher = "Springer",
  pages     = "364--378",
  _month    = oct,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#sikscplex",
  url       = "http://arxiv.org/abs/1507.04124",
  pdf       = "http://www.hutter1.net/publ/sikscplex.pdf",
  slides    = "http://www.hutter1.net/publ/ssikscplex.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  issn      = "0302-9743",
  isbn      = "978-3-319-24485-3",
  doi       = "10.1007/978-3-319-24486-0_24",
  keywords  = "Solomonoff induction; Exploration; Knowledge-seeking agents; General reinforcement learning; Asymptotic optimality; Computability; Complexity; Arithmetical hierarchy; Universal turing machine; AIXI; BayesExp",
  abstract  = "Solomonoff induction is held as a gold standard for learning, but it is known to be incomputable. We quantify its incomputability by placing various flavors of Solomonoff's prior M in the arithmetical hierarchy. We also derive computability bounds for knowledge-seeking agents, and give a limit-computable weakly asymptotically optimal reinforcement learning agent.",
  support   = "ARC grant DP150104590",
  for       = "080101(50%),080201(50%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 23/46 = 50\%",
}

@InProceedings{Hutter:15solraven,
  author    = "Jan Leike and Marcus Hutter",
  title     = "Solomonoff Induction Violates {N}icod's Criterion",
  booktitle = "Proc. 26th International Conf. on Algorithmic Learning Theory ({ALT'15})",
  address   = "Banff, Canada",
  series    = "LNAI",
  volume    = "9355",
  _editor   = "Kamalika Chaudhuri and Claudio Gentile and Sandra Zilles",
  publisher = "Springer",
  pages     = "349--363",
  _month    = oct,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#solraven",
  url       = "http://arxiv.org/abs/1507.04121",
  pdf       = "http://www.hutter1.net/publ/solraven.pdf",
  slides    = "http://www.hutter1.net/publ/ssolraven.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#ait",
  code      = "http://www.hutter1.net/publ/solraven.cpp",
  issn      = "0302-9743",
  isbn      = "978-3-319-24485-3",
  doi       = "10.1007/978-3-319-24486-0_23",
  keywords  = "Bayesian reasoning; Confirmation; Disconfirmation; Hempel’s paradox; Equivalence condition; Solomonoff normalization",
  abstract  = "Nicod's criterion states that observing a black raven is evidence for the hypothesis H that all ravens are black. We show that Solomonoff induction does not satisfy Nicod's criterion: there are time steps in which observing black ravens decreases the belief in H. Moreover, while observing any computable infinite string compatible with H, the belief in H decreases infinitely often when using the unnormalized Solomonoff prior, but only finitely often when using the normalized Solomonoff prior. We argue that the fault is not with Solomonoff induction; instead we should reject Nicod’s criterion.",
  note      = "Also presented at CCR: http://math.uni-heidelberg.de/logic/conferences/ccr2015/",
  support   = "ARC grant DP150104590",
  for       = "080199(50%),220399(50%)",
  seo       = "970108(50%),970122(50%)",
  znote     = "Acceptance rate: 23/46 = 50\%",
}

@InProceedings{Hutter:15seqdts,
  author    = "Tom Everitt and Jan Leike and Marcus Hutter",
  title     = "Sequential Extensions of Causal and Evidential Decision Theory",
  booktitle = "Proc. 4th International Conf. on Algorithmic Decision Theory ({ADT'15})",
  address   = "Lexington, USA",
  series    = "LNAI",
  volume    = "9346",
  _editor   = "Toby Walsh",
  publisher = "Springer",
  pages     = "205--221",
  _month    = sep,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#seqdts",
  url       = "http://arxiv.org/abs/1506.07359",
  pdf       = "http://www.hutter1.net/publ/seqdts.pdf",
  slides    = "http://www.hutter1.net/publ/sseqdts.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#universal",
  issn      = "0302-9743",
  isbn      = "978-3-319-23113-6",
  doi       = "10.1007/978-3-319-23114-3_13",
  keywords  = "Evidential decision theory; Causal decision theory; Planning; Causal graphical models; Dualism; Physicalism",
  abstract  = "Moving beyond the dualistic view in AI where agent and environment are separated incurs new challenges for decision making, as calculation of expected utility is no longer straightforward. The non-dualistic decision theory literature is split between causal decision theory and evidential decision theory. We extend these decision algorithms to the sequential setting where the agent alternates between taking actions and observing their consequences. We find that evidential decision theory has two natural extensions while causal decision theory only has one.",
  support   = "ARC grant DP150104590",
  for       = "080101(50%),220302(50%)",
  seo       = "970108(50%),970122(50%)",
  znote     = "Acceptance rate: 32/70 = 45\%",
}

@Article{Hutter:15ratagentx,
  author    = "Peter Sunehag and Marcus Hutter",
  title     = "Rationality, Optimism and Guarantees in General Reinforcement Learning",
  journal   = "Journal of Machine Learning Research",
  volume    = "16",
  pages     = "1345--1390",
  publisher = "Microtome",
  _address  = "Princeton, NJ, USA",
  _month    = aug,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#ratagentx",
  url       = "http://jmlr.org/papers/v16/sunehag15a.html",
  pdf       = "http://www.hutter1.net/publ/ratagentx.pdf",
  slides    = "http://www.hutter1.net/publ/sagproblaws.pdf",
  slides    = "http://www.hutter1.net/publ/soptcog.pdf",
  slides    = "http://www.hutter1.net/publ/sagscilaws.pdf",
  slides    = "http://www.hutter1.net/publ/saixiopt.pdf",
  slides    = "http://www.hutter1.net/publ/soptopt.pdf",
  slides    = "http://www.hutter1.net/publ/saixiaxiom.pdf",
  slides    = "http://www.hutter1.net/publ/saixiaxiom2.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  issn      = "1532-4435",
  keywords  = "Reinforcement Learning, Rationality, Optimism, Optimality, Error bounds",
  abstract  = "In this article, we present a top-down theoretical study of general reinforcement learning agents. We begin with rational agents with unlimited resources and then move to a setting where an agent can only maintain a limited number of hypotheses and optimizes plans over a horizon much shorter than what the agent designer actually wants. We axiomatize what is rational in such a setting in a manner that enables optimism, which is important to achieve systematic explorative behavior. Then, within the class of agents deemed rational, we achieve convergence and finite-error bounds. Such results are desirable since they imply that the agent learns well from its experiences, but the bounds do not directly guarantee good performance and can be achieved by agents doing things one should obviously not. Good performance cannot in fact be guaranteed for any agent in fully general settings. 
Our approach is to design agents that learn well from experience and act rationally. We introduce a framework for general reinforcement learning agents based on rationality axioms for a decision function and an hypothesis-generating function designed so as to achieve guarantees on the number errors. We will consistently use an optimistic decision function but the hypothesis-generating function needs to change depending on what is known/assumed. We investigate a number of natural situations having either a frequentist or Bayesian flavor, deterministic or stochastic environments and either finite or countable hypothesis class. Further, to achieve sufficiently good bounds as to hold promise for practical success we introduce a notion of a class of environments being generated by a set of laws. None of the above has previously been done for fully general reinforcement learning environments.",
  support   = "ARC grant DP120100950",
  for       = "080101(60%),010404(30%),220302(10%)",
  seo       = "970108(90%),970122(10%)",
}

@Article{Hutter:15mnonconvx,
  author    = "Tor Lattimore and Marcus Hutter",
  title     = "On {M}artin-{L}{\"o}f (Non)Convergence of {S}olomonoff's Universal Mixture",
  journal   = "Theoretical Computer Science",
  volume    = "588",
  pages     = "2--15",
  publisher = "Elsevier",
  _month    = jul,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#mnonconvx",
  pdf       = "http://www.hutter1.net/publ/mnonconvx.pdf",
  slides    = "http://www.hutter1.net/publ/smnonconv.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#ait",
  issn      = "0304-3975",
  doi       = "10.1016/j.tcs.2014.12.004",
  keywords  = "Solomonoff induction, Kolmogorov complexity, theory of computation.",
  abstract  = "We study the convergence of Solomonoff's universal mixture on individual Martin-Löf random sequences. A new result is presented extending the work of Hutter and Muchnik [3] by showing that there does not exist a universal mixture that converges on all Martin-Löf random sequences. We show that this is not an artifact of the fact that the universal mixture is not a proper measure and that the normalised universal mixture also fails to converge on all Martin-Löf random sequences.",
  for       = "080401(50%),010404(30%),010405(20%)",
  seo       = "970101(30%),970108(70%)",
}

@InProceedings{Hutter:15learncnf,
  author    = "Joel Veness and Marcus Hutter and Laurent Orseau and Marc Bellemare",
  title     = "Online Learning of {k-CNF} Boolean Functions",
  booktitle = "Proc. 24th International Joint Conf. on Artificial Intelligence ({IJCAI'15})",
  address   = "Buenos Aires, Argentina",
  _editor   = "Qiang Yang and Michael Wooldridge",
  publisher = "AAAI Press",
  pages     = "3865--3873",
  _month    = jul,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#learncnf",
  url       = "http://arxiv.org/abs/1403.6863",
  pdf       = "http://www.hutter1.net/publ/learncnf.pdf",
  slides    = "http://www.hutter1.net/publ/slearncnf.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#bayes",
  isbn      = "978-1-57735-738-4",
  keywords  = "k-CNF, Online Learning, Logarithmic Loss, Bayesian algorithm",
  abstract  = "This paper revisits the problem of learning a k-CNF Boolean function from examples, for fixed k, in the context of online learning under the logarithmic loss. We give a Bayesian interpretation to one of Valiant's classic PAC learning algorithms, which we then build upon to derive three efficient, online, probabilistic, supervised learning algorithms for predicting the output of an unknown k-CNF Boolean function. We analyze the loss of our methods, and show that the cumulative log-loss can be upper bounded by a polynomial function of the size of each example.",
  support   = "ARC grant DP150104590",
  for       = "080101(30%),010404(30%),080201(40%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 572/1996 = 29\%",
}

@InProceedings{Hutter:15agproblaws,
  author    = "Peter Sunehag and Marcus Hutter",
  title     = "Using Localization and Factorization to Reduce the Complexity of Reinforcement Learning",
  booktitle = "Proc. 8th Conf. on Artificial General Intelligence ({AGI'15})",
  address   = "Berlin, Germany",
  series    = "LNAI",
  volume    = "9205",
  pages     = "177--186",
  _editor   = "Jordi Bieger and Ben Goertzel and Alexey Potapov",
  publisher = "Springer",
  _month    = jul,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#agproblaws",
  pdf       = "http://www.hutter1.net/publ/agproblaws.pdf",
  slides    = "http://www.hutter1.net/publ/sagproblaws.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  doi       = "10.1007/978-3-319-21365-1_19",
  issn      = "0302-9743",
  isbn      = "978-3-319-21364-4",
  keywords  = "reinforcement learning; laws; optimism; bounds",
  abstract  = "General reinforcement learning is a powerful framework for artificial intelligence that has seen much theoretical progress since introduced fifteen years ago. We have previously provided guarantees for cases with finitely many possible environments. Though the results are the best possible in general, a linear dependence on the size of the hypothesis class renders them impractical. However, we dramatically improved on these by introducing the concept of environments generated by combining laws. The bounds are then linear in the number of laws needed to generate the environment class. This number is identified as a natural complexity measure for classes of environments. The individual law might only predict some feature (factorization) and only in some contexts (localization). We here extend previous deterministic results to the important stochastic setting.",
  support   = "ARC grant DP120100950",
  for       = "080101(100%)",
  seo       = "970108(80%),970122(20%)",
  znote     = "Acceptance rate: 41/72 = 57\%.",
}

@InProceedings{Hutter:15aixicplex,
  author    = "Jan Leike and Marcus Hutter",
  title     = "On the Computability of {AIXI}",
  booktitle = "Proc. 31st International Conf. on Uncertainty in Artificial Intelligence ({UAI'15})",
  address   = "Amsterdam, Netherlands",
  _editor   = "Marina Meila and Tom Heskes",
  publisher = "AUAI Press",
  pages     = "464--473",
  _month    = jul,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#aixicplex",
  url       = "http://arxiv.org/abs/1510.05572",
  pdf       = "http://www.hutter1.net/publ/aixicplex.pdf",
  slides    = "http://www.hutter1.net/publ/saixicplex.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  isbn      = "978-0-9966431-0-8",
  keywords  = "AIXI; Solomonoff induction; general reinforcement learning; computability; complexity; arithmetical hierarchy; universal Turing machine.",
  abstract  = "How could we solve the machine learning and the artificial intelligence problem if we had infinite computation? Solomonoff induction and the reinforcement learning agent AIXI are proposed answers to this question. Both are known to be incomputable. In this paper, we quantify this using the arithmetical hierarchy, and prove upper and corresponding lower bounds for incomputability. We show that AIXI is not limit computable, thus it cannot be approximated using finite computation. Our main result is a limit-computable epsilon-optimal version of AIXI with infinite horizon that maximizes expected rewards.",
  support   = "ARC grant DP150104590",
  for       = "080101(50%),080201(50%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 99/291=34\%",
}

@Article{Hutter:15aixiprior,
  author    = "Jan Leike and Marcus Hutter",
  title     = "Bad Universal Priors and Notions of Optimality",
  journal   = "Journal of Machine Learning Research, W\&CP: COLT",
  volume    = "40",
  pages     = "1244--1259",
  _editor   = "Peter Grünwald and Elad Hazan",
  _address  = "Princeton, NJ, USA",
  _month    = jul,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#aixiprior",
  http      = "http://jmlr.org/proceedings/papers/v40/Leike15.html",
  url       = "http://arxiv.org/abs/1510.04931",
  pdf       = "http://www.hutter1.net/publ/aixiprior.pdf",
  slides    = "http://www.hutter1.net/publ/saixiprior.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  issn      = "1532-4435",
  keywords  = "AIXI, general reinforcement learning, universal Turing machine, Legg-Hutter intelligence, balanced Pareto optimality, asymptotic optimality.",
  abstract  = "A big open question of algorithmic information theory is the choice of the universal Turing machine (UTM). For Kolmogorov complexity and Solomonoff induction we have invariance theorems: the choice of the UTM changes bounds only by a constant. For the universally intelligent agent AIXI (Hutter, 2005) no invariance theorem is known. Our results are entirely negative: we discuss cases in which unlucky or adversarial choices of the UTM cause AIXI to misbehave drastically. We show that Legg-Hutter intelligence and thus balanced Pareto optimality is entirely subjective, and that every policy is Pareto optimal in the class of all computable environments. This undermines all existing optimality properties for AIXI. While it may still serve as a gold standard for AI, our results imply that AIXI is a relative theory, dependent on the choice of the UTM.",
  note      = "Also presented at EWRL'15. http://ewrl.files.wordpress.com/2015/02/ewrl12\_2015\_submission\_3.pdf",
  support   = "ARC grant DP150104590",
  for       = "080101(80%),080401(20%)",
  seo       = "970108(100%)",
  znote     = "28th Annual Conf. on Learning Theory.
Acceptance rate: 27/176 = 15\%",
}

@InCollection{Hutter:15aitcog,
  author    = "Peter Sunehag and Marcus Hutter",
  title     = "Algorithmic Complexity",
  booktitle = "International Encyclopedia of the Social \& Behavioral Sciences",
  volume    = "1",
  pages     = "534--538",
  editor    = "James D. Wright",
  publisher = "Elsevier",
  _month    = apr,
  year      = "2015",
  edition   = "2nd",
  bibtex    = "http://www.hutter1.net/official/bib.htm#aitcog",
  pdf       = "http://www.hutter1.net/publ/aitcog.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#ait",
  isbn      = "978-0-080-97086-8",
  doi       = "10.1016/B978-0-08-097086-8.43001-1",
  keywords  = "Kolmogorov Complexity, Algorithmic Information Theory, Cognition, Rationality, Simplicity, Optimism, Induction, Similarity, Clustering, Prediction, Agents, Learning, Reinforcement",
  abstract  = "Algorithmic complexity provides a mathematical formal notion of string complexity. Building on this, one arrives at mathematical ‘gold standard’ (though incomputable) definitions of randomness, induction, similarity, and even intelligence. These definitions can be turned into practical algorithms by using common compressors to approximate the universal solutions. One can consider the theories as idealized cognition with respect to which one can aim to describe actual biological cognition by listing biases and limitations that need to be defined relative to some normative reference.",
  support   = "ARC grant DP120100950",
  for       = "170203(50%),080401(50%)",
  seo       = "970117(100%)",
}

@InProceedings{Hutter:15cnc,
  author    = "Joel Veness and Marc Bellemare and Marcus Hutter and Alvin Chua and Guillaume Desjardins",
  title     = "Compress and Control",
  booktitle = "Proc. 29th {AAAI} Conference on Artificial Intelligence ({AAAI'15})",
  address   = "Austin, USA",
  pages     = "3016--3023",
  _editor   = "Blai Bonet and Sven Koenig",
  publisher = "AAAI Press",
  _month    = jan,
  year      = "2015",
  bibtex    = "http://www.hutter1.net/official/bib.htm#cnc",
  url       = "http://arxiv.org/abs/1411.5326",
  pdf       = "http://www.hutter1.net/publ/cnc.pdf",
  slides    = "http://www.hutter1.net/publ/scnc.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  issn      = "2159-5399",
  isbn      = "978-1-57735-698-1",
  keywords  = "reinforcement learning, compression, Q-value function, policy evaluation, density estimation, on-policy control, Pong, Freeway, Q*Bert",
  abstract  = "This paper describes a new information-theoretic policy evaluation technique for reinforcement learning. This technique converts any compression or density model into a corresponding estimate of value. Under appropriate stationarity and ergodicity conditions, we show that the use of a sufficiently powerful model gives rise to a consistent value function estimator. We also study the behavior of this technique when applied to various Atari 2600 video games, where the use of suboptimal modeling techniques is unavoidable. We consider three fundamentally different models, all too limited to perfectly model the dynamics of the system. Remarkably, we find that our technique provides sufficiently accurate value estimates for effective on-policy control. We conclude with a suggestive study highlighting the potential of our technique to scale to large problems.",
  for       = "080101(100%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 531/1991 = 27\%. Oral 200?/1991=10\%",
}

## %-------------Publications-of-Marcus-Hutter-2014--------------%

@TechReport{Hutter:14cbayeskl,
  author    = "Tor Lattimore and Marcus Hutter",
  title     = "Asymptotics of Continuous {B}ayes for Non-i.i.d. Sources",
  pages     = "1--16",
  _month    = nov,
  year      = "2014",
  bibtex    = "http://www.hutter1.net/official/bib.htm#cbayeskl",
  url       = "http://arxiv.org/abs/1411.2918",
  pdf       = "http://www.hutter1.net/publ/cbayeskl.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#bayes",
  keywords  = "entropy; stochastic process; Bayes; non-stationary; dependence; sequence prediction; compression",
  abstract  = "Clarke and Barron analysed the relative entropy between an i.i.d. source and a Bayesian mixture over a continuous class containing that source. In this paper a comparable result is obtained when the source is permitted to be both non-stationary and dependent. The main theorem shows that Bayesian methods perform well for both compression and sequence prediction even in this most general setting with only mild technical assumptions.",
}

@InProceedings{Hutter:14rladvice,
  author    = "Mayank Daswani and Peter Sunehag and Marcus Hutter",
  title     = "Reinforcement Learning with Value Advice",
  booktitle = "Proc. 6th Asian Conf. on Machine Learning ({ACML'14})",
  volume    = "39",
  pages     = "299--314",
  _editor   = "Dinh Phung and Hang Li",
  publisher = "JMLR",
  address   = "Canberra, Australia",
  _month    = nov,
  year      = "2014",
  bibtex    = "http://www.hutter1.net/official/bib.htm#rladvice",
  pdf       = "http://www.hutter1.net/publ/rladvice.pdf",
  slides    = "http://www.hutter1.net/publ/srladvice.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#frl",
  issn      = "1532-4435",
  http      = "http://jmlr.org/proceedings/papers/v39/daswani14.pdf",
  keywords  = "feature reinforcement learning; imitation learning; dataset aggregation; value advice; upper confidence tree; Monte Carlo search; Arcade learning environment.",
  abstract  = "The problem we consider in this paper is reinforcement learning with value advice. In this setting, the agent is given limited access to an oracle that can tell it the expected return (value) of any state-action pair with respect to the optimal policy. The agent must use this value to learn an explicit policy that performs well in the environment. We provide an algorithm called RLAdvice, based on the imitation learning algorithm DAgger. We illustrate the effectiveness of this method in the Arcade Learning Environment on three different games, using value estimates from UCT as advice.",
  support   = "ARC grant DP120100950",
  for       = "080101(100%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 25/80 = 31\%",
}

@InProceedings{Hutter:14reflect,
  author    = "Di Yang and Srimal Jayawardena and Stephen Gould and Marcus Hutter",
  title     = "Reflective Features Detection and Hierarchical Reflections Separation in Image Sequences",
  booktitle = "Proc. 16th International Conf. on Digital Image Computing: Techniques and Applications ({DICTA'14})",
  pages     = "1--7",
  _editor   = "S.L. Phung and A. Bouzerdoum and P. Ogunbona and W. Li and L. Wang",
  publisher = "IEEE Xplore",
  address   = "Wollongong, Australia",
  _month    = nov,
  year      = "2014",
  bibtex    = "http://www.hutter1.net/official/bib.htm#reflect",
  pdf       = "http://www.hutter1.net/publ/reflect.pdf",
  slides    = "http://www.hutter1.net/publ/sreflect.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#icar",
  doi       = "10.1109/DICTA.2014.7008127",
  isbn      = "978-1-4799-5409-4",
  keywords  = "computer vision; reflection detection; support vector machine; automatic.",
  abstract  = "Computer vision techniques such as Structure-from-Motion (SfM) and object recognition tend to fail on scenes with highly reflective objects because the reflections behave differently to the true geometry of the scene. Such image sequences may be treated as two layers superimposed over each other - the nonreflection scene source layer and the reflection layer. However, decomposing the two layers is a very challenging task as it is ill-posed and common methods rely on prior information. This work presents an automated technique for detecting reflective features with a comprehensive analysis of the intrinsic, spatial, and temporal properties of feature points. A support vector machine (SVM) is proposed to learn reflection feature points. Predicted reflection feature points are used as priors to guide the reflection layer separation. This gives more robust and reliable results than what is achieved by performing layer separation alone.",
  support   = "ControlExpert GmbH",
  for       = "080104(50%),080106(50%)",
}

@Article{Hutter:14pacmdpx,
  author    = "Tor Lattimore and Marcus Hutter",
  title     = "Near-Optimal {PAC} bounds for discounted {MDP}s",
  journal   = "Theoretical Computer Science",
  volume    = "558",
  pages     = "125--143",
  publisher = "Elsevier",
  _month    = nov,
  year      = "2014",
  bibtex    = "http://www.hutter1.net/official/bib.htm#pacmdpx",
  pdf       = "http://www.hutter1.net/publ/pacmdpx.pdf",
  slides    = "http://www.hutter1.net/publ/spacmdp.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#agents",
  issn      = "0304-3975",
  doi       = "10.1016/j.tcs.2014.09.029",
  keywords  = "Sample-complexity; PAC bounds; Markov decision processes; Reinforcement learning",
  abstract  = "We study upper and lower bounds on the sample-complexity of learning near-optimal behaviour in finite-state discounted Markov Decision Processes (MDPs). We prove a new bound for a modified version of Upper Confidence Reinforcement Learning (UCRL) with only cubic dependence on the horizon. The bound is unimprovable in all parameters except the size of the state/action space, where it depends linearly on the number of non-zero transition probabilities. The lower bound strengthens previous work by being both more general (it applies to all policies) and tighter. The upper and lower bounds match up to logarithmic factors provided the transition matrix is not too dense.",
  support   = "ARC grant DP120100950",
  for       = "010404(30%),010405(30%),080198(40%)",
  seo       = "970108(100%)",
}

@InProceedings{Hutter:14ktoptdif,
  author     = "Tansu Alpcan and Tom Everitt and Marcus Hutter",
  title      = "Can we Measure the Difficulty of an Optimization Problem?",
  booktitle  = "{IEEE} Information Theory Workshop",
  pages      = "356--360",
  _editor    = "Yi Hong and Jamie Evans and Emanuele Viterbo and Urbashi Mitra",
  publisher  = "IEEE Press",
  address    = "Hobart, Australia",
  _month     = nov,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#ktoptdif",
  pdf        = "http://www.hutter1.net/publ/ktoptdif.pdf",
  slides     = "http://www.hutter1.net/publ/sktoptdif.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#ait",
  issn       = "1662-9019",
  isbn       = "978-1-4799-5998-0",
  doi        = "10.1109/ITW.2014.6970853",
  keywords   = "open box optimization; problem complexity; algorithmic information theory",
  abstract   = "Can we measure the difficulty of an optimization problem? Although optimization plays a crucial role in modern science and technology, a formal framework that puts problems and solution algorithms into a broader context has not been established. This paper presents a conceptual approach which gives a positive answer to the question for a broad class of optimization problems. Adopting an information and computational perspective, the proposed framework builds upon Shannon and algorithmic information theories. As a starting point, a concrete model and definition of optimization problems is provided. Then, a formal definition of optimization difficulty is introduced which builds upon algorithmic information theory. Following an initial analysis, lower and upper bounds on optimization difficulty are established. One of the upper-bounds is closely related to Shannon information theory and black-box optimization. Finally, various computational issues and future research directions are discussed.",
  for        = "080401(70%),080198(30%)",
  seo        = "970801(100%)",
}

@InProceedings{Hutter:14epipolar,
  author     = "S. Jayawardena and S. Gould and H. Li and M. Hutter and R. Hartley",
  title      = "Reliable Point Correspondences in Scenes Dominated by Highly Reflective and Largely Homogeneous Surfaces",
  booktitle  = "Proc. 12th Asian Conf. on Computer Vision -- Workshop ({RoLoD@ACCV'14}) Part I",
  address    = "Singapore",
  series     = "LNCS",
  volume     = "9008",
  pages      = "659--674",
  _editor    = "C. V. Jawahar and Shiguang Shan",
  publisher  = "Springer",
  _month     = nov,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#epipolar",
  pdf        = "http://www.hutter1.net/publ/epipolar.pdf",
  slides     = "http://www.hutter1.net/publ/sepipolar.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#icar",
  issn       = "0302-9743",
  isbn       = "978-3-319-16627-8",
  doi        = "10.1007/978-3-319-16628-5_47",
  keywords   = "point correspondences; reflections; homogeneous; texture impoverished; epipolar geometry; fundamental matrix; structure from motion; noisy.",
  abstract   = "Common Structure from Motion (SfM) tasks require reliable point correspondences in images taken from different views to subsequently estimate model parameters which describe the 3D scene geometry. For example when estimating the fundamental matrix from point correspondences using RANSAC. The amount of noise in the point correspondences drastically affect the estimation algorithm and the number of iterations needed for convergence grows exponentially with the level of noise. In scenes dominated by highly reflective and largely homogeneous surfaces such as vehicle panels and buildings with a lot of glass, existing approaches give a very high proportion of spurious point correspondences. As a result the number of iterations required for subsequent model estimation algorithms become intractable. We propose a novel method that uses descriptors evaluated along points in image edges to obtain a sufficiently high proportion of correct point correspondences. 
We show experimentally that our method gives better results in recovering the epipolar geometry in scenes dominated by highly reflective and homogeneous surfaces compared to common baseline methods on stereo images taken from considerably wide baselines.",
  support    = "ControlExpert GmbH",
  for        = "080106(100%)",
  seo        = "970108(80%),890205(20%)",
  znote      = "Acceptance rate: 153/307 = 50\%",
}

@InProceedings{Hutter:14martosc,
  author     = "Jan Leike and Marcus Hutter",
  title      = "Indefinitely Oscillating Martingales",
  booktitle  = "Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})",
  address    = "Bled, Slovenia",
  series     = "LNAI",
  volume     = "8776",
  _editor    = "Peter Auer and Alexander Clark",
  publisher  = "Springer",
  pages      = "321--335",
  _month     = oct,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#martosc",
  url        = "http://arxiv.org/abs/1408.3169",
  pdf        = "http://www.hutter1.net/publ/martosc.pdf",
  latex      = "http://www.hutter1.net/publ/martosc.tex",
  slides     = "http://www.hutter1.net/publ/smartosc.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#bayes",
  issn       = "0302-9743",
  isbn       = "978-3-319-11661-7",
  doi        = "10.1007/978-3-319-11662-4_23",
  keywords   = "martingales, infinite oscillations, bounds, convergence rates, minimum description length, mind changes.",
  abstract   = "We construct a class of nonnegative martingale processes that oscillate indefinitely with high probability. For these processes, we state a uniform rate of the number of oscillations for a given magnitude and show that this rate is asymptotically close to the theoretical upper bound. These bounds on probability and expectation of the number of upcrossings are compared to classical bounds from the martingale literature. We discuss two applications. First, our results imply that the limit of the minimum description length operator may not exist. Second, we give bounds on how often one can change one’s belief in a given hypothesis when observing a stream of data.",
  for        = "010405(60%),080101(40%)",
  seo        = "970101(60%),970108(40%)",
  znote      = "Acceptance rate: 21/50 = 42\%",
}

@InProceedings{Hutter:14off2on,
  author     = "Marcus Hutter",
  title      = "Offline to Online Conversion",
  booktitle  = "Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})",
  address    = "Bled, Slovenia",
  series     = "LNAI",
  volume     = "8776",
  _editor    = "Peter Auer and Alexander Clark",
  publisher  = "Springer",
  pages      = "230--244",
  _month     = oct,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#off2on",
  url        = "http://arxiv.org/abs/1407.3334",
  pdf        = "http://www.hutter1.net/publ/off2on.pdf",
  latex      = "http://www.hutter1.net/publ/off2on.tex",
  slides     = "http://www.hutter1.net/publ/soff2on.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#infoth",
  issn       = "0302-9743",
  isbn       = "978-3-319-11661-7",
  doi        = "10.1007/978-3-319-11662-4_17",
  keywords   = "offline; online; batch; sequential; probability; estimation; prediction; time-consistency; normalization; tractable; regret; combinatorics; Bayes; Laplace; Ristad; Good-Turing.",
  abstract   = "We consider the problem of converting offline estimators into an online predictor or estimator with small extra regret. Formally this is the problem of merging a collection of probability measures over strings of length 1,2,3,... into a single probability measure over infinite sequences. We describe various approaches and their pros and cons on various examples. As a side-result we give an elementary non-heuristic purely combinatoric derivation of Turing's famous estimator. Our main technical contribution is to determine the computational complexity of online estimators with good guarantees in general.",
  for        = "080401(30%),080201(30%),010405(40%)",
  seo        = "970108(100%)",
  znote      = "Acceptance rate: 21/50 = 42\%",
}

@InProceedings{Hutter:14exsagg,
  author     = "Marcus Hutter",
  title      = "Extreme State Aggregation beyond {MDP}s",
  booktitle  = "Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})",
  address    = "Bled, Slovenia",
  series     = "LNAI",
  volume     = "8776",
  _editor    = "Peter Auer and Alexander Clark",
  publisher  = "Springer",
  pages      = "185--199",
  _month     = oct,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#exsagg",
  url        = "http://arxiv.org/abs/1407.3341",
  pdf        = "http://www.hutter1.net/publ/exsagg.pdf",
  latex      = "http://www.hutter1.net/publ/exsagg.tex",
  slides     = "http://www.hutter1.net/publ/sexsagg.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#frl",
  issn       = "0302-9743",
  isbn       = "978-3-319-11661-7",
  doi        = "10.1007/978-3-319-11662-4_14",
  keywords   = "state aggregation, reinforcement learning, non-MDP.",
  abstract   = "We consider a Reinforcement Learning setup without any (esp.\ MDP) assumptions on the environment. State aggregation and more generally feature reinforcement learning is concerned with mapping histories/raw-states to reduced/aggregated states. The idea behind both is that the resulting reduced process (approximately) forms a small stationary finite-state MDP, which can then be efficiently solved or learnt. We considerably generalize existing aggregation results by showing that even if the reduced process is not an MDP, the (q-)value functions and (optimal) policies of an associated MDP with same state-space size solve the original problem, as long as the solution can approximately be represented as a function of the reduced states. This implies an upper bound on the required state space size that holds uniformly for all RL problems. It may also explain why RL algorithms designed for MDPs sometimes perform well beyond MDPs.",
  support    = "ARC grant DP120100950",
  for        = "080101(100%)",
  seo        = "970108(100%)",
  znote      = "Acceptance rate: 21/50 = 42\%",
}

% NOTE(review): the "for" shares in the entry below sum to 120% (80% + 40%) -- confirm the intended split.
@InProceedings{Hutter:14pacbayes,
  author     = "Tor Lattimore and Marcus Hutter",
  title      = "Bayesian Reinforcement Learning with Exploration",
  booktitle  = "Proc. 25th International Conf. on Algorithmic Learning Theory ({ALT'14})",
  address    = "Bled, Slovenia",
  series     = "LNAI",
  volume     = "8776",
  _editor    = "Peter Auer and Alexander Clark",
  publisher  = "Springer",
  pages      = "170--184",
  _month     = oct,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#pacbayes",
  pdf        = "http://www.hutter1.net/publ/pacbayes.pdf",
  slides     = "http://www.hutter1.net/publ/spacbayes.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#rl",
  issn       = "0302-9743",
  isbn       = "978-3-319-11661-7",
  doi        = "10.1007/978-3-319-11662-4_13",
  keywords   = "reinforcement learning; sample complexity; Bayes-optimal; exploration; PAC bounds",
  abstract   = "We consider a general reinforcement learning problem and show that carefully combining the Bayesian optimal policy and an exploring policy leads to minimax sample-complexity bounds in a very general class of (history-based) environments. We also prove lower bounds and show that the new algorithm displays adaptive behaviour when the environment is easier than worst-case.",
  support    = "ARC grant DP120100950",
  for        = "080101(80%),010404(40%)",
  seo        = "970108(100%)",
  znote      = "Acceptance rate: 21/50 = 42\%",
}

@InProceedings{Hutter:14learnutm,
  author     = "Peter Sunehag and Marcus Hutter",
  title      = "Intelligence as Inference or Forcing {O}ccam on the World",
  booktitle  = "Proc. 7th Conf. on Artificial General Intelligence ({AGI'14})",
  series     = "LNAI",
  volume     = "8598",
  pages      = "186--195",
  _editor    = "Ben Goertzel and Laurent Orseau and Javier Snaider",
  publisher  = "Springer",
  address    = "Quebec City, Canada",
  _month     = aug,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#learnutm",
  pdf        = "http://www.hutter1.net/publ/learnutm.pdf",
  slides     = "http://www.hutter1.net/publ/slearnutm.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#uai",
  doi        = "10.1007/978-3-319-09274-4_18",
  issn       = "0302-9743",
  isbn       = "978-3-319-09273-7",
  keywords   = "Ockham; Universal; Intelligence; Learning; Turing Machine; Expectation Maximization; Evolution; Reasoning; Agents; Reward.",
  abstract   = "We propose to perform the optimization task of Universal Artificial Intelligence (UAI) through learning a reference machine on which good programs are short. Further, we also acknowledge that the choice of reference machine that the UAI objective is based on is arbitrary and, therefore, we learn a suitable machine for the environment we are in. This is based on viewing Occam's razor as an imperative instead of as a proposition about the world. Since this principle cannot be true for all reference machines, we need to find a machine that makes the principle true. We both want good policies and the environment to have short implementations on the machine. Such a machine is learnt iteratively through a procedure that generalizes the principle underlying the Expectation-Maximization algorithm.",
  support    = "ARC grant DP120100950",
  for        = "080101(100%)",
  seo        = "970108(80%),970122(20%)",
  znote      = "Acceptance rate: 22/65 = 34\%.",
}

@InProceedings{Hutter:14optcog,
  author     = "Peter Sunehag and Marcus Hutter",
  title      = "A Dual Process Theory of Optimistic Cognition",
  booktitle  = "Proc. 36th Annual Meeting of the Cognitive Science Society ({CogSci'14})",
  pages      = "2949--2954",
  _editor    = "Paul Bello and Marcello Guarini and Marjorie McShane and Brian Scassellati",
  publisher  = "Curran Associates",
  address    = "Quebec City, Canada",
  _month     = jul,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#optcog",
  http       = "http://mindmodeling.org/cogsci2014/papers/509/paper509.pdf",
  pdf        = "http://www.hutter1.net/publ/optcog.pdf",
  slides     = "http://www.hutter1.net/publ/soptcog.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#frl",
  isbn       = "978-1-63439-116-0",
  keywords   = "Rationality, Optimism, Optimality, Reinforcement Learning",
  abstract   = "Optimism is a prevalent bias in human cognition including variations like self-serving beliefs, illusions of control and overly positive views of one’s own future. Further, optimism has been linked with both success and happiness. In fact, it has been described as a part of human mental well-being which has otherwise been assumed to be about being connected to reality. In reality, only people suffering from depression are realistic. Here we study a formalization of optimism within a dual process framework and study its usefulness beyond human needs in a way that also applies to artificial reinforcement learning agents. Optimism enables systematic exploration which is essential in an (partially) unknown world. The key property of an optimistic hypothesis is that if it is not contradicted when one acts greedily with respect to it, then one is well rewarded even if it is wrong.",
  support    = "ARC grant DP120100950",
  for        = "080101(50%),170202(50%)",
  seo        = "970108(70%),970117(30%)",
}

@InProceedings{Hutter:14frlabs,
  author     = "Mayank Daswani and Peter Sunehag and Marcus Hutter",
  title      = "Feature Reinforcement Learning: State of the Art",
  booktitle  = "Proc. Workshops at the 28th {AAAI} Conference on Artificial Intelligence: Sequential Decision Making with Big Data",
  pages      = "2--5",
  _editor    = "Amir-Massoud Farahmand et al.",
  publisher  = "AAAI Press",
  address    = "Quebec City, Canada",
  _month     = jul,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#frlabs",
  http       = "http://www.aaai.org/ocs/index.php/WS/AAAIW14/paper/view/8791",
  pdf        = "http://www.hutter1.net/publ/frlabs.pdf",
  slides     = "http://www.hutter1.net/publ/sfrlabs.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#frl",
  keywords   = "Reinforcement learning; temporal difference learning; partial observability; Q-learning; feature learning; function approximation; rational agents.",
  abstract   = "Feature reinforcement learning was introduced five years ago as a principled and practical approach to history-based learning. This paper examines the progress since its inception. We now have both model-based and model-free cost functions, most recently extended to the function approximation setting. Our current work is geared towards playing ATARI games using imitation learning, where we use Feature RL as a feature selection method for high-dimensional domains.",
  support    = "ARC grant DP120100950",
  for        = "080101(100%)",
  seo        = "970108(100%)",
  znote      = "http://sites.google.com/site/decisionmakingbigdata/",
}

@InProceedings{Hutter:14floud,
  author     = "Tom Everitt and Tor Lattimore and Marcus Hutter",
  title      = "Free Lunch for Optimisation under the Universal Distribution",
  booktitle  = "Proc. 2014 Congress on Evolutionary Computation ({CEC'14})",
  pages      = "167--174",
  _editor    = "Derong Liu and Jennie Si",
  publisher  = "IEEE",
  address    = "Beijing, China",
  _month     = jul,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#floud",
  url        = "http://arxiv.org/abs/1608.04544",
  pdf        = "http://www.hutter1.net/publ/floud.pdf",
  slides     = "http://www.hutter1.net/publ/sfloud.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#ait",
  isbn       = "978-1-4799-6626-4",
  doi        = "10.1109/CEC.2014.6900546",
  keywords   = "function optimization; universal prior; Occam's Razor; No Free Lunch.",
  abstract   = "Function optimisation is a major challenge in computer science. The No Free Lunch theorems state that if all functions with the same histogram are assumed to be equally probable then no algorithm outperforms any other in expectation. We argue against the uniform assumption and suggest a universal prior exists for which there is a free lunch, but where no particular class of functions is favoured over another. We also prove upper and lower bounds on the size of the free lunch.",
  for        = "080199(70%),010404(30%)",
  seo        = "970108(100%)",
}

@Article{Hutter:14tcdiscx,
  author     = "Tor Lattimore and Marcus Hutter",
  title      = "General Time Consistent Discounting",
  journal    = "Theoretical Computer Science",
  volume     = "519",
  pages      = "140--154",
  publisher  = "Elsevier",
  _month     = jan,
  year       = "2014",
  bibtex     = "http://www.hutter1.net/official/bib.htm#tcdiscx",
  pdf        = "http://www.hutter1.net/publ/tcdiscx.pdf",
  slides     = "http://www.hutter1.net/publ/stcdisc.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#agents",
  issn       = "0304-3975",
  doi        = "10.1016/j.tcs.2013.09.022",
  keywords   = "Rational agents; sequential decision theory; general discounting; time-consistency; game theory.",
  abstract   = "Modeling inter-temporal choice is a key problem in both computer science and economic theory. The discounted utility model of Samuelson is currently the most popular model for measuring the global utility of a time-series of local utilities. The model is limited by not allowing the discount function to change with the age of the agent. This is despite the fact that many agents, in particular humans, are best modelled with age-dependent discount functions. It is well known that discounting can lead to time-inconsistent behaviour where agents change their preferences over time. In this paper we generalise the discounted utility model to allow age-dependent discount functions. We then extend previous work in time-inconsistency to our new setting, including a complete characterisation of time-(in)consistent discount functions, the existence of sub-game perfect equilibrium policies where the discount function is time-inconsistent and a continuity result showing that ``nearly'' time-consistent discount rates lead to ``nearly'' time-consistent behaviour.",
  for        = "010405(20%),080101(40%),140104(20%),170202(20%)",
  seo        = "970108(40%),970114(30%),970117(30%)",
}

## %-------------Publications-of-Marcus-Hutter-2013--------------%

@Article{Hutter:13uai4lay,
  author     = "Marcus Hutter",
  title      = "To Create a Super-Intelligent Machine, Start with an Equation",
  journal    = "The Conversation",
  volume     = "November",
  number     = "29",
  pages      = "1--5",
  _month     = nov,
  year       = "2013",
  bibtex     = "http://www.hutter1.net/official/bib.htm#uai4lay",
  url        = "http://theconversation.com/to-create-a-super-intelligent-machine-start-with-an-equation-20756",
  pdf        = "http://www.hutter1.net/publ/uai4lay.pdf",
  slides     = "http://www.hutter1.net/publ/suai4lay.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#uai",
  keywords   = "intelligence; mathematics; learning; planning; rational agents; foundations.",
  abstract   = "Intelligence is a very difficult concept and, until recently, no one has succeeded in giving it a satisfactory formal definition. Most researchers have given up grappling with the notion of intelligence in full generality, and instead focus on related but more limited concepts – but I argue that mathematically defining intelligence is not only possible, but crucial to understanding and developing super-intelligent machines. From this, my research group has even successfully developed software that can learn to play Pac-Man from scratch.",
  for        = "080401(20%),080101(30%),080199(30%),220399(20%)",
  seo        = "970108(80%),970122(20%)",
  znote      = "Top 10 of 700+ ANU articles till 2013. 25'000+ views. http://theconversation.com/metrics/institutions/australian-national-university/article_leaderboard",
}

@InProceedings{Hutter:13rlqh,
  author     = "Mayank Daswani and Peter Sunehag and Marcus Hutter",
  title      = "Q-Learning for History-Based Reinforcement Learning",
  booktitle  = "Proc. 5th Asian Conf. on Machine Learning ({ACML'13})",
  volume     = "29",
  pages      = "213--228",
  _editor    = "Tu Bao Ho and Cheng Soon Ong",
  publisher  = "JMLR",
  address    = "Canberra, Australia",
  _month     = nov,
  year       = "2013",
  bibtex     = "http://www.hutter1.net/official/bib.htm#rlqh",
  http       = "http://proceedings.mlr.press/v29/Daswani13.html",
  url        = "http://jmlr.org/proceedings/papers/v29/Daswani13.pdf",
  pdf        = "http://www.hutter1.net/publ/rlqh.pdf",
  slides     = "http://www.hutter1.net/publ/srlqh.pdf",
  poster     = "http://www.hutter1.net/publ/prlqh.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#frl",
  issn       = "1532-4435",
  keywords   = "feature reinforcement learning; temporal difference learning; Markov decision process; partial observability; Q-learning; Monte Carlo search; Pocman; rational agents.",
  abstract   = "We extend the Q-learning algorithm from the Markov Decision Process setting to problems where observations are non-Markov and do not reveal the full state of the world i.e. to POMDPs. We do this in a natural manner by adding l0 regularisation to the pathwise squared Q-learning objective function and then optimise this over both a choice of map from history to states and the resulting MDP parameters. The optimisation procedure involves a stochastic search over the map class nested with classical Q-learning of the parameters. This algorithm fits perfectly into the feature reinforcement learning framework, which chooses maps based on a cost criteria. The cost criterion used so far for feature reinforcement learning has been model-based and aimed at predicting future states and rewards. Instead we directly predict the return, which is what is needed for choosing optimal actions. 
Our Q-learning criteria also lends itself immediately to a function approximation setting where features are chosen based on the history. This algorithm is somewhat similar to the recent line of work on lasso temporal difference learning which aims at finding a small feature set with which one can perform policy evaluation. The distinction is that we aim directly for learning the Q-function of the optimal policy and we use l0 instead of l1 regularisation. We perform an experimental evaluation on classical benchmark domains and find improvement in convergence speed as well as in economy of the state representation. We also compare against MC-AIXI on the large Pocman domain and achieve competitive performance in average reward. We use less than half the CPU time and 36 times less memory. Overall, our algorithm hQL provides a better combination of computational, memory and data efficiency than existing algorithms in this setting.",
  support    = "ARC grant DP120100950",
  for        = "080101(100%)",
  seo        = "970108(100%)",
  znote      = "long presentation: Acceptance rate: 13/103 = 13\%",
}

@Article{Hutter:13problogic,
  author     = "Marcus Hutter and John W. Lloyd and Kee Siong Ng and William T.B. Uther",
  title      = "Probabilities on Sentences in an Expressive Logic",
  journal    = "Journal of Applied Logic",
  volume     = "11",
  pages      = "386--420",
  _publisher = "Elsevier",
  _month     = nov,
  year       = "2013",
  bibtex     = "http://www.hutter1.net/official/bib.htm#problogic",
  url        = "http://arxiv.org/abs/1209.2620",
  pdf        = "http://www.hutter1.net/publ/problogic.pdf",
  latex      = "http://www.hutter1.net/publ/problogic.tex",
  slides     = "http://www.hutter1.net/publ/sproblogic.pdf",
  video      = "http://www.youtube.com/watch?v=WEkZSHcRsAM",
  project    = "http://www.hutter1.net/official/projects.htm#logic",
  doi        = "10.1016/j.jal.2013.03.003",
  issn       = "1570-8683",
  keywords   = "higher-order logic; probability on sentences; Gaifman; Cournot; Bayes; induction; confirmation; learning; prior; knowledge; entropy.",
  abstract   = "Automated reasoning about uncertain knowledge has many applications. One difficulty when developing such systems is the lack of a completely satisfactory integration of logic and probability. We address this problem directly. Expressive languages like higher-order logic are ideally suited for representing and reasoning about structured knowledge. Uncertain knowledge can be modeled by using graded probabilities rather than binary truth-values. The main technical problem studied in this paper is the following: Given a set of sentences, each having some probability of being true, what probability should be ascribed to other (query) sentences? A natural wish-list, among others, is that the probability distribution (i) is consistent with the knowledge base, (ii) allows for a consistent inference procedure and in particular (iii) reduces to deductive logic in the limit of probabilities being 0 and 1, (iv) allows (Bayesian) inductive reasoning and (v) learning in the limit and in particular (vi) allows confirmation of universally quantified hypotheses/sentences. 
We translate this wish-list into technical requirements for a prior probability and show that probabilities satisfying all our criteria exist. We also give explicit constructions and several general characterizations of probabilities that satisfy some or all of the criteria and various (counter) examples. We also derive necessary and sufficient conditions for extending beliefs about finitely many sentences to suitable probabilities over all sentences, and in particular least dogmatic or least biased ones. We conclude with a brief outlook on how the developed theory might be used and approximated in autonomous reasoning agents. Our theory is a step towards a globally consistent and empirically satisfactory unification of probability and logic.",
  support    = "ARC grant DP0877635",
  for        = "080203(50%),010404(30%),080401(10%),080101(10%)",
  seo        = "970108(80%),970101(20%)",
  znote      = "Presented at Progic 2011: http://sites.google.com/site/progicconference2011/ and at WL4AI@IJCAI 2013: http://ijcai13.org/program/workshop/32",
}

@InProceedings{Hutter:13ksaprob,
  author     = "Laurent Orseau and Tor Lattimore and Marcus Hutter",
  title      = "Universal Knowledge-Seeking Agents for Stochastic Environments",
  booktitle  = "Proc. 24th International Conf. on Algorithmic Learning Theory ({ALT'13})",
  address    = "Singapore",
  series     = "LNAI",
  volume     = "8139",
  _editor    = "S. Jain and R. Munos and F. Stephan and Th. Zeugmann",
  publisher  = "Springer",
  pages      = "158--172",
  _month     = oct,
  year       = "2013",
  bibtex     = "http://www.hutter1.net/official/bib.htm#ksaprob",
  conf       = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT13/",
  pdf        = "http://www.hutter1.net/publ/ksaprob.pdf",
  slides     = "http://www.hutter1.net/publ/sksaprob.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#uai",
  doi        = "10.1007/978-3-642-40935-6_12",
  issn       = "0302-9743",
  isbn       = "978-3-642-40934-9",
  keywords   = "Universal artificial intelligence; exploration; reinforcement learning; algorithmic information theory; Solomonoff induction.",
  abstract   = "We define an optimal Bayesian knowledge-seeking agent, KL-KSA, designed for countable hypothesis classes of stochastic environments and whose goal is to gather as much information about the unknown world as possible. Although this agent works for arbitrary countable classes and priors, we focus on the especially interesting case where all stochastic computable environments are considered and the prior is based on Solomonoff's universal prior. Among other properties, we show that KL-KSA learns the true environment in the sense that it learns to predict the consequences of actions it does not take. We show that it does not consider noise to be information and avoids taking actions leading to inescapable traps. We also present a variety of toy experiments demonstrating that KL-KSA behaves according to expectation.",
  for        = "080101(50%),080199(50%)",
  seo        = "970108(100%)",
  znote      = "Acceptance rate: 23/39 = 59\%",
}

@InProceedings{Hutter:13ccbayessp,
  author     = "Tor Lattimore and Marcus Hutter and Peter Sunehag",
  title      = "Concentration and Confidence for Discrete Bayesian Sequence Predictors",
  booktitle  = "Proc. 24th International Conf. on Algorithmic Learning Theory ({ALT'13})",
  address    = "Singapore",
  series     = "LNAI",
  volume     = "8139",
  _editor    = "S. Jain and R. Munos and F. Stephan and Th. Zeugmann",
  publisher  = "Springer",
  pages      = "324--338",
  _month     = oct,
  year       = "2013",
  bibtex     = "http://www.hutter1.net/official/bib.htm#ccbayessp",
  conf       = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT13/",
  url        = "http://arxiv.org/abs/1307.0127",
  pdf        = "http://www.hutter1.net/publ/ccbayessp.pdf",
  slides     = "http://www.hutter1.net/publ/sccbayessp.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#bayes",
  doi        = "10.1007/978-3-642-40935-6_23",
  issn       = "0302-9743",
  isbn       = "978-3-642-40934-9",
  keywords   = "Bayesian sequence prediction; concentration of measure; information theory; KWIK learning.",
  abstract   = "Bayesian sequence prediction is a simple technique for predicting future symbols sampled from an unknown measure on infinite sequences over a countable alphabet. While strong bounds on the expected cumulative error are known, there are only limited results on the distribution of this error. We prove tight high-probability bounds on the cumulative error, which is measured in terms of the Kullback-Leibler (KL) divergence. We also consider the problem of constructing upper confidence bounds on the KL and Hellinger errors similar to those constructed from Hoeffding-like bounds in the i.i.d. case. The new results are applied to show that Bayesian sequence prediction can be used in the Knows What It Knows (KWIK) framework with bounds that match the state-of-the-art.",
  support    = "ARC grant DP120100950",
  for        = "010405(70%),010404(30%)",
  seo        = "970101(100%)",
  znote      = "Acceptance rate: 23/39 = 59\%",
}

@Proceedings{Hutter:13ewrlabs,
  editor     = "Peter Auer and Marcus Hutter and Laurent Orseau",
  title      = "Reinforcement Learning",
  subtitle   = "Dagstuhl Seminar 13321 ({EWRL'13})",
  publisher  = "Schloss Dagstuhl -- Leibniz-Zentrum fuer Informatik",
  address    = "Dagstuhl, Germany",
  volume     = "3",
  number     = "8",
  _month     = aug,
  year       = "2013",
  bibtex     = "http://www.hutter1.net/official/bib.htm#ewrlabs13",
  url        = "http://drops.dagstuhl.de/opus/volltexte/2013/4340/",
  pdf        = "http://www.hutter1.net/publ/ewrlabs13.pdf",
  project    = "http://www.hutter1.net/official/projects.htm#other",
  issn       = "2192-5283",
  doi        = "10.4230/DagRep.3.8.1",
  keywords   = "Machine Learning, Reinforcement Learning, Markov Decision Processes, Planning",
  abstract   = "This Dagstuhl Seminar also stood as the 11th European Workshop on Reinforcement Learning (EWRL11). Reinforcement learning gains more and more attention each year, as can be seen at the various conferences (ECML, ICML, IJCAI, ...). EWRL, and in particular this Dagstuhl Seminar, aimed at gathering people interested in reinforcement learning from all around the globe. This unusual format for EWRL helped viewing the field and discussing topics differently.",
  for        = "080101(50%),080198(50%)",
  seo        = "970108(100%)",
}

@InProceedings{Hutter:13problogics,
  author    = "Marcus Hutter and John W. Lloyd and Kee Siong Ng and William T.B. Uther",
  title     = "Unifying Probability and Logic for Learning",
  booktitle = "Proc. 2nd Workshop on Weighted Logics for AI ({WL4AI'13})",
  _volume   = "2",
  pages     = "65--72",
  _editor   = "Lluis Godo and Henri Prade and Guilin Qi",
  publisher = "",
  address   = "Beijing, China",
  _month    = aug,
  year      = "2013",
  bibtex    = "http://www.hutter1.net/official/bib.htm#problogics",
  url       = "http://ijcai13.org/program/workshop/32",
  http      = "http://www.iiia.csic.es/wl4ai-2013/working_papers",
  pdf       = "http://www.hutter1.net/publ/problogics.pdf",
  slides    = "http://www.hutter1.net/publ/sproblogic.pdf",
  video     = "http://www.youtube.com/watch?v=WEkZSHcRsAM",
  project   = "http://www.hutter1.net/official/projects.htm#logic",
  conf      = "http://www.iiia.csic.es/wl4ai-2013/",
  keywords  = "higher-order logic; probability on sentences; Gaifman; Cournot; Bayes; induction; confirmation; learning; prior; knowledge; entropy.",
  abstract  = "Uncertain knowledge can be modeled by using graded probabilities rather than binary truth-values, but so far a completely satisfactory integration of logic and probability has been lacking. In particular the inability of confirming universal hypotheses has plagued most if not all systems so far. We address this problem head on. The main technical problem to be discussed is the following: Given a set of sentences, each having some probability of being true, what probability should be ascribed to other (query) sentences? A natural wish-list, among others, is that the probability distribution (i) is consistent with the knowledge base, (ii) allows for a consistent inference procedure and in particular (iii) reduces to deductive logic in the limit of probabilities being 0 and 1, (iv) allows (Bayesian) inductive reasoning and (v) learning in the limit and in particular (vi) allows confirmation of universally quantified hypotheses/sentences.
We show that probabilities satisfying (i)-(vi) exist, and present necessary and sufficient conditions (Gaifman and Cournot). The theory is a step towards a globally consistent and empirically satisfactory unification of probability and logic.",
  support   = "ARC grant DP0877635",
  for       = "080203(50%),010404(30%),080401(10%),080101(10%)",
  seo       = "970108(80%),970101(20%)",
  znote     = "Only appears on WS/IJCAI website",
}

@InProceedings{Hutter:13agscilaws,
  author    = "Peter Sunehag and Marcus Hutter",
  title     = "Learning Agents with Evolving Hypothesis Classes",
  booktitle = "Proc. 6th Conf. on Artificial General Intelligence ({AGI'13})",
  series    = "LNAI",
  volume    = "7999",
  pages     = "150--159",
  _editor   = "Kai-Uwe Kuehnberger and Sebastian Rudolph and Pei Wang",
  publisher = "Springer, Heidelberg",
  _address  = "Beijing, China",
  _month    = jul,
  year      = "2013",
  bibtex    = "http://www.hutter1.net/official/bib.htm#agscilaws",
  pdf       = "http://www.hutter1.net/publ/agscilaws.pdf",
  slides    = "http://www.hutter1.net/publ/sagscilaws.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  doi       = "10.1007/978-3-642-39521-5_16",
  issn      = "0302-9743",
  isbn      = "978-3-642-39520-8",
  abstract  = "It has recently been shown that a Bayesian agent with a universal hypothesis class resolves most induction problems discussed in the philosophy of science. These ideal agents are, however, neither practical nor a good model for how real science works. We here introduce a framework for learning based on implicit beliefs over all possible hypotheses and limited sets of explicit theories sampled from an implicit distribution represented only by the process by which it generates new hypotheses. We address the questions of how to act based on a limited set of theories as well as what an ideal sampling process should be like. Finally, we discuss topics in philosophy of science and cognitive science from the perspective of this framework.",
  support   = "ARC grant DP120100950",
  for       = "080101(100%)",
  seo       = "970108(80%),970122(20%)",
  znote     = "Acceptance rate: 20/42 = 48\%.",
}

@Article{Hutter:13pacgrl,
  author    = "Tor Lattimore and Marcus Hutter and Peter Sunehag",
  title     = "The Sample-Complexity of General Reinforcement Learning",
  journal   = "Journal of Machine Learning Research, W\&CP: ICML",
  volume    = "28",
  number    = "3",
  pages     = "28--36",
  _editor   = "S. Dasgupta and D. McAllester",
  publisher = "",
  _address  = "Atlanta, Georgia, USA",
  _month    = jun,
  year      = "2013",
  bibtex    = "http://www.hutter1.net/official/bib.htm#pacgrl",
  http      = "http://jmlr.org/proceedings/papers/v28/lattimore13.html",
  url       = "http://arxiv.org/abs/1308.4828",
  pdf       = "http://www.hutter1.net/publ/pacgrl.pdf",
  latex     = "http://www.hutter1.net/publ/pacgrl.tex",
  slides    = "http://www.hutter1.net/publ/spacgrl.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#agent",
  issn      = "1532-4435",
  keywords  = "reinforcement learning; sample complexity; PAC bounds",
  abstract  = "We present a new algorithm for general reinforcement learning where the true environment is known to belong to a finite class of N arbitrary models. The algorithm is shown to be near-optimal for all but O(N log^2 N) timesteps with high probability. Infinite classes are also considered where we show that compactness is a key criterion for determining the existence of uniform sample-complexity bounds. A matching lower bound is given for the finite case.",
  support   = "ARC grant DP120100950",
  for       = "010405(50%),080199(50%). See CD4/Projects/Grant-Info.txt for more",
  seo       = "970108(100%)",
}

@Article{Hutter:13sad,
  author    = "Marcus Hutter",
  title     = "Sparse Adaptive {D}irichlet-Multinomial-like Processes",
  journal   = "Journal of Machine Learning Research, W\&CP: COLT",
  volume    = "30",
  pages     = "432--459",
  _month    = jun,
  year      = "2013",
  bibtex    = "http://www.hutter1.net/official/bib.htm#sad",
  url       = "http://arxiv.org/abs/1305.3671",
  pdf       = "http://www.hutter1.net/publ/sad.pdf",
  latex     = "http://www.hutter1.net/publ/sad.tex",
  slides    = "http://www.hutter1.net/publ/ssad.pdf",
  audio     = "http://vmc.aarnet.edu.au/userdata/0b/0b4d5c6f-e775-4d48-8b47-32dc95d19b8b/ingest1685426376076922317.asf",
  project   = "http://www.hutter1.net/official/projects.htm#infoth",
  issn      = "1532-4435",
  keywords  = "sparse coding; adaptive parameters; Dirichlet-Multinomial; Polya urn; data-dependent redundancy bound; small/large alphabet; data compression.",
  abstract  = "Online estimation and modelling of i.i.d. data for short sequences over large or complex ``alphabets'' is a ubiquitous (sub)problem in machine learning, information theory, data compression, statistical language processing, and document analysis. The Dirichlet-Multinomial distribution (also called Polya urn scheme) and extensions thereof are widely applied for online i.i.d. estimation. Good a-priori choices for the parameters in this regime are difficult to obtain though. I derive an optimal adaptive choice for the main parameter via tight, data-dependent redundancy bounds for a related model. The 1-line recommendation is to set the 'total mass' = 'precision' = 'concentration' parameter to m/2ln[(n+1)/m], where n is the (past) sample size and m the number of different symbols observed (so far). The resulting estimator is simple, online, fast, and experimental performance is superb.",
  for       = "080401(70%),010405(30%)",
  seo       = "970108(100%)",
  znote     = "26th Annual Conf. on Learning Theory. Acceptance rate: 47/98 = 48\%",
}

@InProceedings{Hutter:13mnonconv,
  author    = "Tor Lattimore and Marcus Hutter",
  title     = "On {M}artin-L{\"o}f Convergence of {S}olomonoff's Mixture",
  booktitle = "Proc. 10th Annual Conference on Theory and Applications of Models of Computation ({TAMC'13})",
  volume    = "7876",
  pages     = "212--223",
  series    = "LNCS",
  _editor   = "T-H.H. Chan and L.C. Lau and L. Trevisan",
  publisher = "Springer",
  address   = "Hong Kong, China",
  _month    = may,
  year      = "2013",
  bibtex    = "http://www.hutter1.net/official/bib.htm#mnonconv",
  pdf       = "http://www.hutter1.net/publ/mnonconv.pdf",
  slides    = "http://www.hutter1.net/publ/smnonconv.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#ait",
  issn      = "0302-9743",
  isbn      = "978-3-642-38235-2",
  doi       = "10.1007/978-3-642-38236-9_20",
  keywords  = "Solomonoff induction, Kolmogorov complexity, theory of computation.",
  abstract  = "We study the convergence of Solomonoff's universal mixture on individual Martin-L{\"o}f random sequences. A new result is presented extending the work of Hutter and Muchnik (2004) by showing that there does not exist a universal mixture that converges on all Martin-L{\"o}f random sequences.",
  for       = "080401(50%),010404(30%),010405(20%)",
  seo       = "970101(30%),970108(70%)",
  znote     = "Acceptance rate: 31/70 = 44\%",
}

@Article{Hutter:13alttcs,
  author    = "Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann",
  title     = "{ALT'10} Special Issue",
  journal   = "Theoretical Computer Science",
  editor    = "Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann",
  volume    = "473",
  publisher = "Elsevier",
  pages     = "1--3/178",
  _month    = feb,
  year      = "2013",
  bibtex    = "http://www.hutter1.net/official/bib.htm#alt10tcs",
  http      = "http://www.sciencedirect.com/science/journal/03043975/473",
  doi       = "10.1016/j.tcs.2012.10.007",
  issn      = "0304-3975",
  keywords  = "algorithmic learning theory, special issue, preface",
  abstract  = "This special issue contains expanded versions of papers that appeared in preliminary form in the proceedings of the 21st International Conference on Algorithmic Learning Theory (ALT 2010), which was held in Canberra, Australia during October 6--8, 2010. \emph{Algorithmic Learning Theory} is a conference series which is dedicated to the theoretical study of the algorithmic aspects of learning. The best papers of the conference ALT 2010 were invited for this special issue and after a thorough reviewing process, most of them qualified for this Special Issue on Algorithmic Learning Theory of Theoretical Computer Science. The preface contains a short introduction to each of these papers.",
  for       = "080401(20%),010405(20%),080199(60%)",
  seo       = "970108(100%)",
}

## %-------------Publications-of-Marcus-Hutter-2012--------------%

@Article{Hutter:12lstphi,
  author    = "Mayank Daswani and Peter Sunehag and Marcus Hutter",
  title     = "Feature Reinforcement Learning using Looping Suffix Trees",
  journal   = "Journal of Machine Learning Research, W\&CP",
  volume    = "24",
  pages     = "11--23",
  _month    = dec,
  year      = "2012",
  bibtex    = "http://www.hutter1.net/official/bib.htm#lstphi",
  http      = "http://proceedings.mlr.press/v24/daswani12a.html",
  pdf       = "http://www.hutter1.net/publ/lstphi.pdf",
  latex     = "http://www.hutter1.net/publ/lstphi.tex",
  slides    = "http://www.hutter1.net/publ/slstphi.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#rl",
  issn      = "1532-4435",
  keywords  = "looping suffix trees; Markov decision process; reinforcement learning; partial observability; information \& complexity; Monte Carlo search; rational agents.",
  abstract  = "There has recently been much interest in history-based methods using suffix trees to solve POMDPs. However, these suffix trees cannot efficiently represent environments that have long-term dependencies. We extend the recently introduced CT$\Phi$MDP algorithm to the space of looping suffix trees which have previously only been used in solving deterministic POMDPs. The resulting algorithm replicates results from CT$\Phi$MDP for environments with short term dependencies, while it outperforms LSTM-based methods on TMaze, a deep memory environment.",
  support   = "ARC grant DP120100950",
  for       = "080401(20%),010405(20%),080101(60%)",
  seo       = "970108(100%)",
}

% NOTE(review): the FoR percentages in the "for" field of the next entry add up to 80%, not 100% -- confirm the intended split.
@InProceedings{Hutter:12aixiens,
  author    = "Joel Veness and Peter Sunehag and Marcus Hutter",
  title     = "On Ensemble Techniques for {AIXI} Approximation",
  booktitle = "Proc. 5th Conf. on Artificial General Intelligence ({AGI'12})",
  series    = "LNAI",
  volume    = "7716",
  pages     = "341--351",
  _editor   = "J. Bach and B. Goertzel and M. Ikle",
  publisher = "Springer, Heidelberg",
  _address  = "Oxford, UK",
  _month    = dec,
  year      = "2012",
  bibtex    = "http://www.hutter1.net/official/bib.htm#aixiens",
  pdf       = "http://www.hutter1.net/publ/aixiens.pdf",
  slides    = "http://www.hutter1.net/publ/saixiens.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  doi       = "10.1007/978-3-642-35506-6_35",
  issn      = "0302-9743",
  isbn      = "978-3-642-35505-9",
  keywords  = "Ensemble Techniques; AIXI; Universal Artificial Intelligence; Agent Architectures; Perception and Perceptual Modeling.",
  abstract  = "One of the key challenges in AIXI approximation is model class approximation - i.e. how to meaningfully approximate Solomonoff Induction without requiring an infeasible amount of computation? This paper advocates a bottom-up approach to this problem, by describing a number of principled ensemble techniques for approximate AIXI agents. Each technique works by efficiently combining a set of existing environment models into a single, more powerful model. These techniques have the potential to play an important role in future AIXI approximations.",
  support   = "ARC grant DP120100950",
  for       = "080401(20%),010404(30%),080101(30%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 34/80 = 42\%.",
}

@InProceedings{Hutter:12aixiopt,
  author    = "Peter Sunehag and Marcus Hutter",
  title     = "Optimistic {AIXI}",
  booktitle = "Proc. 5th Conf. on Artificial General Intelligence ({AGI'12})",
  series    = "LNAI",
  volume    = "7716",
  pages     = "312--321",
  _editor   = "J. Bach and B. Goertzel and M. Ikle",
  publisher = "Springer, Heidelberg",
  _address  = "Oxford, UK",
  _month    = dec,
  year      = "2012",
  bibtex    = "http://www.hutter1.net/official/bib.htm#aixiopt",
  pdf       = "http://www.hutter1.net/publ/aixiopt.pdf",
  slides    = "http://www.hutter1.net/publ/saixiopt.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  doi       = "10.1007/978-3-642-35506-6_32",
  issn      = "0302-9743",
  isbn      = "978-3-642-35505-9",
  keywords  = "AIXI; Reinforcement Learning; Optimism; Optimality; Agents; Prior Sets; Bets.",
  abstract  = "We consider extending the AIXI agent by using multiple (or even a compact class of) priors. This has the benefit of weakening the conditions on the true environment that we need to prove asymptotic optimality. Furthermore, it decreases the arbitrariness of picking the prior or reference machine. We connect this to removing symmetry between accepting and rejecting bets in the rationality axiomatization of AIXI and replacing it with optimism. Optimism is often used to encourage exploration in the more restrictive Markov Decision Process setting and it alleviates the problem that AIXI (with geometric discounting) stops exploring prematurely.",
  support   = "ARC grant DP120100950",
  for       = "080101(70%),220302(30%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 34/80 = 42\%.",
}

@InProceedings{Hutter:12windowkt,
  author    = "Peter Sunehag and Wen Shao and Marcus Hutter",
  title     = "Coding of Non-Stationary Sources as a Foundation for Detecting Change Points and Outliers in Binary Time-Series",
  booktitle = "Proc. 10th Australasian Data Mining Conference ({AusDM'12})",
  volume    = "134",
  pages     = "79--84",
  _editor   = "Yanchang Zhao and Jiuyong Li and Paul Kennedy and Peter Christen",
  publisher = "Australian Computer Society",
  address   = "Sydney, Australia",
  _month    = dec,
  year      = "2012",
  bibtex    = "http://www.hutter1.net/official/bib.htm#windowkt",
  http      = "http://crpit.com/abstracts/CRPITV134Sunehag.html",
  pdf       = "http://www.hutter1.net/publ/windowkt.pdf",
  latex     = "http://www.hutter1.net/publ/windowkt.tex",
  slides    = "http://www.hutter1.net/publ/swindowkt.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#compress",
  issn      = "1445-1336",
  isbn      = "978-1-921770-14-2",
  keywords  = "non-stationary sources; time-series; change point detection; outlier; compression",
  abstract  = "An interesting scheme for estimating and adapting distributions in real-time for non-stationary data has recently been the focus of study for several different tasks relating to time series and data mining, namely change point detection, outlier detection and online compression/sequence prediction. An appealing feature is that unlike more sophisticated procedures, it is as fast as the related stationary procedures which are simply modified through discounting or windowing. The discount scheme makes older observations lose their influence on new predictions. The authors of this article recently used a discount scheme for introducing an adaptive version of the Context Tree Weighting compression algorithm. The mentioned change point and outlier detection methods rely on the changing compression ratio of an online compression algorithm.
Here we are beginning to provide theoretical foundations for the use of these adaptive estimation procedures that have already shown practical promise.",
  support   = "ARC grant DP120100950",
  for       = "080401(100%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 25/55 = 45\%",
}

@InProceedings{Hutter:12optopt,
  author    = "Peter Sunehag and Marcus Hutter",
  title     = "Optimistic Agents are Asymptotically Optimal",
  booktitle = "Proc. 25th Australasian Joint Conference on Artificial Intelligence ({AusAI'12})",
  series    = "LNAI",
  volume    = "7691",
  pages     = "15--26",
  _editor   = "Michael Thielscher and Dongmo Zhang",
  publisher = "Springer",
  address   = "Sydney, Australia",
  _month    = dec,
  year      = "2012",
  bibtex    = "http://www.hutter1.net/official/bib.htm#optopt",
  url       = "http://arxiv.org/abs/1210.0077",
  pdf       = "http://www.hutter1.net/publ/optopt.pdf",
  latex     = "http://www.hutter1.net/publ/optopt.tex",
  slides    = "http://www.hutter1.net/publ/soptopt.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  issn      = "0302-9743",
  isbn      = "978-3-642-35100-6",
  doi       = "10.1007/978-3-642-35101-3_2",
  keywords  = "Reinforcement Learning; Optimism; Optimality; Agents; Uncertainty.",
  abstract  = "We use optimism to introduce generic asymptotically optimal reinforcement learning agents. They achieve, with an arbitrary finite or compact class of environments, asymptotically optimal behavior. Furthermore, in the finite deterministic case we provide finite error bounds.",
  support   = "ARC grant DP120100950",
  for       = "080101(70%),220302(30%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 76/196 = 39\%",
}

@InCollection{Hutter:12ctoe2,
  author    = "Marcus Hutter",
  title     = "The Subjective Computable Universe",
  booktitle = "A Computable Universe: Understanding and Exploring Nature as Computation",
  pages     = "399--416",
  _editor   = "Hector Zenil",
  publisher = "World Scientific",
  _month    = dec,
  year      = "2012",
  bibtex    = "http://www.hutter1.net/official/bib.htm#ctoe2",
  pdf       = "http://www.hutter1.net/publ/ctoe2.pdf",
  latex     = "http://www.hutter1.net/publ/ctoe2.zip",
  slides    = "http://www.hutter1.net/publ/sctoe.pdf",
  video     = "http://pirsa.org/displayFlash.php?id=18040117",
  http      = "http://www.worldscientific.com/worldscibooks/10.1142/8306",
  project   = "http://www.hutter1.net/official/projects.htm#physics",
  isbn      = "978-9-814-37429-3",
  keywords  = "world models; observer localization; computability; predictive power; Ockham's razor; universal theories; inductive reasoning; simplicity and complexity.",
  abstract  = "Nearly all theories developed for our world are computational. The fundamental theories in physics can be used to emulate on a computer ever more aspects of our universe. This and the ubiquity of computers and virtual realities has increased the acceptance of the computational paradigm. A computable theory of everything seems to have come within reach. Given the historic progression of theories from ego- to geo- to helio-centric models to universe and multiverse theories, the next natural step was to postulate a multiverse composed of all computable universes. Unfortunately, rather than being a theory of everything, the result is more a theory of nothing, which actually plagues all too-large universe models in which observers occupy random or remote locations. The problem can be solved by incorporating the subjective observer process into the theory. While the computational paradigm exposes a fundamental problem of large-universe theories, it also provides its solution.",
  for       = "080401(70%),020103(30%)",
  seo       = "970122(100%)",
}

@InProceedings{Hutter:12watershed,
  author    = "Di Yang and Stephen Gould and Marcus Hutter",
  title     = "A Noise Tolerant Watershed Transformation with Viscous Force for Seeded Image Segmentation",
  booktitle = "Proc. 11th Asian Conf. on Computer Vision ({ACCV'12})",
  address   = "Daejeon, Korea",
  series    = "LNCS",
  volume    = "7724",
  pages     = "775--789",
  _editor   = "K. M. Lee and Y. Matsushita and J. M. Rehg and Z. Hu",
  publisher = "Springer",
  _month    = nov,
  year      = "2012",
  bibtex    = "http://www.hutter1.net/official/bib.htm#watershed",
  pdf       = "http://www.hutter1.net/publ/watershed.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#icar",
  issn      = "0302-9743",
  isbn      = "978-3-642-37330-5",
  doi       = "10.1007/978-3-642-37331-2_58",
  keywords  = "seeded image segmentation; viscous force; local average path; noise-tolerant.",
  abstract  = "The watershed transform was proposed as a novel method for image segmentation over 30 years ago. Today it is still used as an elementary step in many powerful segmentation procedures. The watershed transform constitutes one of the main concepts of mathematical morphology as an important region-based image segmentation approach. However, the original watershed transform is highly sensitive to noise and is incapable of detecting objects with broken edges. Consequently its adoption in domains where imaging is subject to high noise is limited. By incorporating a high-order energy term into the original watershed transform, we proposed the viscous force watershed transform, which is more immune to noise and able to detect objects with broken edges.",
  support   = "ControlExpert GmbH",
  for       = "080106(100%)",
  seo       = "970108(80%),890205(20%)",
  znote     = "Acceptance rate: 226/869 = 26\%",
}

@InProceedings{Hutter:12pacmdp,
  author    = "Tor Lattimore and Marcus Hutter",
  title     = "{PAC} bounds for discounted {MDP}s",
  booktitle = "Proc. 23rd International Conf. on Algorithmic Learning Theory ({ALT'12})",
  address   = "Lyon, France",
  series    = "LNAI",
  volume    = "7568",
  _editor   = "N.H. Bshouty and G. Stoltz and N. Vayatis and T. Zeugmann",
  publisher = "Springer",
  pages     = "320--334",
  _month    = oct,
  year      = "2012",
  bibtex    = "http://www.hutter1.net/official/bib.htm#pacmdp",
  conf      = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT12/",
  url       = "http://arxiv.org/abs/1202.3890",
  pdf       = "http://www.hutter1.net/publ/pacmdp.pdf",
  slides    = "http://www.hutter1.net/publ/spacmdp.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#rl",
  doi       = "10.1007/978-3-642-34106-9_26",
  issn      = "0302-9743",
  isbn      = "978-3-642-34105-2",
  keywords  = "Reinforcement learning; sample-complexity; exploration exploitation; PAC-MDP; Markov decision processes.",
  abstract  = "We study upper and lower bounds on the sample-complexity of learning near-optimal behaviour in finite-state discounted Markov Decision Processes (MDPs). We prove a new bound for a modified version of Upper Confidence Reinforcement Learning (UCRL) with only cubic dependence on the horizon. The bound is unimprovable in all parameters except the size of the state/action space, where it depends linearly on the number of non-zero transition probabilities. The lower bound strengthens previous work by being both more general (it applies to all policies) and tighter. The upper and lower bounds match up to logarithmic factors provided the transition matrix is not too dense.",
  support   = "ARC grant DP0988049",
  for       = "010404(30%),010405(30%),080198(40%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 23/47 = 49\%",
}

@InCollection{Hutter:12uaigentle,
  author    = "Marcus Hutter",
  title     = "One Decade of Universal Artificial Intelligence",
  booktitle = "Theoretical Foundations of Artificial General Intelligence",
  pages     = "67--88",
  _editor   = "Pei Wang and Ben Goertzel",
  publisher = "Atlantis Press",
  _month    = sep,
  year      = "2012",
  bibtex    = "http://www.hutter1.net/official/bib.htm#uaigentle",
  url       = "http://arxiv.org/abs/1202.6153",
  pdf       = "http://www.hutter1.net/publ/uaigentle.pdf",
  latex     = "http://www.hutter1.net/publ/uaigentle.zip",
  slides    = "http://www.hutter1.net/publ/suaigentle.pdf",
  slides2   = "http://www.hutter1.net/publ/suai4lay.pdf",
  video     = "http://vimeo.com/7321732",
  video2    = "http://www.youtube.com/watch?v=I-vx5zbOOXI",
  http      = "http://2012.singularitysummit.com.au/2012/08/universal-artificial-intelligence/",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  interview = "http://www.youtube.com/watch?v=a2tgUXm_txw",
  doi       = "10.2991/978-94-91216-62-6_5",
  isbn      = "978-94-91216-61-9(print) 978-94-91216-62-6(online)",
  keywords  = "artificial intelligence; reinforcement learning; algorithmic information theory; sequential decision theory; universal induction; rational agents; foundations.",
  abstract  = "The first decade of this century has seen the nascency of the first mathematical theory of general artificial intelligence. This theory of Universal Artificial Intelligence (UAI) has made significant contributions to many theoretical, philosophical, and practical AI questions. In a series of papers culminating in book (Hutter, 2005), an exciting sound and complete mathematical model for a super intelligent agent (AIXI) has been developed and rigorously analyzed. While nowadays most AI researchers avoid discussing intelligence, the award-winning PhD thesis (Legg, 2008) provided the philosophical embedding and investigated the UAI-based universal measure of rational intelligence, which is formal, objective and non-anthropocentric.
Recently, effective approximations of AIXI have been derived and experimentally investigated in JAIR paper (Veness et al. 2011). This practical breakthrough has resulted in some impressive applications, finally muting earlier critique that UAI is only a theory. For the first time, without providing any domain knowledge, the same agent is able to self-adapt to a diverse range of interactive environments. For instance, AIXI is able to learn from scratch to play TicTacToe, Pacman, Kuhn Poker, and other games by trial and error, without even providing the rules of the games. These achievements give new hope that the grand goal of Artificial General Intelligence is not elusive. This article provides an informal overview of UAI in context. It attempts to gently introduce a very theoretical, formal, and mathematical subject, and discusses philosophical and technical ingredients, traits of intelligence, some social questions, and the past and future of UAI.",
  support   = "ARC grant DP0988049",
  for       = "080401(20%),080101(30%),080199(30%),220399(20%)",
  seo       = "970108(80%),970122(20%)",
}

@InProceedings{Hutter:12ctmrl,
  author    = "Phuong Nguyen and Peter Sunehag and Marcus Hutter",
  title     = "Context Tree Maximizing Reinforcement Learning",
  booktitle = "Proc. 26th {AAAI} Conference on Artificial Intelligence ({AAAI'12})",
  volume    = "",
  pages     = "1075--1082",
  _editor   = "Jörg Hoffmann and Bart Selman",
  publisher = "AAAI Press",
  address   = "Toronto, Canada",
  _month    = jul,
  year      = "2012",
  bibtex    = "http://www.hutter1.net/official/bib.htm#ctmrl",
  http      = "http://www.aaai.org/ocs/index.php/AAAI/AAAI12/paper/view/5079",
  pdf       = "http://www.hutter1.net/publ/ctmrl.pdf",
  latex     = "http://www.hutter1.net/publ/ctmrl.zip",
  poster    = "http://www.hutter1.net/publ/sctmrl.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#rl",
  code      = "http://www.hutter1.net/publ/ctmrlcode.zip",
  isbn      = "978-1-57735-568-7",
  keywords  = "Context Tree Maximization; Markov Decision Process; Feature Reinforcement Learning.",
  abstract  = "Recent developments in reinforcement learning for non-Markovian problems witness a surge in history-based methods, among which we are particularly interested in two frameworks, PhiMDP and MC-AIXI-CTW. PhiMDP attempts to reduce the general RL problem, where the environment's states and dynamics are both unknown, to an MDP, while MC-AIXI-CTW incrementally learns a mixture of context trees as its environment model. The main idea of PhiMDP is to connect generic reinforcement learning with classical reinforcement learning. The first implementation of PhiMDP relies on a stochastic search procedure for finding a tree that minimizes a certain cost function. This does not guarantee finding the minimizing tree, or even a good one, given limited search time. As a consequence it appears that the approach has difficulties with large domains. MC-AIXI-CTW is attractive in that it can incrementally and analytically compute the internal model through interactions with the environment.
Unfortunately, it is computationally demanding due to requiring heavy planning simulations at every single time step. We devise a novel approach called CTMRL, which analytically and efficiently finds the cost-minimizing tree. Instead of the context-tree weighting method that MC-AIXI-CTW is based on, we use the closely related context-tree maximizing algorithm that selects just one single tree. This approach falls under the PhiMDP framework, which allows the replacement of the costly planning component of MC-AIXI-CTW with simple Q-Learning. Our empirical investigation shows that CTMRL finds policies of quality as good as MC-AIXI-CTW's on six domains including a challenging Pacman domain, but in an order of magnitude less time.",
  support   = "ARC grant DP120100950",
  for       = "080401(20%),010405(20%),080101(60%)",
  seo       = "970108(100%)",
  znote     = "Acceptance rate: 294/1129 = 26\%",
}

@TechReport{Hutter:12ssdc,
  author      = "Joel Veness and Marcus Hutter",
  title       = "Sparse Sequential {D}irichlet Coding",
  institution = "UoA and ANU",
  number      = "arXiv:1206.3618",
  _month      = jun,
  year        = "2012",
  bibtex      = "http://www.hutter1.net/official/bib.htm#ssdc",
  url         = "http://arxiv.org/abs/1206.3618",
  pdf         = "http://www.hutter1.net/publ/ssdc.pdf",
  latex       = "http://www.hutter1.net/publ/ssdc.tex",
  slides      = "http://www.hutter1.net/publ/ssad.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#compress",
  keywords    = "Dirichlet prior; KT estimator; sparse coding; small/large alphabet; data compression.",
  abstract    = "This short paper describes a simple coding technique, Sparse Sequential Dirichlet Coding, for multi-alphabet memoryless sources. It is appropriate in situations where only a small, unknown subset of the possible alphabet symbols can be expected to occur in any particular data sequence. We provide a competitive analysis which shows that the performance of Sparse Sequential Dirichlet Coding will be close to that of a Sequential Dirichlet Coder that knows in advance the exact subset of occurring alphabet symbols. Empirically we show that our technique can perform similarly to the more computationally demanding Sequential Sub-Alphabet Estimator, while using less computational resources.",
  for         = "080401(100%)",
  seo         = "970108(80%),890205(20%)",
}

@InProceedings{Hutter:12ctswitch,
  author      = "Joel Veness and Kee Siong Ng and Marcus Hutter and Michael Bowling",
  title       = "Context Tree Switching",
  booktitle   = "Proc. Data Compression Conference ({DCC'12})",
  pages       = "327--336",
  _editor     = "J. A. Storer and M. W. Marcellin",
  publisher   = "IEEE Computer Society",
  address     = "Snowbird, Utah, USA",
  _month      = apr,
  year        = "2012",
  bibtex      = "http://www.hutter1.net/official/bib.htm#ctswitch",
  url         = "http://arxiv.org/abs/1111.3182",
  pdf         = "http://www.hutter1.net/publ/ctswitch.pdf",
  latex       = "http://www.hutter1.net/publ/ctswitch.tex",
  slides      = "http://www.hutter1.net/publ/sctswitch.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#compress",
  code        = "http://jveness.info/software/cts-v1.zip",
  doi         = "10.1109/DCC.2012.39",
  issn        = "1068-0314",
  isbn        = "978-1-4673-0715-4",
  keywords    = "switching data compression; universal code; prediction; Context Tree Weighting (CTW) algorithm.",
  abstract    = "This paper describes the Context Tree Switching technique, a modification of Context Tree Weighting for the prediction of binary, stationary, n-Markov sources. By modifying Context Tree Weighting's recursive weighting scheme, it is possible to mix over a strictly larger class of models without increasing the asymptotic time or space complexity of the original algorithm. We prove that this generalization preserves the desirable theoretical properties of Context Tree Weighting on stationary n-Markov sources, and show empirically that this new technique leads to consistent improvements over Context Tree Weighting as measured on the Calgary Corpus.",
  support     = "ARC grant DP0988049",
  for         = "080401(100%)",
  seo         = "970108(80%),890205(20%)",
}

@InProceedings{Hutter:12adapctw,
  author      = "Alexander O'Neill and Marcus Hutter and Wen Shao and Peter Sunehag",
  title       = "Adaptive Context Tree Weighting",
  booktitle   = "Proc. Data Compression Conference ({DCC'12})",
  pages       = "317--326",
  _editor     = "J. A. Storer and M. W. Marcellin",
  publisher   = "IEEE Computer Society",
  address     = "Snowbird, Utah, USA",
  _month      = apr,
  year        = "2012",
  bibtex      = "http://www.hutter1.net/official/bib.htm#adapctw",
  url         = "http://arxiv.org/abs/1201.2056",
  pdf         = "http://www.hutter1.net/publ/adapctw.pdf",
  latex       = "http://www.hutter1.net/publ/adapctw.tex",
  slides      = "http://www.hutter1.net/publ/sadapctw.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#compress",
  code        = "http://www.hutter1.net/publ/actwcode.zip",
  doi         = "10.1109/DCC.2012.38",
  issn        = "1068-0314",
  isbn        = "978-0-7695-4656-8",
  keywords    = "adaptive data compression; universal code; prediction; Context Tree Weighting (CTW) algorithm.",
  abstract    = "We describe an adaptive context tree weighting (ACTW) algorithm, as an extension to the standard context tree weighting (CTW) algorithm. Unlike the standard CTW algorithm, which weights all observations equally regardless of the depth, ACTW gives increasing weight to more recent observations, aiming to improve performance in cases where the input sequence is from a non-stationary distribution. Data compression results show ACTW variants improving over CTW on merged files from standard compression benchmark tests while never being significantly worse on any individual file.",
  support     = "ARC grant DP0988049",
  for         = "080401(100%)",
  seo         = "970108(80%),890205(20%)",
}

% Journal article with two distinct landing pages: the publisher page (http)
% and the Singularity Summit talk page (http2). They use separate field names
% because BibTeX keeps only the first occurrence of a repeated field name.
@Article{Hutter:12singularity,
  author      = "Marcus Hutter",
  title       = "Can Intelligence Explode?",
  journal     = "Journal of Consciousness Studies",
  volume      = "19",
  number      = "1--2",
  pages       = "143--166",
  publisher   = "Imprint Academic",
  _month      = feb,
  year        = "2012",
  bibtex      = "http://www.hutter1.net/official/bib.htm#singularity",
  http        = "http://www.ingentaconnect.com/content/imp/jcs/2012/00000019/F0020001/art00010",
  url         = "http://arxiv.org/abs/1202.6177",
  pdf         = "http://www.hutter1.net/publ/singularity.pdf",
  latex       = "http://www.hutter1.net/publ/singularity.tex",
  slidespdf   = "http://www.hutter1.net/publ/ssingularity.pdf",
  slidesppt   = "http://www.hutter1.net/publ/ssingularity.pptx",
  slidesaudio = "http://www.hutter1.net/publ/sasingularity.pptx",
  slidesvideo = "http://www.hutter1.net/publ/svsingularity.avi",
  livevideo   = "http://www.youtube.com/watch?v=focaMjQbmkI",
  http2       = "http://2012.singularitysummit.com.au/2012/08/can-intelligence-explode/",
  project     = "http://www.hutter1.net/official/projects.htm#uai",
  interview   = "http://www.youtube.com/watch?v=omG990F_ETY",
  issn        = "1355-8250",
  keywords    = "singularity; acceleration; intelligence; evolution; rationality; goal; life; value; virtual; computation; AIXI.",
  abstract    = "The technological singularity refers to a hypothetical scenario in which technological advances virtually explode. The most popular scenario is the creation of super-intelligent algorithms that recursively create ever higher intelligences. It took many decades for these ideas to spread from science fiction to popular science magazines and finally to attract the attention of serious philosophers. David Chalmers' (JCS 2010) article is the first comprehensive philosophical analysis of the singularity in a respected philosophy journal. The motivation of my article is to augment Chalmers' and to discuss some issues not addressed by him, in particular what it could mean for intelligence to explode. 
In this course, I will (have to) provide a more careful treatment of what intelligence actually is, separate speed from intelligence explosion, compare what super-intelligent participants and classical human observers might experience and do, discuss immediate implications for the diversity and value of life, consider possible bounds on intelligence, and contemplate intelligences right at the singularity.",
  for         = "080101(40%),140104(10%),220312(50%)",
  seo         = "970122(100%)",
}

## %-------------Publications-of-Marcus-Hutter-2011--------------%

@InProceedings{Hutter:11segm3d,
  author      = "Srimal Jayawardena and Di Yang and Marcus Hutter",
  title       = "3D Model Assisted Image Segmentation",
  booktitle   = "Proc. 13th International Conf. on Digital Image Computing: Techniques and Applications ({DICTA'11})",
  pages       = "51--58",
  _editor     = "Andrew Bradley and Paul Jackway",
  publisher   = "IEEE Xplore",
  address     = "Noosa, Australia",
  _month      = dec,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#segm3d",
  url         = "http://arxiv.org/abs/1202.1943",
  pdf         = "http://www.hutter1.net/publ/segm3d.pdf",
  latex       = "http://www.hutter1.net/publ/segm3d.zip",
  slides      = "http://www.hutter1.net/publ/ssegm3d.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#icar",
  doi         = "10.1109/DICTA.2011.17",
  isbn        = "978-1-4577-2006-2 or 978-0-7695-4588-2",
  keywords    = "Image segmentation; 3D-2D Registration; Full 3D Pose; Contour Detection; Fully Automatic.",
  abstract    = "The problem of segmenting a given image into coherent regions is important in Computer Vision and many industrial applications require segmenting a known object into its components. Examples include identifying individual parts of a component for process control work in a manufacturing plant and identifying parts of a car from a photo for automatic damage detection. Unfortunately most of an object's parts of interest in such applications share the same pixel characteristics, having similar colour and texture. This makes segmenting the object into its components a non-trivial task for conventional image segmentation algorithms. In this paper, we propose a ``Model Assisted Segmentation'' method to tackle this problem. A 3D model of the object is registered over the given image by optimising a novel gradient based loss function. This registration obtains the full 3D pose from an image of the object. The image can have an arbitrary view of the object and is not limited to a particular set of views. 
The segmentation is subsequently performed using a level-set based method, using the projected contours of the registered 3D model as initialisation curves. The method is fully automatic and requires no user interaction. Also, the system does not require any prior training. We present our results on photographs of a real car.",
  support     = "ControlExpert GmbH",
  for         = "080104(50%),080106(50%)",
  znote       = "Acceptance rate: 42/200 = 21\% (oral).",
}

% DICTA'11 paper. The keywords field is a semicolon-separated list.
@InProceedings{Hutter:11losspose,
  author      = "Srimal Jayawardena and Marcus Hutter and Nathan Brewer",
  title       = "A Novel Illumination-Invariant Loss for Monocular 3D Pose Estimation",
  booktitle   = "Proc. 13th International Conf. on Digital Image Computing: Techniques and Applications ({DICTA'11})",
  pages       = "37--44",
  _editor     = "Andrew Bradley and Paul Jackway",
  publisher   = "IEEE Xplore",
  address     = "Noosa, Australia",
  _month      = dec,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#losspose",
  url         = "http://arxiv.org/abs/1011.1035",
  pdf         = "http://www.hutter1.net/publ/losspose.pdf",
  latex       = "http://www.hutter1.net/publ/losspose.zip",
  slides      = "http://www.hutter1.net/publ/slosspose.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#icar",
  doi         = "10.1109/DICTA.2011.15",
  isbn        = "978-1-4577-2006-2 or 978-0-7695-4588-2",
  keywords    = "illumination-invariant loss; 2D-3D pose estimation; pixel-based; featureless; optimisation; 3D model; monocular.",
  abstract    = "The problem of identifying the 3D pose of a known object from a given 2D image has important applications in Computer Vision. Our proposed method of registering a 3D model of a known object on a given 2D photo of the object has numerous advantages over existing methods. It does not require prior training, knowledge of the camera parameters, explicit point correspondences or matching features between the image and model. Unlike techniques that estimate a partial 3D pose (as in an overhead view of traffic or machine parts on a conveyor belt), our method estimates the complete 3D pose of the object. It works on a single static image from a given view under varying and unknown lighting conditions. For this purpose we derive a novel illumination-invariant distance measure between the 2D photo and projected 3D model, which is then minimised to find the best pose parameters. 
Results for vehicle pose detection in real photographs are presented.",
  support     = "ControlExpert GmbH",
  for         = "080104(50%),080106(50%)",
  znote       = "Acceptance rate: 42/200 = 21\% (oral).",
}

@InProceedings{Hutter:11uivnfl,
  author      = "Tor Lattimore and Marcus Hutter",
  title       = "No Free Lunch versus {O}ccam's Razor in Supervised Learning",
  booktitle   = "Proc. Solomonoff 85th Memorial Conference",
  address     = "Melbourne, Australia",
  series      = "LNAI",
  volume      = "7070",
  pages       = "223--235",
  _editor     = "David Dowe",
  publisher   = "Springer",
  _month      = nov,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#uivnfl",
  url         = "http://arxiv.org/abs/1111.3846",
  pdf         = "http://www.hutter1.net/publ/uivnfl.pdf",
  latex       = "http://www.hutter1.net/publ/uivnfl.zip",
  slides      = "http://www.hutter1.net/publ/suivnfl.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#ait",
  doi         = "10.1007/978-3-642-44958-1_17",
  issn        = "0302-9743",
  isbn        = "978-3-642-44957-4",
  keywords    = "Supervised Learning; Kolmogorov complexity; Occam's Razor; No Free Lunch.",
  abstract    = "The No Free Lunch theorems are often used to argue that domain specific knowledge is required to design successful algorithms. We use algorithmic information theory to argue the case for a universal bias allowing an algorithm to succeed in all interesting problem domains. Additionally, we give a new algorithm for off-line classification, inspired by Solomonoff induction, with good performance on all structured problems under reasonable assumptions. This includes a proof of the efficacy of the well-known heuristic of randomly selecting training data in the hope of reducing misclassification rates.",
  support     = "ARC grant DP0988049",
  for         = "010404(20%),010405(20%),080198(60%)",
  seo         = "970108(100%)",
}

@InProceedings{Hutter:11aixiaxiom2,
  author      = "Peter Sunehag and Marcus Hutter",
  title       = "Principles of {S}olomonoff Induction and {AIXI}",
  booktitle   = "Proc. Solomonoff 85th Memorial Conference",
  address     = "Melbourne, Australia",
  series      = "LNAI",
  volume      = "7070",
  pages       = "386--398",
  _editor     = "David Dowe",
  publisher   = "Springer",
  _month      = nov,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#aixiaxiom2",
  url         = "http://arxiv.org/abs/1111.6117",
  pdf         = "http://www.hutter1.net/publ/aixiaxiom2.pdf",
  latex       = "http://www.hutter1.net/publ/aixiaxiom2.tex",
  slides      = "http://www.hutter1.net/publ/saixiaxiom2.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#uai",
  doi         = "10.1007/978-3-642-44958-1_30",
  issn        = "0302-9743",
  isbn        = "978-3-642-44957-4",
  keywords    = "computability; representation; rationality; Solomonoff induction.",
  abstract    = "We identify principles underlying Solomonoff Induction. Key concepts are rationality, computability, indifference and time consistency. Furthermore, we discuss extensions to the full AI case to derive AIXI.",
  support     = "ARC grant DP0988049",
  for         = "080401(20%),010404(30%),080101(50%)",
  seo         = "970108(80%),970122(20%)",
}

% Solomonoff 85th Memorial Conference paper (LNAI 7070, pp. 417--425).
@InProceedings{Hutter:11unipreq,
  author      = "Ian Wood and Peter Sunehag and Marcus Hutter",
  title       = "({N}on-)Equivalence of Universal Priors",
  booktitle   = "Proc. Solomonoff 85th Memorial Conference",
  address     = "Melbourne, Australia",
  series      = "LNAI",
  volume      = "7070",
  pages       = "417--425",
  _editor     = "David Dowe",
  publisher   = "Springer",
  _month      = nov,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#unipreq",
  url         = "http://arxiv.org/abs/1111.3854",
  pdf         = "http://www.hutter1.net/publ/unipreq.pdf",
  latex       = "http://www.hutter1.net/publ/unipreq.tex",
  slides      = "http://www.hutter1.net/publ/sunipreq.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#ait",
  doi         = "10.1007/978-3-642-44958-1_33",
  issn        = "0302-9743",
  isbn        = "978-3-642-44957-4",
  keywords    = "algorithmic information theory; universal induction; universal prior.",
  abstract    = "Ray Solomonoff invented the notion of universal induction featuring an aptly termed ``universal'' prior probability function over all possible computable environments. The essential property of this prior was its ability to dominate all other such priors. Later, Levin introduced another construction --- a mixture of all possible priors or `universal mixture'. These priors are well known to be equivalent up to multiplicative constants. Here, we seek to clarify further the relationships between these three characterisations of a universal prior (Solomonoff's, universal mixtures, and universally dominant priors). We see that the constructions of Solomonoff and Levin define an identical class of priors, while the class of universally dominant priors is strictly larger. We provide some characterisation of the discrepancy.",
  support     = "ARC grant DP0988049",
  for         = "010404(40%),010405(30%),080401(30%)",
  seo         = "970108(100%)",
}

@InProceedings{Hutter:11asyoptag,
  author      = "Tor Lattimore and Marcus Hutter",
  title       = "Asymptotically Optimal Agents",
  booktitle   = "Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})",
  address     = "Espoo, Finland",
  series      = "LNAI",
  volume      = "6925",
  _editor     = "J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann",
  publisher   = "Springer",
  pages       = "368--382",
  _month      = oct,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#asyoptag",
  conf        = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html",
  url         = "http://arxiv.org/abs/1107.5537",
  pdf         = "http://www.hutter1.net/publ/asyoptag.pdf",
  latex       = "http://www.hutter1.net/publ/asyoptag.tex",
  slides      = "http://www.hutter1.net/publ/sasyoptag.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#uai",
  doi         = "10.1007/978-3-642-24412-4_29",
  issn        = "0302-9743",
  isbn        = "3-642-24411-4",
  keywords    = "Rational agents; sequential decision theory; artificial general intelligence; reinforcement learning; asymptotic optimality; general discounting.",
  abstract    = "Artificial general intelligence aims to create agents capable of learning to solve arbitrary interesting problems. We define two versions of asymptotic optimality and prove that no agent can satisfy the strong version while in some cases, depending on discounting, there does exist a non-computable weak asymptotically optimal agent.",
  support     = "ARC grant DP0988049",
  for         = "080101(100%)",
  znote       = "Acceptance rate: 28/61 = 46\%",
}

@InProceedings{Hutter:11evenbits,
  author      = "Tor Lattimore and Marcus Hutter and Vaibhav Gavane",
  title       = "Universal Prediction of Selected Bits",
  booktitle   = "Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})",
  address     = "Espoo, Finland",
  series      = "LNAI",
  volume      = "6925",
  _editor     = "J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann",
  publisher   = "Springer",
  pages       = "262--276",
  _month      = oct,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#evenbits",
  conf        = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html",
  url         = "http://arxiv.org/abs/1107.5531",
  pdf         = "http://www.hutter1.net/publ/evenbits.pdf",
  latex       = "http://www.hutter1.net/publ/evenbits.tex",
  slides      = "http://www.hutter1.net/publ/sevenbits.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#ait",
  doi         = "10.1007/978-3-642-24412-4_22",
  issn        = "0302-9743",
  isbn        = "3-642-24411-4",
  keywords    = "Sequence prediction; Solomonoff induction; online classification; discriminative learning; algorithmic information theory.",
  abstract    = "Many learning tasks can be viewed as sequence prediction problems. For example, online classification can be converted to sequence prediction with the sequence being pairs of input/target data and where the goal is to correctly predict the target data given input data and previous input/target pairs. Solomonoff induction is known to solve the general sequence prediction problem, but only if the entire sequence is sampled from a computable distribution. In the case of classification and discriminative learning though, only the targets need be structured (given the inputs). We show that the normalised version of Solomonoff induction can still be used in this case, and more generally that it can detect any recursive sub-pattern (regularity) within an otherwise completely unstructured sequence. 
It is also shown that the unnormalised version can fail to predict very simple recursive sub-patterns.",
  support     = "ARC grant DP0988049",
  for         = "080401(40%),010404(30%),010405(30%)",
  znote       = "Acceptance rate: 28/61 = 46\%",
}

@InProceedings{Hutter:11tcdisc,
  author      = "Tor Lattimore and Marcus Hutter",
  title       = "Time Consistent Discounting",
  booktitle   = "Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})",
  address     = "Espoo, Finland",
  series      = "LNAI",
  volume      = "6925",
  _editor     = "J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann",
  publisher   = "Springer",
  pages       = "383--397",
  _month      = oct,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#tcdisc",
  conf        = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html",
  url         = "http://arxiv.org/abs/1107.5528",
  pdf         = "http://www.hutter1.net/publ/tcdisc.pdf",
  latex       = "http://www.hutter1.net/publ/tcdisc.tex",
  slides      = "http://www.hutter1.net/publ/stcdisc.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#rl",
  doi         = "10.1007/978-3-642-24412-4_30",
  issn        = "0302-9743",
  isbn        = "3-642-24411-4",
  keywords    = "Rational agents; sequential decision theory; general discounting; time-consistency; game theory.",
  abstract    = "A possibly immortal agent tries to maximise its summed discounted rewards over time, where discounting is used to avoid infinite utilities and encourage the agent to value current rewards more than future ones. Some commonly used discount functions lead to time-inconsistent behavior where the agent changes its plan over time. These inconsistencies can lead to very poor behavior. We generalise the usual discounted utility model to one where the discount function changes with the age of the agent. We then give a simple characterisation of time-(in)consistent discount functions and show the existence of a rational policy for an agent that knows its discount function is time-inconsistent.",
  for         = "010405(20%),080101(40%),140104(20%),170202(20%)",
  seo         = "970108(40%),970114(30%),970117(30%)",
  znote       = "Acceptance rate: 28/61 = 46\%",
}

@InProceedings{Hutter:11aixiaxiom,
  author      = "Peter Sunehag and Marcus Hutter",
  title       = "Axioms for Rational Reinforcement Learning",
  booktitle   = "Proc. 22nd International Conf. on Algorithmic Learning Theory ({ALT'11})",
  address     = "Espoo, Finland",
  series      = "LNAI",
  volume      = "6925",
  _editor     = "J. Kivinen and C. Szepesv{\'a}ri and E. Ukkonen and T. Zeugmann",
  publisher   = "Springer",
  pages       = "338--352",
  _month      = oct,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#aixiaxiom",
  conf        = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT11/alt11c.html",
  url         = "http://arxiv.org/abs/1107.5520",
  pdf         = "http://www.hutter1.net/publ/aixiaxiom.pdf",
  latex       = "http://www.hutter1.net/publ/aixiaxiom.tex",
  slides      = "http://www.hutter1.net/publ/saixiaxiom.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#uai",
  doi         = "10.1007/978-3-642-24412-4_27",
  issn        = "0302-9743",
  isbn        = "3-642-24411-4",
  keywords    = "Rationality; Probability; Utility; Banach Space; Linear Functional.",
  abstract    = "We provide a formal, simple and intuitive theory of rational decision making including sequential decisions that affect the environment. The theory has a geometric flavor, which makes the arguments easy to visualize and understand. Our theory is for complete decision makers, which means that they have a complete set of preferences. Our main result shows that a complete rational decision maker implicitly has a probabilistic model of the environment. We have a countable version of this result that brings light on the issue of countable vs finite additivity by showing how it depends on the geometry of the space which we have preferences over. This is achieved through fruitfully connecting rationality with the Hahn-Banach Theorem. 
The theory presented here can be viewed as a formalization and extension of the betting odds approach to probability of Ramsey (1931) and De Finetti (1937).",
  support     = "ARC grant DP0988049",
  for         = "080401(20%),010404(30%),080101(50%)",
  znote       = "Acceptance rate: 28/61 = 46\%",
}

@Proceedings{Hutter:11ewrlproc,
  editor      = "Scott Sanner and Marcus Hutter",
  title       = "European Workshop on Reinforcement Learning",
  subtitle    = "9th European Workshop ({EWRL'11})",
  publisher   = "Springer",
  address     = "Athens, Greece",
  series      = "LNAI",
  volume      = "7188",
  _month      = sep,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#ewrlproc11",
  http        = "http://www.springer.com/computer/ai/book/978-3-642-29945-2",
  pdf         = "http://www.hutter1.net/publ/ewrlproc11.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#rl",
  issn        = "0302-9743",
  isbn        = "978-3-642-29945-2",
  doi         = "10.1007/978-3-642-29946-9",
  keywords    = "artificial intelligence; machine learning; reinforcement learning; Markov decision process; function approximation; action; reward; observation; policy; agent.",
  abstract    = "This book constitutes revised and selected papers of the 9th European Workshop on Reinforcement Learning, EWRL 2011, which took place in Athens, Greece in September 2011. The papers presented were carefully reviewed and selected from 40 submissions. The papers are organized in topical sections online reinforcement learning, learning and exploring MDPs, function approximation methods for reinforcement learning, macro-actions in reinforcement learning, policy search and bounds, multi-task and transfer reinforcement learning, multi-agent reinforcement learning, apprenticeship and inverse reinforcement learning and real-world reinforcement learning.",
  for         = "080101(50%),080198(50%)",
}

@InProceedings{Hutter:11frlexp,
  author      = "Phuong Nguyen and Peter Sunehag and Marcus Hutter",
  title       = "Feature Reinforcement Learning in Practice",
  booktitle   = "Proc. 9th European Workshop on Reinforcement Learning ({EWRL-9})",
  series      = "LNAI",
  volume      = "7188",
  pages       = "66--77",
  publisher   = "Springer",
  _month      = sep,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#frlexp",
  url         = "http://arxiv.org/abs/1108.3614",
  pdf         = "http://www.hutter1.net/publ/frlexp.pdf",
  slides      = "http://www.hutter1.net/publ/sfrlexp.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#rl",
  issn        = "0302-9743",
  isbn        = "978-3-642-29945-2",
  doi         = "10.1007/978-3-642-29946-9_10",
  keywords    = "Reinforcement learning; context Markov trees; Markov decision process; partial observability; information \& complexity; Monte Carlo search; Kuhn poker; rational agents.",
  abstract    = "Following a recent surge in using history-based methods for resolving perceptual aliasing in reinforcement learning, we introduce an algorithm based on the feature reinforcement learning framework called $\Phi$MDP \cite{MH09c}. To create a practical algorithm we devise a stochastic search procedure for a class of context trees based on parallel tempering and a specialized proposal distribution. We provide the first empirical evaluation for $\Phi$MDP. Our proposed algorithm achieves superior performance to the classical U-tree algorithm \cite{AKM96} and the recent active-LZ algorithm \cite{Far10}, and is competitive with MC-AIXI-CTW \cite{VNHUS11} that maintains a bayesian mixture over all context trees up to a chosen depth. We are encouraged by our ability to compete with this sophisticated method using an algorithm that simply picks one single model, and uses Q-learning on the corresponding MDP. Our $\Phi$MDP algorithm is much simpler, yet consumes less time and memory. 
These results show promise for our future work on attacking more complex and larger problems.",
  support     = "ARC grant DP0988049",
  for         = "080401(30%),010405(20%),080101(50%)",
}

@Article{Hutter:11uiphil,
  author      = "Samuel Rathmanner and Marcus Hutter",
  title       = "A Philosophical Treatise of Universal Induction",
  journal     = "Entropy",
  volume      = "13",
  number      = "6",
  pages       = "1076--1136",
  _month      = jun,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#uiphil",
  url         = "http://arxiv.org/abs/1105.5721",
  pdf         = "http://www.hutter1.net/publ/uiphil.pdf",
  latex       = "http://www.hutter1.net/publ/uiphil.zip",
  slides      = "http://www.hutter1.net/publ/suiphil.pdf",
  video1      = "http://www.youtube.com/watch?v=gb4oXRsw3yA",
  video2      = "http://www.youtube.com/watch?v=Q_cHUpwpdFo",
  project     = "http://www.hutter1.net/official/projects.htm#ait",
  doi         = "10.3390/e13061076",
  issn        = "1099-4300",
  keywords    = "sequence prediction; inductive inference; Bayes rule; Solomonoff prior; Kolmogorov complexity; Occam's razor; philosophical issues; confirmation theory; Black raven paradox.",
  abstract    = "Understanding inductive reasoning is a problem that has engaged mankind for thousands of years. This problem is relevant to a wide range of fields and is integral to the philosophy of science. It has been tackled by many great minds ranging from philosophers to scientists to mathematicians, and more recently computer scientists. In this article we argue the case for Solomonoff Induction, a formal inductive framework which combines algorithmic information theory with the Bayesian framework. Although it achieves excellent theoretical results and is based on solid philosophical foundations, the requisite technical knowledge necessary for understanding this framework has caused it to remain largely unknown and unappreciated in the wider scientific community. The main contribution of this article is to convey Solomonoff induction and its related concepts in a generally accessible form with the aim of bridging this current technical gap. 
In the process we examine the major historical contributions that have led to the formulation of Solomonoff Induction as well as criticisms of Solomonoff and induction in general. In particular we examine how Solomonoff induction addresses many issues that have plagued other inductive systems, such as the black ravens paradox and the confirmation problem, and compare this approach with other recent approaches.",
  for         = "080401(30%),010404(30%),170203(10%),220304(30%)",
  znote       = "Special Issue on Kolmogorov Complexity edited by Paul Vitanyi",
}

@InCollection{Hutter:11randai,
  author      = "Marcus Hutter",
  title       = "Algorithmic Randomness as Foundation of Inductive Reasoning and Artificial Intelligence",
  booktitle   = "Randomness through Computation",
  subtitle    = "Some Answers, More Questions",
  chapter     = "12",
  _editor     = "H. Zenil",
  publisher   = "World Scientific",
  pages       = "159--169",
  _month      = feb,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#randai",
  url         = "http://arxiv.org/abs/1102.2468",
  pdf         = "http://www.hutter1.net/publ/randai.pdf",
  latex       = "http://www.hutter1.net/publ/randai.tex",
  project     = "http://www.hutter1.net/official/projects.htm#ait",
  isbn        = "981-4327-74-3",
  keywords    = "algorithmic information theory; individual randomness; Ockham's razor; inductive reasoning; artificial intelligence.",
  abstract    = "This article is a brief personal account of the past, present, and future of algorithmic randomness, emphasizing its role in inductive inference and artificial intelligence. It is written for a general audience interested in science and philosophy. Intuitively, randomness is a lack of order or predictability. If randomness is the opposite of determinism, then algorithmic randomness is the opposite of computability. Besides many other things, these concepts have been used to quantify Ockham's razor, solve the induction problem, and define intelligence.",
  for         = "080401(40%),010405(20%),080101(10%),080199(30%)",
}

@InCollection{Hutter:11unilearn,
  author      = "Marcus Hutter",
  title       = "Universal Learning Theory",
  booktitle   = "Encyclopedia of Machine Learning",
  pages       = "1001--1008",
  editor      = "C. Sammut and G. Webb",
  publisher   = "Springer",
  _month      = feb,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#unilearn",
  url         = "http://arxiv.org/abs/1102.2467",
  pdf         = "http://www.hutter1.net/publ/unilearn.pdf",
  latex       = "http://www.hutter1.net/publ/unilearn.tex",
  slides      = "http://www.hutter1.net/ai/susp.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#ait",
  doi         = "10.1007/978-0-387-30164-8",
  isbn        = "978-0-387-30768-8",
  keywords    = "Algorithmic probability; Ray Solomonoff; induction; prediction; decision; action; Turing machine; Kolmogorov complexity; universal prior; Bayes' rule.",
  abstract    = "This encyclopedic article gives a mini-introduction into the theory of universal learning, founded by Ray Solomonoff in the 1960s and significantly developed and extended in the last decade. It explains the spirit of universal learning, but necessarily glosses over technical subtleties.",
  support     = "ARC grant DP0988049",
  for         = "080401(30%),010405(30%),080198(40%)",
}

% Journal version (JAIR vol. 40) of the Monte-Carlo AIXI approximation paper.
% Underscore-prefixed fields (_publisher, _month) are non-standard names and are ignored by standard BibTeX styles.
@Article{Hutter:11aixictwx,
  author      = "Joel Veness and Kee Siong Ng and Marcus Hutter and William Uther and David Silver",
  title       = "A {M}onte-{C}arlo {AIXI} Approximation",
  journal     = "Journal of Artificial Intelligence Research",
  volume      = "40",
  pages       = "95--142",
  _publisher  = "AAAI Press",
  _month      = jan,
  year        = "2011",
  bibtex      = "http://www.hutter1.net/official/bib.htm#aixictwx",
  url         = "http://arxiv.org/abs/0909.0801",
  pdf         = "http://www.hutter1.net/publ/aixictwx.pdf",
  latex       = "http://www.hutter1.net/publ/aixictwx.zip",
  slides      = "http://www.hutter1.net/publ/saixictwx.pdf",
  award       = "http://www.jair.org/bestpaper.html",
  project     = "http://www.hutter1.net/official/projects.htm#uai",
  code        = "http://www.hutter1.net/publ/aixictwxcode.zip",
  doi         = "10.1613/jair.3125",
  issn        = "1076-9757",
  keywords    = "Reinforcement Learning (RL); Context Tree Weighting (CTW); Monte Carlo Tree Search (MCTS); Upper Confidence bounds applied to Trees (UCT); Partially Observable Markov Decision Process (POMDP); Prediction Suffix Trees (PST).",
  abstract    = "This paper introduces a principled approach for the design of a scalable general reinforcement learning agent. Our approach is based on a direct approximation of AIXI, a Bayesian optimality notion for general reinforcement learning agents. Previously, it has been unclear whether the theory of AIXI could motivate the design of practical algorithms. We answer this hitherto open question in the affirmative, by providing the first computationally feasible approximation to the AIXI agent. To develop our approximation, we introduce a new Monte-Carlo Tree Search algorithm along with an agent-specific extension to the Context Tree Weighting algorithm. Empirically, we present a set of encouraging results on a variety of stochastic and partially observable domains. We conclude by proposing a number of directions for future research.",
  support     = "ARC grant DP0988049",
  for         = "080401(20%),010404(20%),080101(60%)",
  note        = "Honorable Mention for the 2014 IJCAI-JAIR Best Paper Prize.",
}

## %-------------Publications-of-Marcus-Hutter-2010--------------%

% Conference paper in the Gell-Mann 80th-birthday proceedings (World Scientific).
% Underscore-prefixed fields (_editor, _month) are non-standard names and are ignored by standard BibTeX styles.
@InProceedings{Hutter:10ctoe,
  author      = "Marcus Hutter",
  title       = "Observer Localization in Multiverse Theories",
  booktitle   = "Proceedings of the Conference in Honour of Murray Gell-Mann's 80th Birthday",
  subtitle    = "Quantum Mechanics, Elementary Particles, Quantum Cosmology and Complexity",
  pages       = "638--645",
  _editor     = "H. Fritzsch and K. K. Phua",
  publisher   = "World Scientific",
  _month      = nov,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#ctoe",
  pdf         = "http://www.hutter1.net/publ/ctoe.pdf",
  latex       = "http://www.hutter1.net/publ/ctoe.tex",
  slides      = "http://www.hutter1.net/publ/sctoe.pdf",
  video       = "http://pirsa.org/displayFlash.php?id=18040117",
  project     = "http://www.hutter1.net/official/projects.htm#physics",
  doi         = "10.1142/9789814335614_0069",
  isbn        = "9814335606",
  keywords    = "world models; observer localization; predictive power; Ockham's razor; universal theories; computability.",
  abstract    = "The progression of theories suggested for our world, from ego- to geo- to helio-centric models to universe and multiverse theories and beyond, shows one tendency: The size of the described worlds increases, with humans being expelled from their center to ever more remote and random locations. If pushed too far, a potential theory of everything (TOE) is actually more a theory of nothing (TON). Indeed such theories have already been developed. I show that including observer localization into such theories is necessary and sufficient to avoid this problem. I develop a quantitative recipe to identify TOEs and distinguish them from TONs and theories in-between. This precisely shows what the problem is with some recently suggested universal TOEs.",
  for         = "080401(70%),020103(30%)",
}

% Technical report (arXiv:1011.1379) extending the loss rank principle to classification and unsupervised learning.
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
@TechReport{Hutter:10lorpc,
  author      = "Minh-Ngoc Tran and Marcus Hutter",
  title       = "Model Selection by Loss Rank for Classification and Unsupervised Learning",
  institution = "NUS and ANU",
  address     = "Singapore and Australia",
  number      = "arXiv:1011.1379",
  pages       = "1--20",
  _month      = nov,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#lorpc",
  url         = "http://arxiv.org/abs/1011.1379",
  pdf         = "http://www.hutter1.net/ai/lorpc.pdf",
  latex       = "http://www.hutter1.net/ai/lorpc.zip",
  slides      = "http://www.hutter1.net/ai/slorp.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#mdl",
  keywords    = "Classification; graphical models; loss rank principle; model selection.",
  abstract    = "Hutter (2007) recently introduced the loss rank principle (LoRP) as a general-purpose principle for model selection. The LoRP enjoys many attractive properties and deserves further investigations. The LoRP has been well-studied for regression framework in Hutter and Tran (2010). In this paper, we study the LoRP for classification framework, and develop it further for model selection problems in unsupervised learning where the main interest is to describe the associations between input measurements, like cluster analysis or graphical modelling. Theoretical properties and simulation studies are presented.",
  for         = "080401(20%),010405(50%),080198(30%)",
  seo         = "970101(70%),970108(30%)",
}

% Edited proceedings volume of ALT'10 (LNAI 6331).
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
@Proceedings{Hutter:10altproc,
  editor      = "Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann",
  title       = "Algorithmic Learning Theory",
  subtitle    = "21st International Conference ({ALT'10})",
  publisher   = "Springer",
  address     = "Canberra, Australia",
  series      = "LNAI",
  volume      = "6331",
  _month      = oct,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#altproc10",
  http        = "http://www.springer.com/computer/ai/book/978-3-642-16107-0",
  pdf         = "http://www.hutter1.net/publ/altproc10.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#other",
  issn        = "0302-9743",
  isbn        = "978-3-642-16107-0",
  doi         = "10.1007/978-3-642-16108-7",
  keywords    = "statistical learning, grammatical inference, graph learning, PAC learning, query learning, algorithmic teaching, online learning, inductive inference, reinforcement learning, Kernel methods",
  abstract    = "The LNAI series reports state-of-the-art results in artificial intelligence research, development, and education. This volume (LNAI 6331) contains research papers presented at the 21st International Conference on Algorithmic Learning Theory (ALT 2010), which was held in Canberra (Australia) during October 6-8, 2010. The main objective of the conference was to provide an interdisciplinary forum for high-quality talks with a strong theoretical background and scientific interchange in areas such as statistical learning, grammatical inference, graph learning, PAC learning, query learning, algorithmic teaching, online learning, inductive inference, reinforcement learning, Kernel methods. The conference was co-located with the 13th International Conference on Discovery Science (DS 2010). The volume includes 26 technical contributions that were selected from 44 submissions, and five invited talks presented to the audience of ALT and DS. Longer versions of the DS invited papers are available in the proceedings of DS 2010.",
  for         = "080401(20%),010405(20%),080199(60%)",
  znote       = "Acceptance rate: 26/44 = 59\%",
}

% Editors' introduction to the ALT'10 proceedings (LNAI 6331, pages 1--10).
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
@InProceedings{Hutter:10altintro,
  author      = "Marcus Hutter and Frank Stephan and Vladimir Vovk and Thomas Zeugmann",
  title       = "Algorithmic Learning Theory 2010: Editors' Introduction",
  booktitle   = "Proc. 21st International Conf. on Algorithmic Learning Theory ({ALT'10})",
  address     = "Canberra, Australia",
  series      = "LNAI",
  volume      = "6331",
  publisher   = "Springer",
  pages       = "1--10",
  _month      = oct,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#altintro10",
  pdf         = "http://www.hutter1.net/publ/altintro10.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#other",
  issn        = "0302-9743",
  isbn        = "978-3-642-16107-0",
  doi         = "10.1007/978-3-642-16108-7_1",
  keywords    = "algorithmic learning theory, query models, online learning, inductive inference, boosting, kernel methods, complexity and learning, reinforcement learning, unsupervised learning, grammatical inference, algorithmic forecasting.",
  abstract    = "Learning theory is an active research area that incorporates ideas, problems, and techniques from a wide range of disciplines including statistics, artificial intelligence, information theory, pattern recognition, and theoretical computer science. The research reported at the 21st International Conference on Algorithmic Learning Theory (ALT 2010) ranges over areas such as query models, online learning, inductive inference, boosting, kernel methods, complexity and learning, reinforcement learning, unsupervised learning, grammatical inference, and algorithmic forecasting. In this introduction we give an overview of the five invited talks and the regular contributions of ALT 2010.",
  for         = "080401(20%),010405(20%),080199(60%)",
}

% ALT'10 conference paper (LNAI 6331, pages 360--374).
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
@InProceedings{Hutter:10phimp,
  author      = "Peter Sunehag and Marcus Hutter",
  title       = "Consistency of Feature {M}arkov Processes",
  booktitle   = "Proc. 21st International Conf. on Algorithmic Learning Theory ({ALT'10})",
  address     = "Canberra, Australia",
  series      = "LNAI",
  volume      = "6331",
  publisher   = "Springer",
  pages       = "360--374",
  _month      = oct,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#phimp",
  url         = "http://arxiv.org/abs/1007.2075",
  conf        = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT10/alt10.jhtml",
  pdf         = "http://www.hutter1.net/publ/phimp.pdf",
  latex       = "http://www.hutter1.net/publ/phimp.tex",
  slides      = "http://www.hutter1.net/publ/sphimp.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#rl",
  issn        = "0302-9743",
  isbn        = "978-3-642-16107-0",
  doi         = "10.1007/978-3-642-16108-7_29",
  keywords    = "Markov Process (MP); Hidden Markov Model (HMM); Finite State Machine (FSM); Probabilistic Deterministic Finite State Automata (PDFA); Penalized Maximum Likelihood (PML); ergodicity; asymptotic consistency; suffix trees; model selection; learning; reduction; side information; reinforcement learning.",
  abstract    = "We are studying long term sequence prediction (forecasting). We approach this by investigating criteria for choosing a compact useful state representation. The state is supposed to summarize useful information from the history. We want a method that is asymptotically consistent in the sense it will provably eventually only choose between alternatives that satisfy an optimality property related to the used criterion. We extend our work to the case where there is side information that one can take advantage of and, furthermore, we briefly discuss the active setting where an agent takes actions to achieve desirable outcomes.",
  support     = "ARC grant DP0988049",
  for         = "080401(30%),010405(30%),080101(20%),080198(20%)",
  znote       = "Acceptance rate: 26/44 = 59\%",
}

% Journal article in Algorithms, volume 3, number 4.
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
@Article{Hutter:10ctoex,
  author      = "Marcus Hutter",
  title       = "A Complete Theory of Everything (will be subjective)",
  journal     = "Algorithms",
  volume      = "3",
  number      = "4",
  pages       = "329--350",
  _month      = sep,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#ctoex",
  url         = "http://arxiv.org/abs/0912.5434",
  pdf         = "http://www.hutter1.net/publ/ctoex.pdf",
  latex       = "http://www.hutter1.net/publ/ctoex.tex",
  slides      = "http://www.hutter1.net/publ/sctoe.pdf",
  video       = "http://pirsa.org/displayFlash.php?id=18040117",
  art         = "http://www.hutter1.net/publ/ctoel.jpg",
  project     = "http://www.hutter1.net/official/projects.htm#uai",
  doi         = "10.3390/a3040329",
  issn        = "1999-4893",
  keywords    = "world models; observer localization; predictive power; Ockham's razor; universal theories; inductive reasoning; simplicity and complexity; universal self-sampling; no-free-lunch; computability.",
  abstract    = "Increasingly encompassing models have been suggested for our world. Theories range from generally accepted to increasingly speculative to apparently bogus. The progression of theories from ego- to geo- to helio-centric models to universe and multiverse theories and beyond was accompanied by a dramatic increase in the sizes of the postulated worlds, with humans being expelled from their center to ever more remote and random locations. Rather than leading to a true theory of everything, this trend faces a turning point after which the predictive power of such theories decreases (actually to zero). Incorporating the location and other capacities of the observer into such theories avoids this problem and allows to distinguish meaningful from predictively meaningless theories. This also leads to a truly complete theory of everything consisting of a (conventional objective) theory of everything plus a (novel subjective) observer process. The observer localization is neither based on the controversial anthropic principle, nor has it anything to do with the quantum-mechanical observation process. The suggested principle is extended to more practical (partial, approximate, probabilistic, parametric) world models (rather than theories of everything). Finally, I provide a justification of Ockham's razor, and criticize the anthropic principle, the doomsday argument, the no free lunch theorem, and the falsifiability dogma.",
  for         = "080401(70%),020103(30%)",
}

% Technical report (arXiv:1007.0296) reviewing the Poisson-Dirichlet process.
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
@TechReport{Hutter:10pdpx,
  author      = "Wray Buntine and Marcus Hutter",
  title       = "A {B}ayesian Review of the {P}oisson-{D}irichlet Process",
  institution = "NICTA and ANU",
  address     = "Australia",
  number      = "arXiv:1007.0296",
  _month      = jul,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#pdpx",
  url         = "http://arxiv.org/abs/1007.0296",
  pdf         = "http://www.hutter1.net/publ/pdpx.pdf",
  latex       = "http://www.hutter1.net/publ/pdpx.zip",
  slides      = "http://www.hutter1.net/publ/spdp.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#bayes",
  keywords    = "Pitman-Yor process; Dirichlet; two-parameter Poisson-Dirichlet process; Chinese Restaurant Process; Consistency; (non)atomic distributions; Bayesian interpretation.",
  abstract    = "The two parameter Poisson-Dirichlet process is also known as the Pitman-Yor Process and related to the Chinese Restaurant Process, is a generalisation of the Dirichlet Process, and is increasingly being used for probabilistic modelling in discrete areas such as language and images. This article reviews the theory of the Poisson-Dirichlet process in terms of its consistency for estimation, the convergence rates and the posteriors of data. This theory has been well developed for continuous distributions (more generally referred to as non-atomic distributions). This article then presents a Bayesian interpretation of the Poisson-Dirichlet process: it is a mixture using an improper and infinite dimensional Dirichlet distribution. This interpretation requires technicalities of priors, posteriors and Hilbert spaces, but conceptually, this means we can understand the process as just another Dirichlet and thus all its sampling properties fit naturally. Finally, this article also presents results for the discrete case which is the case seeing widespread use now in computer science, but which has received less attention in the literature.",
  for         = "080404(50%),080405(50%)",
}

% AAAI'10 conference paper on the AIXI approximation.
% Underscore-prefixed fields (_editor, _month) are non-standard names and are ignored by standard BibTeX styles.
@InProceedings{Hutter:10aixictw,
  author      = "Joel Veness and Kee Siong Ng and Marcus Hutter and David Silver",
  title       = "Reinforcement Learning via {AIXI} Approximation",
  booktitle   = "Proc. 24th AAAI Conference on Artificial Intelligence",
  pages       = "605--611",
  _editor     = "Maria Fox and David Poole",
  publisher   = "AAAI Press",
  address     = "Atlanta, USA",
  _month      = jul,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#aixictw",
  url         = "http://arxiv.org/abs/1007.2049",
  pdf         = "http://www.hutter1.net/publ/aixictw.pdf",
  latex       = "http://www.hutter1.net/publ/aixictw.zip",
  slides      = "http://www.hutter1.net/publ/saixictw.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#uai",
  code        = "http://www.jveness.info/software/mc-aixi-src-1.0.zip",
  keywords    = "Reinforcement Learning (RL); Context Tree Weighting (CTW); Monte Carlo Tree Search (MCTS); Upper Confidence bounds applied to Trees (UCT); Partially Observable Markov Decision Process (POMDP); Prediction Suffix Trees (PST).",
  abstract    = "This paper introduces a principled approach for the design of a scalable general reinforcement learning agent. This approach is based on a direct approximation of AIXI, a Bayesian optimality notion for general reinforcement learning agents. Previously, it has been unclear whether the theory of AIXI could motivate the design of practical algorithms. We answer this hitherto open question in the affirmative, by providing the first computationally feasible approximation to the AIXI agent. To develop our approximation, we introduce a Monte Carlo Tree Search algorithm along with an agent-specific extension of the Context Tree Weighting algorithm. Empirically, we present a set of encouraging results on a number of stochastic, unknown, and partially observable domains.",
  support     = "ARC grant DP0988049",
  for         = "080401(20%),010404(20%),080101(60%)",
  znote       = "Acceptance rate: 264/982 = 27\%",
}

% Journal article in BMC Bioinformatics, volume 11; the number field holds the article number 321.
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
@Article{Hutter:10cnlohx,
  author      = "Paola M. V. Rancoita and Marcus Hutter and Francesco Bertoni and Ivo Kwee",
  title       = "An Integrated {B}ayesian Analysis of {LOH} and Copy Number Data",
  journal     = "BMC Bioinformatics",
  volume      = "11",
  number      = "321",
  pages       = "1--18",
  _month      = jun,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#cnlohx",
  http        = "http://www.biomedcentral.com/1471-2105/11/321",
  supplement  = "http://www.biomedcentral.com/imedia/1222342299388240/supp2.pdf",
  pdf         = "http://www.hutter1.net/publ/cnlohx.pdf",
  slides      = "http://www.hutter1.net/publ/scnloh.pdf",
  poster      = "http://www.hutter1.net/publ/pcnloh.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#big",
  code        = "http://www.biomedcentral.com/imedia/1280629245356661/supp1.zip",
  doi         = "10.1186/1471-2105-11-321",
  issn        = "1471-2105",
  keywords    = "Bayesian regression; piecewise constant function; change point problem; DNA copy number estimation; LOH estimation",
  abstract    = "Background: Cancer and other disorders are due to genomic lesions. SNP-microarrays are able to measure simultaneously both genotype and copy number (CN) at several Single Nucleotide Polymorphisms (SNPs) along the genome. CN is defined as the number of DNA copies, and the normal is two, since we have two copies of each chromosome. The genotype of a SNP is the status given by the nucleotides (alleles) which are present on the two copies of DNA. It is defined homozygous or heterozygous if the two alleles are the same or if they differ, respectively. Loss of heterozygosity (LOH) is the loss of the heterozygous status due to genomic events. Combining CN and LOH data, it is possible to better identify different types of genomic aberrations. For example, a long sequence of homozygous SNPs might be caused by either the physical loss of one copy or a uniparental disomy event (UPD), i.e. each SNP has two identical nucleotides both derived from only one parent. In this situation, the knowledge of the CN can help in distinguishing between these two events. Results: To better identify genomic aberrations, we propose a method (called gBPCR) which infers the type of aberration occurred, taking into account all the possible influence in the microarray detection of the homozygosity status of the SNPs, resulting from an altered CN level. Namely, we model the distributions of the detected genotype, given a specific genomic alteration and we estimate the parameters involved on public reference datasets. The estimation is performed similarly to the modified Bayesian Piecewise Constant Regression, but with improved estimators for the detection of the breakpoints. Using artificial and real data, we evaluate the quality of the estimation of gBPCR and we also show that it outperforms other well-known methods for LOH estimation. Conclusions: We propose a method (gBPCR) for the estimation of both LOH and CN aberrations, improving their estimation by integrating both types of data and accounting for their relationships. Moreover, gBPCR performed very well in comparison with other methods for LOH estimation and the estimated CN lesions on real data have been validated with another technique.",
  support     = "Swiss National Science Foundation grants 205321-112430 and 205320-121886/1; Oncosuisse grants OCS-1939-8-2006 and OCS-02296-08-2008; Cantone Ticino Ticino in rete grant; Fondazione per la Ricerca e la Cura sui Linfomi (Lugano, Switzerland)",
  alt         = "Also talk at 10th ISBA and IWPACBB'09",
  for         = "010405(50%),060405(50%)",
}

% Edited proceedings volume of AGI'10 (Atlantis Press), entered as a @Book with editors.
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
@Book{Hutter:10agiproc,
  editor      = "Eric Baum and Marcus Hutter and Emanuel Kitzelmann",
  title       = "Artificial General Intelligence",
  subtitle    = "3rd Conference ({AGI'10}) in Memoriam Ray Solomonoff",
  publisher   = "Atlantis Press",
  address     = "Lugano, Switzerland",
  _month      = mar,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#agiproc10",
  http        = "http://www.atlantis-press.com/publications/aisr/AGI-10/",
  pdf         = "http://www.hutter1.net/ai/agifb10.pdf",
  pdffull     = "http://www.hutter1.net/ai/agiproc10.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#uai",
  issn        = "1951-6851",
  isbn        = "978-90-78677-36-9",
  abstract    = "The Conference on Artificial General Intelligence is the only major conference series devoted wholly and specifically to the creation of AI systems possessing general intelligence at the human level and ultimately beyond. Its third installation, AGI-10, was held in Lugano, Switzerland, March 5-8, 2010, in Memoriam Ray Solomonoff (1926-2009), pioneer of machine learning, founder of algorithmic probability theory, and father of the universal theory of inductive inference. The conference attracted 66 paper submissions of which 29 (i.e., 44\%) were accepted as full papers for presentation at the conference. Additional 12 papers were included as short position papers. The program also included a keynote address by the reinforcement learning pioneer Richard Sutton, two post-conference workshops, and a number of pre-conference tutorials on various topics related to AGI.",
  for         = "080101(50%),080199(50%)",
}

% Journal article in Computational Statistics and Data Analysis, volume 54.
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
% NOTE(review): co-author spelled "Minh-Ngoc Tran" to match entry Hutter:10lorpc -- confirm against the published paper.
@Article{Hutter:10lorpx,
  author      = "Marcus Hutter and Minh-Ngoc Tran",
  title       = "Model Selection with the Loss Rank Principle",
  volume      = "54",
  journal     = "Computational Statistics and Data Analysis",
  publisher   = "Elsevier",
  pages       = "1288--1306",
  _month      = feb,
  year        = "2010",
  bibtex      = "http://www.hutter1.net/official/bib.htm#lorpx",
  url         = "http://arxiv.org/abs/1003.0516",
  pdf         = "http://www.hutter1.net/ai/lorpx.pdf",
  ps          = "http://www.hutter1.net/ai/lorpx.ps",
  latex       = "http://www.hutter1.net/ai/lorpx.zip",
  slides      = "http://www.hutter1.net/ai/slorp.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#mdl",
  code        = "http://www.hutter1.net/ai/lorpcode.zip",
  doi         = "10.1016/j.csda.2009.11.015",
  issn        = "0167-9473",
  keywords    = "Model selection, loss rank principle, non-parametric regression, classification, general loss function, k nearest neighbors.",
  abstract    = "A key issue in statistics and machine learning is to automatically select the ``right'' model complexity, e.g., the number of neighbors to be averaged over in k nearest neighbor (kNN) regression or the polynomial degree in regression with polynomials. We suggest a novel principle - the Loss Rank Principle (LoRP) - for model selection in regression and classification. It is based on the loss rank, which counts how many other (fictitious) data would be fitted better. LoRP selects the model that has minimal loss rank. Unlike most penalized maximum likelihood variants (AIC, BIC, MDL), LoRP depends only on the regression functions and the loss function. It works without a stochastic noise model, and is directly applicable to any non-parametric regressor, like kNN.",
  for         = "080401(20%),010405(80%)",
}

## %-------------Publications-of-Marcus-Hutter-2009--------------%

% NIPS'09 conference paper on MDL prediction in total variation.
% Underscore-prefixed fields (_editor, _month) are non-standard names and are ignored by standard BibTeX styles.
@InProceedings{Hutter:09mdltvp,
  author      = "Marcus Hutter",
  title       = "Discrete {MDL} Predicts in Total Variation",
  booktitle   = "Advances in Neural Information Processing Systems 22 ({NIPS'09})",
  pages       = "817--825",
  _editor     = "Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta",
  publisher   = "Curran Associates",
  address     = "Cambridge, MA, USA",
  _month      = dec,
  year        = "2009",
  bibtex      = "http://www.hutter1.net/official/bib.htm#mdltvp",
  url         = "http://arxiv.org/abs/0909.4588",
  pdf         = "http://www.hutter1.net/ai/mdltvp.pdf",
  ps          = "http://www.hutter1.net/ai/mdltvp.ps",
  latex       = "http://www.hutter1.net/ai/mdltvp.tex",
  slides      = "http://www.hutter1.net/ai/smdltvp.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#mdl",
  isbn        = "1615679111",
  keywords    = "minimum description length; countable model class; total variation distance; sequence prediction; discriminative learning; reinforcement learning.",
  abstract    = "The Minimum Description Length (MDL) principle selects the model that has the shortest code for data plus model. We show that for a countable class of models, MDL predictions are close to the true distribution in a strong sense. The result is completely general. No independence, ergodicity, stationarity, identifiability, or other assumption on the model class need to be made. More formally, we show that for any countable class of models, the distributions selected by MDL (or MAP) asymptotically predict (merge with) the true measure in the class in total variation distance. Implications for non-i.i.d. domains like time-series forecasting, discriminative learning, and reinforcement learning are discussed.",
  for         = "080401(30%),010405(50%),080198(20%)",
  znote       = "Acceptance rate: 263/1105 = 24\%",
}

% IVCNZ'09 conference paper on ellipse-to-circle matching for vehicle pose estimation.
% Underscore-prefixed fields (_editor, _month) are non-standard names and are ignored by standard BibTeX styles.
@InProceedings{Hutter:09wheel,
  author      = "Marcus Hutter and Nathan Brewer",
  title       = "Matching 2-D Ellipses to 3-D Circles with Application to Vehicle Pose Estimation",
  booktitle   = "Proc. 24th Conf. on Image and Vision Computing New Zealand ({IVCNZ'09})",
  pages       = "153--158",
  _editor     = "Donald Bailey",
  publisher   = "IEEE Xplore",
  address     = "Wellington, New Zealand",
  _month      = nov,
  year        = "2009",
  bibtex      = "http://www.hutter1.net/official/bib.htm#wheel",
  url         = "http://arxiv.org/abs/0912.3589",
  pdf         = "http://www.hutter1.net/ai/wheel.pdf",
  latex       = "http://www.hutter1.net/ai/wheel.zip",
  slides      = "http://www.hutter1.net/ai/swheel.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#icar",
  code        = "http://www.hutter1.net/ai/wheelcode.zip",
  doi         = "10.1109/IVCNZ.2009.5378421",
  issn        = "2151-2205",
  keywords    = "computer vision; image recognition/processing; ellipse detection; 3d models; 2d-ellipse to 3d-circle matching; single image pose identification; wheel detection; 3d vehicle models.",
  abstract    = "Finding the three-dimensional representation of all or a part of a scene from a single two dimensional image is a challenging task. In this paper we propose a method for identifying the pose and location of objects with circular protrusions in three dimensions from a single image and a 3d representation or model of the object of interest. To do this, we present a method for identifying ellipses and their properties quickly and reliably with a novel technique that exploits intensity differences between objects and a geometric technique for matching an ellipse in 2d to a circle in 3d. We apply these techniques to the specific problem of determining the pose and location of vehicles, particularly cars, from a single image. We have achieved excellent pose recovery performance on artificially generated car images and show promising results on real vehicle images. We also make use of the ellipse detection method to identify car wheels from images, with a very high successful match rate.",
  support     = "ControlExpert GmbH",
  znote       = "Acceptance rate: 79/142 = 56\%",
}

% Documentation of the mBPCR BioConductor package, entered as an @Article; the number field holds "0.99".
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
@Article{Hutter:09mbpcrcode,
  author      = "Paola M. V. Rancoita and Marcus Hutter",
  title       = "mBPCR: A Package for DNA Copy Number Profile Estimation",
  journal     = "BioConductor -- Open Source Software for BioInformatics",
  number      = "0.99",
  pages       = "1--25",
  _month      = oct,
  year        = "2009",
  bibtex      = "http://www.hutter1.net/official/bib.htm#mbpcrcode",
  url         = "http://www.bioconductor.org/packages/devel/bioc/html/mBPCR.html",
  pdf         = "http://www.hutter1.net/ai/mbpcrcode.pdf",
  project     = "http://www.hutter1.net/official/projects.htm#big",
  code        = "http://www.hutter1.net/ai/mbpcrcode.tar.gz",
  keywords    = "Bayesian regression, exact polynomial algorithm, piecewise constant function, mBPCR, DNA copy number estimation, micro arrays, genomic aberrations, R package.",
  abstract    = "The algorithm mBPCR is a tool for estimating the profile of the log2ratio of copy number data. The procedure is a Bayesian piecewise constant regression and can be applied, generally, to estimate any piecewise constant function (like the log2ratio of the copy number data). The algorithm has been implemented in R and integrated into bioconductor, an open source software for bioinformatics. This document describes how to use the mBPCR bioconductor package in general and on several examples.",
  support     = "SNF grant 205321-112430",
}

% Journal article (Part I) in the Journal of Artificial General Intelligence, volume 1.
% The underscore-prefixed _month field is a non-standard name and is ignored by standard BibTeX styles.
@Article{Hutter:09phimdpx,
  author      = "Marcus Hutter",
  title       = "Feature Reinforcement Learning: Part {I}: Unstructured {MDP}s",
  journal     = "Journal of Artificial General Intelligence",
  volume      = "1",
  pages       = "3--24",
  _month      = oct,
  year        = "2009",
  bibtex      = "http://www.hutter1.net/official/bib.htm#phimdpx",
  url         = "http://arxiv.org/abs/0906.1713",
  pdf         = "http://www.hutter1.net/ai/phimdpx.pdf",
  ps          = "http://www.hutter1.net/ai/phimdpx.ps",
  latex       = "http://www.hutter1.net/ai/phimdpx.tex",
  slides      = "http://www.hutter1.net/ai/sphimdp.pdf",
  video       = "http://www.vimeo.com/7390883",
  project     = "http://www.hutter1.net/official/projects.htm#uai",
  issn        = "1946-0163",
  keywords    = "Reinforcement learning; Markov decision process; partial observability; feature learning; explore-exploit; information \& complexity; rational agents.",
  abstract    = "General-purpose, intelligent, learning agents cycle through sequences of observations, actions, and rewards that are complex, uncertain, unknown, and non-Markovian. On the other hand, reinforcement learning is well-developed for small finite state Markov decision processes (MDPs). Up to now, extracting the right state representations out of bare observations, that is, reducing the general agent setup to the MDP framework, is an art that involves significant effort by designers. The primary goal of this work is to automate the reduction process and thereby significantly expand the scope of many existing reinforcement learning algorithms and the agents that employ them. Before we can think of mechanizing this search for suitable MDPs, we need a formal objective criterion. The main contribution of this article is to develop such a criterion. I also integrate the various parts into one learning algorithm. Extensions to more realistic dynamic Bayesian networks are developed in Part II. The role of POMDPs is also considered there.",
}

% NOTE(review): the DOI suffix (jagi-2021-0003) suggests a 2021 JAGI issue, while year says 2009 and volume/number are absent -- confirm against the publisher record.
@Article{Hutter:09phidbnx,
  author       = "Marcus Hutter",
  title        = "Feature Reinforcement Learning: Part {II}: Structured {MDP}s",
  journal      = "Journal of Artificial General Intelligence",
  pages        = "71--86",
  _month       = jun,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#phidbnx",
  pdf          = "http://www.hutter1.net/publ/phidbnx.pdf",
  slides       = "http://www.hutter1.net/publ/sphimdp.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#uai",
  doi          = "10.2478/jagi-2021-0003",
  keywords     = "Reinforcement learning; dynamic Bayesian network; structure learning; feature selection; global vs. local reward; explore-exploit; information \& complexity; rational agents; partial observability",
  abstract     = "The Feature Markov Decision Processes (PhiMDP) model developed in Part I is well-suited for learning agents in general environments. Nevertheless, unstructured (Phi)MDPs are limited to relatively simple environments. Structured MDPs like Dynamic Bayesian Networks (DBNs) are used for large-scale real-world problems. In this article I extend PhiMDP to PhiDBN. The primary contribution is to derive a cost criterion that allows to automatically extract the most relevant features from the environment, leading to the ``best'' DBN representation. I discuss all building blocks required for a complete general learning algorithm, and compare the novel PhiDBN model to the prevalent POMDP approach.",
}

@Article{Hutter:09aixiopen,
  author       = "Marcus Hutter",
  title        = "Open Problems in Universal Induction \& Intelligence",
  journal      = "Algorithms",
  volume       = "2",
  number       = "3",
  pages        = "879--906",
  _month       = jul,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#aixiopen",
  url          = "http://arxiv.org/abs/0907.0746",
  pdf          = "http://www.hutter1.net/ai/aixiopen.pdf",
  ps           = "http://www.hutter1.net/ai/aixiopen.ps",
  latex        = "http://www.hutter1.net/ai/aixiopen.tex",
  project      = "http://www.hutter1.net/official/projects.htm#uai",
  doi          = "10.3390/a2030879",
  issn         = "1999-4893",
  keywords     = "Kolmogorov complexity; information theory; sequential decision theory; reinforcement learning; artificial intelligence; universal Solomonoff induction; rational agents.",
  abstract     = "Specialized intelligent systems can be found everywhere: finger print, handwriting, speech, and face recognition, spam filtering, chess and other game programs, robots, et al. This decade the first presumably complete {\em mathematical} theory of artificial intelligence based on universal induction-prediction-decision-action has been proposed. This information-theoretic approach solidifies the foundations of inductive inference and artificial intelligence. Getting the foundations right usually marks a significant progress and maturing of a field. The theory provides a gold standard and guidance for researchers working on intelligent algorithms. The roots of universal induction have been laid exactly half-a-century ago and the roots of universal intelligence exactly one decade ago. So it is timely to take stock of what has been achieved and what remains to be done. Since there are already good recent surveys, I describe the state-of-the-art only in passing and refer the reader to the literature. This article concentrates on the open problems in universal induction and its extension to universal intelligence.",
}

@InProceedings{Hutter:09cnloh,
  author       = "Paola M. V. Rancoita and Marcus Hutter and Francesco Bertoni and Ivo Kwee",
  title        = "Bayesian Joint Estimation of {CN} and {LOH} Aberrations",
  booktitle    = "Proc. 3rd International Workshop on Practical Applications of Computational Biology \& Bioinformatics ({IWPACBB'09})",
  volume       = "5518",
  series       = "LNCS",
  pages        = "1109--1117",
  _editor      = "S. Omatu et al.",
  publisher    = "Springer",
  address      = "Salamanca, Spain",
  _month       = jun,
  year         = "2009",
  url          = "http://iwpacbb.usal.es/",
  pdf          = "http://www.hutter1.net/publ/cnloh.pdf",
  slides       = "http://www.hutter1.net/publ/scnloh.pdf",
  poster       = "http://www.hutter1.net/publ/pcnloh.pdf",
  http         = "http://iwpacbb.usal.es/",
  doi          = "10.1007/978-3-642-02481-8_168",
  issn         = "0302-9743",
  isbn         = "978-3-642-02480-1",
  keywords     = "Bayesian regression; piecewise constant function; change point problem; DNA copy number estimation; LOH estimation",
  abstract     = "SNP-microarrays are able to measure simultaneously both copy number and genotype at several single nucleotide polymorphism positions. Combining the two data, it is possible to better identify genomic aberrations. For this purpose, we propose a Bayesian piecewise constant regression which infers the type of aberration occurred, taking into account all the possible influence in the microarray detection of the genotype, resulting from an altered copy number level. Namely, we model the distributions of the detected genotype given a specific genomic alteration and we estimate the hyper-parameters used on public reference datasets.",
  support      = "Swiss National Science Foundation grant 205321-112430; Oncosuisse grants OCS-1939-8-2006 and OCS-02296-08-2008; Cantone Ticino ``Ticino in rete'' grant; Fondazione per la Ricerca e la Cura sui Linfomi (Lugano, Switzerland)",
}

@InProceedings{Hutter:09ldof,
  author       = "Ke Zhang and Marcus Hutter and Warren Jin",
  title        = "A New Local Distance-based Outlier Detection Approach for Scattered Real-World Data",
  booktitle    = "Proc. 13th Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD'09)",
  series       = "LNAI",
  volume       = "5476",
  pages        = "813--822",
  _editor      = "T. Theeramunkong and B. Kijsirikul and N. Cercone and H. T. Bao",
  publisher    = "Springer",
  address      = "Bangkok, Thailand",
  _month       = apr,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#ldof",
  url          = "http://arxiv.org/abs/0903.3257",
  pdf          = "http://www.hutter1.net/ai/ldof.pdf",
  ps           = "http://www.hutter1.net/ai/ldof.ps",
  latex        = "http://www.hutter1.net/ai/ldof.zip",
  slides       = "http://www.hutter1.net/ai/sldof.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#???",
  doi          = "10.1007/978-3-642-01307-2_84",
  issn         = "0302-9743",
  isbn         = "978-3-642-01306-5",
  keywords     = "local outlier; scattered data; k-distance; KNN; LOF; LDOF.",
  abstract     = "Detecting outliers which are grossly different from or inconsistent with the remaining dataset is a major challenge in real-world KDD applications. Existing outlier detection methods are ineffective on scattered real-world datasets due to implicit data patterns and parameter setting issues. We define a novel ``Local Distance-based Outlier Factor'' (LDOF) to measure the outlier-ness of objects in scattered datasets which addresses these issues. LDOF uses the relative location of an object to its neighbours to determine the degree to which the object deviates from its neighbourhood. Properties of LDOF are theoretically analysed including LDOF's lower bound and its false-detection probability, as well as parameter settings. In order to facilitate parameter settings in real-world applications, we employ a top-n technique in our outlier detection approach, where only the objects with the highest LDOF values are regarded as outliers.
Compared to conventional approaches (such as top-n KNN and top-n LOF), our method top-n LDOF is more effective at detecting outliers in scattered data. It is also easier to set parameters, since its performance is relatively stable over a large range of parameter values, as illustrated by experimental results on both real-world and synthetic datasets.",
  znote        = "Acceptance rate: 111/338 = 33\%",
}

@Article{Hutter:09alttcs,
  author       = "Marcus Hutter and Rocco A. Servedio",
  title        = "{ALT'07} Special Issue",
  journal      = "Theoretical Computer Science",
  _editor      = "Marcus Hutter and Rocco A. Servedio",
  volume       = "410",
  number       = "19",
  pages        = "1747--1748/1912",
  _month       = apr,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#alttcs",
  http         = "http://www.sciencedirect.com/science/journal/03043975/410/19",
  doi          = "10.1016/j.tcs.2009.01.008",
  issn         = "0304-3975",
  keywords     = "algorithmic learning theory, special issue, preface",
  abstract     = "This special issue contains expanded versions of papers that appeared in preliminary form in the proceedings of the 18th International Conference on Algorithmic Learning Theory (ALT 2007), which was held in Sendai, Japan during October 1--4, 2007. \emph{Algorithmic Learning Theory} is a conference series which is dedicated to the theoretical study of the algorithmic aspects of learning. The best papers of the conference ALT 2007 were invited for this special issue and after a thorough reviewing process, most of them qualified for this Special Issue on Algorithmic Learning Theory of Theoretical Computer Science. The preface contains a short introduction to each of these papers.",
}

@Article{Hutter:09improbx,
  author       = "Alberto Piatti and Marco Zaffalon and Fabio Trojani and Marcus Hutter",
  title        = "Limits of Learning about a Categorical Latent Variable under Prior Near-Ignorance",
  journal      = "International Journal of Approximate Reasoning",
  volume       = "50",
  number       = "4",
  pages        = "597--611",
  _month       = apr,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#improbx",
  url          = "http://arxiv.org/abs/0904.4527",
  pdf          = "http://www.hutter1.net/ai/improbx.pdf",
  ps           = "http://www.hutter1.net/ai/improbx.ps",
  latex        = "http://www.hutter1.net/ai/improbx.tex",
  slides       = "http://www.hutter1.net/ai/simprob.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#robust",
  doi          = "10.1016/j.ijar.2008.08.003",
  issn         = "0888-613X",
  keywords     = "Near-ignorance set of priors; Latent variables; Imprecise Dirichlet model.",
  abstract     = "In this paper, we consider the coherent theory of (epistemic) uncertainty of Walley, in which beliefs are represented through sets of probability distributions, and we focus on the problem of modeling prior ignorance about a categorical random variable. In this setting, it is a known result that a state of prior ignorance is not compatible with learning. To overcome this problem, another state of beliefs, called \emph{near-ignorance}, has been proposed. Near-ignorance resembles ignorance very closely, by satisfying some principles that can arguably be regarded as necessary in a state of ignorance, and allows learning to take place. What this paper does, is to provide new and substantial evidence that also near-ignorance cannot be really regarded as a way out of the problem of starting statistical inference in conditions of very weak beliefs. The key to this result is focusing on a setting characterized by a variable of interest that is \emph{latent}.
We argue that such a setting is by far the most common case in practice, and we provide, for the case of categorical latent variables (and general \emph{manifest} variables) a condition that, if satisfied, prevents learning to take place under prior near-ignorance. This condition is shown to be easily satisfied even in the most common statistical problems. We regard these results as a strong form of evidence against the possibility to adopt a condition of prior near-ignorance in real statistical problems.",
}

@TechReport{Hutter:09bayestreex,
  author       = "Marcus Hutter",
  title        = "Exact Non-Parametric {B}ayesian Inference on Infinite Trees",
  number       = "0903.5342",
  institution  = "ARXIV",
  _month       = mar,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#bayestreex",
  url          = "http://arxiv.org/abs/0903.5342",
  pdf          = "http://www.hutter1.net/ai/bayestreex.pdf",
  ps           = "http://www.hutter1.net/ai/bayestreex.ps",
  latex        = "http://www.hutter1.net/ai/bayestreex.zip",
  slides       = "http://www.hutter1.net/ai/sbayestree.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#bayes",
  code         = "http://www.hutter1.net/ai/bayestree.c",
  keywords     = "Bayesian density estimation, exact linear time algorithm, non-parametric inference, adaptive infinite tree, Polya tree, scale invariance, consistency, asymptotics.",
  msc          = "62G07; 60B10; 68W99",
  abstract     = "Given i.i.d. data from an unknown distribution, we consider the problem of predicting future items. An adaptive way to estimate the probability density is to recursively subdivide the domain to an appropriate data-dependent granularity. A Bayesian would assign a data-independent prior probability to ``subdivide'', which leads to a prior over infinite(ly many) trees. We derive an exact, fast, and simple inference algorithm for such a prior, for the data evidence, the predictive distribution, the effective model dimension, moments, and other quantities. We prove asymptotic convergence and consistency results, and illustrate the behavior of our model on some prototypical functions.",
}

@Book{Hutter:09agiproc,
  editor       = "Ben Goertzel and Pascal Hitzler and Marcus Hutter",
  title        = "Artificial General Intelligence",
  subtitle     = "2nd Conference ({AGI'09})",
  publisher    = "Atlantis Press",
  address      = "Arlington, USA",
  _month       = mar,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#agiproc09",
  http         = "http://www.atlantis-press.com/publications/aisr/AGI-09/",
  pdf          = "http://www.hutter1.net/ai/agifb09.pdf",
  pdfall       = "http://www.hutter1.net/ai/agiproc09.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#uai",
  issn         = "1951-6851",
  isbn         = "978-90-78677-24-6",
  abstract     = "The Conference on Artificial General Intelligence is the only major conference series devoted wholly and specifically to the creation of AI systems possessing general intelligence at the human level and ultimately beyond. Its second installation, AGI-09, in Arlington, Virginia, March 6-9, 2009, attracted 67 paper submissions, which is a substantial increase from the previous year. Of these submissions, 33 (i.e., 49\%) were accepted as full papers for presentation at the conference. Additional 13 papers were included as position papers. The program also included a keynote address by J{\"u}rgen Schmidhuber on \emph{The New AI}, a post-conference workshop on \emph{The Future of AI}, and a number of pre-conference tutorials on various topics related to AGI.",
}

@InProceedings{Hutter:09phimdp,
  author       = "Marcus Hutter",
  title        = "Feature {M}arkov Decision Processes",
  booktitle    = "Proc. 2nd Conf. on Artificial General Intelligence ({AGI'09})",
  subtitle     = "Advances in Intelligent Systems Research",
  volume       = "8",
  pages        = "61--66",
  publisher    = "Atlantis Press",
  _address     = "Arlington, Virginia",
  _month       = mar,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#phimdp",
  url          = "http://arXiv.org/abs/0812.4580",
  pdf          = "http://www.hutter1.net/ai/phimdp.pdf",
  ps           = "http://www.hutter1.net/ai/phimdp.ps",
  latex        = "http://www.hutter1.net/ai/phimdp.tex",
  slides       = "http://www.hutter1.net/ai/sphimdp.pdf",
  video        = "http://www.vimeo.com/7390883",
  award        = "http://agi-conf.org/2009/kurzweilprize.php",
  project      = "http://www.hutter1.net/official/projects.htm#uai",
  doi          = "10.2991/agi.2009.30",
  issn         = "1951-6851",
  isbn         = "978-90-78677-24-6",
  keywords     = "Reinforcement learning; Markov decision process; partial observability; feature learning; explore-exploit.",
  abstract     = "General purpose intelligent learning agents cycle through (complex,non-MDP) sequences of observations, actions, and rewards. On the other hand, reinforcement learning is well-developed for small finite state Markov Decision Processes (MDPs). So far it is an art performed by human designers to extract the right state representation out of the bare observations, i.e. to reduce the agent setup to the MDP framework. Before we can think of mechanizing this search for suitable MDPs, we need a formal objective criterion. The main contribution of this article is to develop such a criterion. I also integrate the various parts into one learning algorithm. Extensions to more realistic dynamic Bayesian networks are developed in a companion article.",
  znote        = "Acceptance rate: 33/67 = 49\%. First Runner-Up for the Kurzweil Best Paper Award",
}

@InProceedings{Hutter:09phidbn,
  author       = "Marcus Hutter",
  title        = "Feature Dynamic {B}ayesian Networks",
  booktitle    = "Proc. 2nd Conf. on Artificial General Intelligence ({AGI'09})",
  subtitle     = "Advances in Intelligent Systems Research",
  volume       = "8",
  pages        = "67--73",
  publisher    = "Atlantis Press",
  _address     = "Arlington, Virginia",
  _month       = mar,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#phidbn",
  url          = "http://arXiv.org/abs/0812.4581",
  pdf          = "http://www.hutter1.net/ai/phidbn.pdf",
  ps           = "http://www.hutter1.net/ai/phidbn.ps",
  latex        = "http://www.hutter1.net/ai/phidbn.tex",
  slides       = "http://www.hutter1.net/ai/sphimdp.pdf",
  video        = "http://www.vimeo.com/7390883",
  project      = "http://www.hutter1.net/official/projects.htm#uai",
  doi          = "10.2991/agi.2009.6",
  issn         = "1951-6851",
  isbn         = "978-90-78677-24-6",
  keywords     = "Reinforcement learning; dynamic Bayesian network; structure learning; feature learning; global vs. local reward; explore-exploit.",
  abstract     = "Feature Markov Decision Processes (PhiMDPs) are well-suited for learning agents in general environments. Nevertheless, unstructured (Phi)MDPs are limited to relatively simple environments. Structured MDPs like Dynamic Bayesian Networks (DBNs) are used for large-scale real-world problems. In this article I extend PhiMDP to PhiDBN. The primary contribution is to derive a cost criterion that allows to automatically extract the most relevant features from the environment, leading to the ``best'' DBN representation. I discuss all building blocks required for a complete general learning algorithm.",
  znote        = "Acceptance rate: 33/67 = 49\%",
}

@Article{Hutter:09idmx,
  author       = "Marcus Hutter",
  title        = "Practical Robust Estimators under the {I}mprecise {D}irichlet {M}odel",
  journal      = "International Journal of Approximate Reasoning",
  volume       = "50",
  number       = "2",
  pages        = "231--242",
  _month       = feb,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#idmx",
  url          = "http://arxiv.org/abs/0901.4137",
  pdf          = "http://www.hutter1.net/ai/idmx.pdf",
  ps           = "http://www.hutter1.net/ai/idmx.ps",
  latex        = "http://www.hutter1.net/ai/idmx.tex",
  slides       = "http://www.hutter1.net/ai/sidm.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#robust",
  doi          = "10.1016/j.ijar.2008.03.020",
  issn         = "0888-613X",
  keywords     = "Imprecise Dirichlet Model; exact, conservative, approximate, robust, credible interval estimates; entropy; mutual information.",
  abstract     = "Walley's Imprecise Dirichlet Model (IDM) for categorical i.i.d.\ data extends the classical Dirichlet model to a set of priors. It overcomes several fundamental problems which other approaches to uncertainty suffer from. Yet, to be useful in practice, one needs efficient ways for computing the imprecise=robust sets or intervals. The main objective of this work is to derive exact, conservative, and approximate, robust and credible interval estimates under the IDM for a large class of statistical estimators, including the entropy and mutual information.",
}

@Article{Hutter:09bcna,
  author       = "Paola M. V. Rancoita and Marcus Hutter and Francesco Bertoni and Ivo Kwee",
  title        = "Bayesian {DNA} Copy Number Analysis",
  journal      = "BMC Bioinformatics",
  volume       = "10",
  number       = "10",
  pages        = "1--19",
  _month       = jan,
  year         = "2009",
  bibtex       = "http://www.hutter1.net/official/bib.htm#bcna",
  http         = "http://www.biomedcentral.com/1471-2105/10/10",
  supplement   = "http://www.biomedcentral.com/content/supplementary/1471-2105-10-10-s2.pdf",
  pdf          = "http://www.hutter1.net/ai/bcna.pdf",
  slides       = "http://www.hutter1.net/ai/sbcna.pdf",
  code         = "http://www.biomedcentral.com/content/supplementary/1471-2105-10-10-s1.zip",
  doi          = "10.1186/1471-2105-10-10",
  issn         = "1471-2105",
  keywords     = "Bayesian regression, exact polynomial algorithm, piecewise constant function, mBPCR, DNA copy number estimation, micro arrays, genomic aberrations.",
  abstract     = "Background: Some diseases, like tumors, can be related to chromosomal aberrations, leading to changes of DNA copy number. The copy number of an aberrant genome can be represented as a piecewise constant function, since it can exhibit regions of deletions or gains. Instead, in a healthy cell the copy number is two because we inherit one copy of each chromosome from each our parents. Bayesian Piecewise Constant Regression (BPCR) is a Bayesian regression method for data that are noisy observations of a piecewise constant function. The method estimates the unknown segment number, the endpoints of the segments and the value of the segment levels of the underlying piecewise constant function. The Bayesian Regression Curve (BRC) estimates the same data with a smoothing curve. However, in the original formulation, some estimators failed to properly determine the corresponding parameters. For example, the boundary estimator did not take into account the dependency among the boundaries and succeeded in estimating more than one breakpoint at the same position, losing segments.
Results: We derived an improved version of the BPCR (called mBPCR) and BRC, changing the segment number estimator and the boundary estimator to enhance the fitting procedure. We also proposed an alternative estimator of the variance of the segment levels, which is useful in case of data with high noise. Using artificial data, we compared the original and the modified version of BPCR and BRC with other regression methods, showing that our improved version of BPCR generally outperformed all the others. Similar results were also observed on real data. Conclusions: We propose an improved method for DNA copy number estimation, mBPCR, which performed very well compared to previously published algorithms. In particular, mBPCR was more powerful in the detection of the true position of the breakpoints and of small aberrations in very noisy data. Hence, from a biological point of view, our method can be very useful, for example, to find targets of genomic aberrations in clinical cancer samples.",
  support      = "SNF grant 205321-112430",
  znote        = "Marked as highly accessed.",
  alt          = "Also 2-page abstract and poster at 9th ISBA and 18th MASAMB meetings (2008)",
  abstract2p   = "http://www.hutter1.net/publ/bcnas.pdf",
  poster       = "http://www.hutter1.net/publ/sbcnas.pdf",
}

## %-------------Publications-of-Marcus-Hutter-2008--------------%

@Article{Hutter:08actoptx,
  author       = "Daniil Ryabko and Marcus Hutter",
  title        = "On the Possibility of Learning in Reactive Environments with Arbitrary Dependence",
  journal      = "Theoretical Computer Science",
  volume       = "405",
  number       = "3",
  pages        = "274--284",
  _month       = oct,
  year         = "2008",
  bibtex       = "http://www.hutter1.net/official/bib.htm#actoptx",
  url          = "http://arxiv.org/abs/0810.5636",
  pdf          = "http://www.hutter1.net/ai/actoptx.pdf",
  ps           = "http://www.hutter1.net/ai/actoptx.ps",
  latex        = "http://www.hutter1.net/ai/actoptx.tex",
  slides       = "http://www.hutter1.net/ai/sactopt.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#uai",
  doi          = "10.1016/j.tcs.2008.06.039",
  issn         = "0304-3975",
  keywords     = "Reinforcement learning, asymptotic average value, self-optimizing policies, (non) Markov decision processes.",
  abstract     = "We address the problem of reinforcement learning in which observations may exhibit an arbitrary form of stochastic dependence on past observations and actions, i.e.\ environments more general than (PO)MDPs. The task for an agent is to attain the best possible asymptotic reward where the true generating environment is unknown but belongs to a known countable family of environments. We find some sufficient conditions on the class of environments under which an agent exists which attains the best asymptotic reward for any environment in the class. We analyze how tight these conditions are and how they relate to different probabilistic assumptions known in reinforcement learning and related fields, such as Markov Decision Processes and mixing conditions.",
  support      = "SNF grant 200020-107616",
}

@InProceedings{Hutter:08phi,
  author       = "Marcus Hutter",
  title        = "Predictive Hypothesis Identification",
  booktitle    = "Presented at 9th Valencia/ISBA 2010 Meeting",
  pages        = "1--16",
  address      = "Benidorm",
  _month       = sep,
  year         = "2008",
  bibtex       = "http://www.hutter1.net/official/bib.htm#phi",
  url          = "http://arxiv.org/abs/0809.1270",
  pdf          = "http://www.hutter1.net/ai/phi.pdf",
  ps           = "http://www.hutter1.net/ai/phi.ps",
  latex        = "http://www.hutter1.net/ai/phi.tex",
  slides       = "http://www.hutter1.net/ai/sphi.pdf",
  poster       = "http://www.hutter1.net/ai/pphi.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#mdl",
  keywords     = "parameter estimation; hypothesis testing; model selection; predictive inference; composite hypotheses; MAP versus ML; moment fitting; Bayesian statistics.",
  abstract     = "While statistics focusses on hypothesis testing and on estimating (properties of) the true sampling distribution, in machine learning the performance of learning algorithms on future data is the primary issue. In this paper we bridge the gap with a general principle (PHI) that identifies hypotheses with best predictive performance. This includes predictive point and interval estimation, simple and composite hypothesis testing, (mixture) model selection, and others as special cases. For concrete instantiations we will recover well-known methods, variations thereof, and new ones. PHI nicely justifies, reconciles, and blends (a reparametrization invariant variation of) MAP, ML, MDL, and moment estimation. One particular feature of PHI is that it can genuinely deal with nested hypotheses.",
}

@InProceedings{Hutter:08select,
  author       = "Kassel Hingee and Marcus Hutter",
  title        = "Equivalence of Probabilistic Tournament and Polynomial Ranking Selection",
  booktitle    = "Proc. 2008 Congress on Evolutionary Computation ({CEC'08})",
  pages        = "564--571",
  publisher    = "IEEE",
  address      = "Hongkong",
  isbn         = "978-1-4244-1823-7",
  _month       = jun,
  year         = "2008",
  bibtex       = "http://www.hutter1.net/official/bib.htm#select",
  url          = "http://arxiv.org/abs/0803.2925",
  pdf          = "http://www.hutter1.net/ai/select.pdf",
  ps           = "http://www.hutter1.net/ai/select.ps",
  latex        = "http://www.hutter1.net/ai/select.zip",
  slides       = "http://www.hutter1.net/ai/sselect.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#optimize",
  doi          = "10.1109/CEC.2008.4630852",
  keywords     = "evolutionary algorithms, ranking selection, tournament selection, equivalence, efficiency.",
  abstract     = "Crucial to an Evolutionary Algorithm's performance is its selection scheme. We mathematically investigate the relation between polynomial rank and probabilistic tournament methods which are (respectively) generalisations of the popular linear ranking and tournament selection schemes. We show that every probabilistic tournament is equivalent to a unique polynomial rank scheme. In fact, we derived explicit operators for translating between these two types of selection. Of particular importance is that most linear and most practical quadratic rank schemes are probabilistic tournaments.",
}

@Article{Hutter:08pquestx,
  author       = "Daniil Ryabko and Marcus Hutter",
  title        = "Predicting Non-Stationary Processes",
  journal      = "Applied Mathematics Letters",
  volume       = "21",
  number       = "5",
  pages        = "477--482",
  _month       = may,
  year         = "2008",
  bibtex       = "http://www.hutter1.net/official/bib.htm#pquestx",
  url          = "http://arxiv.org/abs/cs.LG/0606077",
  pdf          = "http://www.hutter1.net/ai/pquestx.pdf",
  ps           = "http://www.hutter1.net/ai/pquestx.ps",
  latex        = "http://www.hutter1.net/ai/pquestx.tex",
  slides       = "http://www.hutter1.net/ai/spquest.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#bayes",
  doi          = "10.1016/j.aml.2007.04.004",
  issn         = "0893-9659",
  keywords     = "sequence prediction, local absolute continuity, non-stationary measures, average/expected criteria, absolute/KL divergence, mixtures of measures.",
  abstract     = "Suppose we are given two probability measures on the set of one-way infinite finite-alphabet sequences and consider the question when one of the measures predicts the other, that is, when conditional probabilities converge (in a certain sense) when one of the measures is chosen to generate the sequence. This question may be considered a refinement of the problem of sequence prediction in its most general formulation: for a given class of probability measures, does there exist a measure which predicts all of the measures in the class? To address this problem, we find some conditions on local absolute continuity which are sufficient for prediction and which generalize several different notions which are known to be sufficient for prediction. We also formulate some open questions to outline a direction for finding the conditions on classes of measures for which prediction is possible.",
  support      = "SNF grant 200020-107616",
}

@Article{Hutter:08kolmo,
  author       = "Marcus Hutter",
  title        = "Algorithmic Complexity",
  journal      = "Scholarpedia",
  volume       = "3",
  number       = "1",
  pages        = "2573",
  _month       = jan,
  year         = "2008",
  bibtex       = "http://www.hutter1.net/official/bib.htm#kolmo",
  http         = "http://www.scholarpedia.org/article/Algorithmic_Complexity",
  pdf          = "http://www.hutter1.net/ai/kolmo.pdf",
  ps           = "http://www.hutter1.net/ai/kolmo.ps",
  latex        = "http://www.hutter1.net/ai/kolmo.zip",
  slides       = "http://www.hutter1.net/ai/sintro2kc.pdf",
  video        = "http://pirsa.org/displayFlash.php?id=18040109",
  project      = "http://www.hutter1.net/official/projects.htm#ait",
  doi          = "10.4249/scholarpedia.2573",
  issn         = "1941-6016",
  keywords     = "algorithmic information theory, prefix code, prefix Turing machine, Universal Turing machine, Kolmogorov complexity, plain complexity, prefix complexity.",
  abstract     = "The information content or complexity of an object can be measured by the length of its shortest description. For instance the string `01010101010101010101010101010101' has the short description ``16 repetitions of 01'', while `11001000011000011101111011101100' presumably has no simpler description other than writing down the string itself. More formally, the Algorithmic ``Kolmogorov'' Complexity (AC) of a string $x$ is defined as the length of the shortest program that computes or outputs $x$, where the program is run on some fixed reference universal computer.",
}

## %-------------Publications-of-Marcus-Hutter-2007--------------%

@InProceedings{Hutter:07qlearn,
  author       = "Marcus Hutter and Shane Legg",
  title        = "Temporal Difference Updating without a Learning Rate",
  booktitle    = "Advances in Neural Information Processing Systems 20",
  pages        = "705--712",
  _editor      = "J.C. Platt and D. Koller and Y. Singer and S. Roweis",
  publisher    = "Curran Associates",
  address      = "Cambridge, MA, USA",
  _month       = dec,
  year         = "2007",
  bibtex       = "http://www.hutter1.net/official/bib.htm#qlearn",
  url          = "http://arxiv.org/abs/0810.5631",
  pdf          = "http://www.hutter1.net/ai/qlearn.pdf",
  ps           = "http://www.hutter1.net/ai/qlearn.ps",
  latex        = "http://www.hutter1.net/ai/qlearn.zip",
  poster       = "http://www.hutter1.net/ai/sqlearn.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#rl",
  keywords     = "reinforcement learning; temporal difference; eligibility trace; variational principle; learning rate.",
  abstract     = "We derive an equation for temporal difference learning from statistical principles. Specifically, we start with the variational principle and then bootstrap to produce an updating rule for discounted state value estimates. The resulting equation is similar to the standard equation for temporal difference learning with eligibility traces, so called TD(lambda), however it lacks the parameter alpha that specifies the learning rate. In the place of this free parameter there is now an equation for the learning rate that is specific to each state transition. We experimentally test this new learning rule against TD(lambda) and find that it offers superior performance in various settings. Finally, we make some preliminary investigations into how to extend our new temporal difference algorithm to reinforcement learning. To do this we combine our update equation with both Watkins' Q(lambda) and Sarsa(lambda) and find that it again offers superior performance without a learning rate parameter.",
  for          = "080101(100%)",
  znote        = "Acceptance rate: 217/975 = 22\%",
}

@InProceedings{Hutter:07intest,
  author       = "Shane Legg and Marcus Hutter",
  title        = "Tests of Machine Intelligence",
  booktitle    = "50 Years of Artificial Intelligence",
  booksubtitle = "Essays Dedicated to the 50th Anniversary of Artificial Intelligence",
  publisher    = "Springer",
  address      = "Monte Verita, Switzerland",
  series       = "LNAI",
  volume       = "4850",
  _editor      = "M. Lungarella and F. Iida and J. Bongard and R. Pfeifer",
  pages        = "232--242",
  _month       = dec,
  year         = "2007",
  bibtex       = "http://www.hutter1.net/official/bib.htm#intest",
  url          = "http://arxiv.org/abs/0712.3825",
  pdf          = "http://www.hutter1.net/ai/intest.pdf",
  ps           = "http://www.hutter1.net/ai/intest.ps",
  latex        = "http://www.hutter1.net/ai/intest.tex",
  poster       = "http://www.hutter1.net/ai/siors.pdf",
  project      = "http://www.hutter1.net/official/projects.htm#uai",
  press        = "http://www.hutter1.net/official/press.htm#mim",
  doi          = "10.1007/978-3-540-77296-5_22",
  issn         = "0302-9743",
  isbn         = "978-3-540-77295-8",
  keywords     = "Turing test and derivatives; Compression tests; Linguistic complexity; Multiple cognitive abilities; Competitive games; Psychometric tests; Smith's test; C-test; Universal intelligence",
  abstract     = "Although the definition and measurement of intelligence is clearly of fundamental importance to the field of artificial intelligence, no general survey of definitions and tests of machine intelligence exists. Indeed few researchers are even aware of alternatives to the Turing test and its many derivatives. In this paper we fill this gap by providing a short survey of the many tests of machine intelligence that have been proposed.",
  support      = "SNF grant 200020-107616",
}

@Article{Hutter:07iorx,
  author   = "Shane Legg and Marcus Hutter",
  title    = "Universal Intelligence: A Definition of Machine Intelligence",
  journal  = "Minds \& Machines",
  volume   = "17",
  number   = "4",
  pages    = "391--444",
  _month   = dec,
  year     = "2007",
  bibtex   = "http://www.hutter1.net/official/bib.htm#iorx",
  url      = "http://arxiv.org/abs/0712.3329",
  pdf      = "http://www.hutter1.net/ai/iorx.pdf",
  ps       = "http://www.hutter1.net/ai/iorx.ps",
  latex    = "http://www.hutter1.net/ai/iorx.zip",
  poster   = "http://www.hutter1.net/ai/sior.pdf",
  project  = "http://www.hutter1.net/official/projects.htm#uai",
  press    = "http://www.hutter1.net/official/press.htm#mim",
  doi      = "10.1007/s11023-007-9079-x",
  issn     = "0924-6495",
  keywords = "AIXI, complexity theory, intelligence, theoretical foundations, Turing test, intelligence tests/measures/definitions",
  abstract = "A fundamental problem in artificial intelligence is that nobody really knows what intelligence is. The problem is especially acute when we need to consider artificial systems which are significantly different to humans. In this paper we approach this problem in the following way: We take a number of well known informal definitions of human intelligence that have been given by experts, and extract their essential features. These are then mathematically formalised to produce a general measure of intelligence for arbitrary machines. We believe that this equation formally captures the concept of machine intelligence in the broadest reasonable sense. We then show how this formal definition is related to the theory of universal optimal learning agents. Finally, we survey the many other tests and definitions of intelligence that have been proposed for machines.",
  support  = "SNF grant 200020-107616",
}

@Article{Hutter:07pcregx,
  author   = "Marcus Hutter",
  title    = "Exact {B}ayesian Regression of Piecewise Constant Functions",
  journal  = "Bayesian Analysis",
  volume   = "2",
  number   = "4",
  pages    = "635--664",
  _month   = dec,
  year     = "2007",
  bibtex   = "http://www.hutter1.net/official/bib.htm#pcregx",
  url      = "http://arxiv.org/abs/math.ST/0606315",
  pdf      = "http://www.hutter1.net/ai/pcregx.pdf",
  ps       = "http://www.hutter1.net/ai/pcregx.ps",
  latex    = "http://www.hutter1.net/ai/pcregx.tex",
  slides   = "http://www.hutter1.net/ai/spcreg.pdf",
  award    = "http://bayesian.org/project/lindley-prize/",
  project  = "http://www.hutter1.net/official/projects.htm#bayes",
  code     = "http://www.hutter1.net/ai/cpcreg.zip",
  doi      = "10.1214/07-BA225",
  issn     = "1936-0975",
  keywords = "Bayesian regression, exact polynomial algorithm, non-parametric inference, piecewise constant function, dynamic programming, change point problem.",
  abstract = "We derive an exact and efficient Bayesian regression algorithm for piecewise constant functions of unknown segment number, boundary locations, and levels. The derivation works for any noise and segment level prior, e.g.\ Cauchy which can handle outliers. We derive simple but good estimates for the in-segment variance. We also propose a Bayesian regression curve as a better way of smoothing data without blurring boundaries. The Bayesian approach also allows straightforward determination of the evidence, break probabilities and error estimates, useful for model selection and significance and robustness studies. We discuss the performance on synthetic and real-world examples. Many possible extensions are discussed.",
  note     = "Lindley prize for innovative research in Bayesian statistics.",
}

@Proceedings{Hutter:07altproc,
  editor    = "Marcus Hutter and Rocco A. Servedio and Eiji Takimoto",
  title     = "Algorithmic Learning Theory",
  subtitle  = "18th International Conference ({ALT'07})",
  publisher = "Springer",
  address   = "Sendai, Japan",
  series    = "LNAI",
  volume    = "4754",
  _month    = oct,
  year      = "2007",
  bibtex    = "http://www.hutter1.net/official/bib.htm#altproc07",
  http      = "http://www.springer.com/computer/ai/book/978-3-540-75224-0",
  pdf       = "http://www.hutter1.net/ai/altproc07.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#other",
  doi       = "10.1007/978-3-540-75225-7",
  issn      = "0302-9743",
  isbn      = "978-3-540-75224-0",
  keywords  = "algorithmic learning theory, query models, online learning, inductive inference, boosting, kernel methods, complexity and learning, reinforcement learning, unsupervised learning, grammatical inference, algorithmic forecasting.",
  abstract  = "The LNAI series reports state-of-the-art results in artificial intelligence research, development, and education. This volume (LNAI 4754) contains research papers presented at the 18th International Conference on Algorithmic Learning Theory (ALT 2007), which was held in Sendai (Japan) during October 1-4, 2007. The main objective of the conference was to provide an interdisciplinary forum for high-quality talks with a strong theoretical background and scientific interchange in areas such as query models, online learning, inductive inference, boosting, kernel methods, complexity and learning, reinforcement learning, unsupervised learning, grammatical inference, and algorithmic forecasting. The conference was co-located with the 10th International Conference on Discovery Science (DS 2007). The volume includes 25 technical contributions that were selected from 50 submissions, and five invited talks presented to the audience of ALT and DS. Longer versions of the DS invited papers are available in the proceedings of DS 2007.",
  znote     = "Acceptance rate: 25/50 = 50\%",
}

@InProceedings{Hutter:07altintro,
  author    = "Marcus Hutter and Rocco A. Servedio and Eiji Takimoto",
  title     = "Algorithmic Learning Theory 2007: Editors' Introduction",
  booktitle = "Proc. 18th International Conf. on Algorithmic Learning Theory ({ALT'07})",
  address   = "Sendai, Japan",
  series    = "LNAI",
  volume    = "4754",
  publisher = "Springer",
  pages     = "1--8",
  _month    = oct,
  year      = "2007",
  bibtex    = "http://www.hutter1.net/official/bib.htm#altintro07",
  pdf       = "http://www.hutter1.net/ai/altintro07.pdf",
  ps        = "http://www.hutter1.net/ai/altintro07.ps",
  latex     = "http://www.hutter1.net/ai/altintro07.tex",
  project   = "http://www.hutter1.net/official/projects.htm#other",
  issn      = "0302-9743",
  isbn      = "3-540-75224-2",
  doi       = "10.1007/978-3-540-75225-7_1",
  keywords  = "algorithmic learning theory, query models, online learning, inductive inference, boosting, kernel methods, complexity and learning, reinforcement learning, unsupervised learning, grammatical inference, algorithmic forecasting.",
  abstract  = "Learning theory is an active research area that incorporates ideas, problems, and techniques from a wide range of disciplines including statistics, artificial intelligence, information theory, pattern recognition, and theoretical computer science. The research reported at the 18th International Conference on Algorithmic Learning Theory (ALT 2007) ranges over areas such as unsupervised learning, inductive inference, complexity and learning, boosting and reinforcement learning, query learning models, grammatical inference, online learning and defensive forecasting, and kernel methods. In this introduction we give an overview of the five invited talks and the regular contributions of ALT 2007.",
}

@Article{Hutter:07uspx,
  author   = "Marcus Hutter",
  title    = "On Universal Prediction and {B}ayesian Confirmation",
  journal  = "Theoretical Computer Science",
  volume   = "384",
  number   = "1",
  pages    = "33--48",
  _month   = sep,
  year     = "2007",
  bibtex   = "http://www.hutter1.net/official/bib.htm#uspx",
  url      = "http://arxiv.org/abs/0709.1516",
  pdf      = "http://www.hutter1.net/ai/uspx.pdf",
  ps       = "http://www.hutter1.net/ai/uspx.ps",
  latex    = "http://www.hutter1.net/ai/uspx.tex",
  slides   = "http://www.hutter1.net/ai/susp.pdf",
  poster   = "http://www.hutter1.net/ai/susps.pdf",
  project  = "http://www.hutter1.net/official/projects.htm#uai",
  doi      = "10.1016/j.tcs.2007.05.016",
  issn     = "0304-3975",
  keywords = "Sequence prediction, Bayes, Solomonoff prior, Kolmogorov complexity, Occam's razor, prediction bounds, model classes, philosophical issues, symmetry principle, confirmation theory, reparametrization invariance, old-evidence/updating problem, (non)computable environments.",
  abstract = "The Bayesian framework is a well-studied and successful framework for inductive reasoning, which includes hypothesis testing and confirmation, parameter estimation, sequence prediction, classification, and regression. But standard statistical guidelines for choosing the model class and prior are not always available or fail, in particular in complex situations. Solomonoff completed the Bayesian framework by providing a rigorous, unique, formal, and universal choice for the model class and the prior. We discuss in breadth how and in which sense universal (non-i.i.d.) sequence prediction solves various (philosophical) problems of traditional Bayesian sequence prediction. We show that Solomonoff's model possesses many desirable properties: Strong total and weak instantaneous bounds, and in contrast to most classical continuous prior densities has no zero p(oste)rior problem, i.e. can confirm universal hypotheses, is reparametrization and regrouping invariant, and avoids the old-evidence and updating problem. It even performs well (actually better) in non-computable environments.",
}

@Article{Hutter:07mlconvxx,
  author   = "Marcus Hutter and Andrej A. Muchnik",
  title    = "On Semimeasures Predicting {Martin-L{\"o}f} Random Sequences",
  journal  = "Theoretical Computer Science",
  volume   = "382",
  number   = "3",
  pages    = "247--261",
  _month   = sep,
  year     = "2007",
  bibtex   = "http://www.hutter1.net/official/bib.htm#mlconvxx",
  url      = "http://arxiv.org/abs/0708.2319",
  pdf      = "http://www.hutter1.net/ai/mlconvxx.pdf",
  ps       = "http://www.hutter1.net/ai/mlconvxx.ps",
  latex    = "http://www.hutter1.net/ai/mlconvxx.tex",
  slides   = "http://www.hutter1.net/ai/smlconvx.pdf",
  project  = "http://www.hutter1.net/official/projects.htm#ait",
  doi      = "10.1016/j.tcs.2007.03.040",
  issn     = "0304-3975",
  keywords = "Sequence prediction; Algorithmic Information Theory; universal enumerable semimeasure; mixture distributions; posterior convergence; Martin-L{\"o}f randomness; quasimeasures.",
  abstract = "Solomonoff's central result on induction is that the posterior of a universal semimeasure M converges rapidly and with probability 1 to the true sequence generating posterior mu, if the latter is computable. Hence, M is eligible as a universal sequence predictor in case of unknown mu. Despite some nearby results and proofs in the literature, the stronger result of convergence for all (Martin-L{\"o}f) random sequences remained open. Such a convergence result would be particularly interesting and natural, since randomness can be defined in terms of M itself. We show that there are universal semimeasures M which do not converge for all random sequences, i.e. we give a partial negative answer to the open problem. We also provide a positive answer for some non-universal semimeasures. We define the incomputable measure D as a mixture over all computable measures and the enumerable semimeasure W as a mixture over all enumerable nearly-measures. We show that W converges to D and D to mu on all random sequences. The Hellinger distance measuring closeness of two distributions plays a central role.",
  support  = "SNF grant 2100-67712 and RFBR grants N04-01-00427 and N02-01-22001",
}

@Article{Hutter:07algprob,
  author   = "Marcus Hutter and Shane Legg and Paul M. B. Vit{\'a}nyi",
  title    = "Algorithmic Probability",
  journal  = "Scholarpedia",
  volume   = "2",
  number   = "8",
  pages    = "2572",
  _month   = aug,
  year     = "2007",
  bibtex   = "http://www.hutter1.net/official/bib.htm#algprob",
  http     = "http://www.scholarpedia.org/article/Algorithmic_Probability",
  pdf      = "http://www.hutter1.net/ai/algprob.pdf",
  ps       = "http://www.hutter1.net/ai/algprob.ps",
  project  = "http://www.hutter1.net/official/projects.htm#ait",
  doi      = "10.4249/scholarpedia.2572",
  issn     = "1941-6016",
  keywords = "algorithmic information theory, algorithmic complexity, discrete/continuous algorithmic probability, Bayes, Occam, Epicurus, applications, references",
  abstract = "Algorithmic ``Solomonoff'' Probability (AP) assigns to objects an a priori probability that is in some sense universal. This prior distribution has theoretical applications in a number of areas, including inductive inference theory and the time complexity analysis of algorithms. Its main drawback is that it is not computable and thus can only be approximated in practice.",
}

@InProceedings{Hutter:07improb,
  author    = "Alberto Piatti and Marco Zaffalon and Fabio Trojani and Marcus Hutter",
  title     = "Learning about a Categorical Latent Variable under Prior Near-Ignorance",
  booktitle = "Proc. 5th International Symposium on Imprecise Probability: Theories and Applications ({ISIPTA'07})",
  pages     = "357--364",
  _editor   = "G. de Cooman and J. Vejnarova and M. Zaffalon",
  publisher = "Action M Agency",
  address   = "Prague, Czech Republic",
  _month    = jul,
  year      = "2007",
  bibtex    = "http://www.hutter1.net/official/bib.htm#improb",
  url       = "http://arxiv.org/abs/0705.4312",
  pdf       = "http://www.hutter1.net/ai/improb.pdf",
  ps        = "http://www.hutter1.net/ai/improb.ps",
  latex     = "http://www.hutter1.net/ai/improb.tex",
  slides    = "http://www.hutter1.net/ai/simprob.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#robust",
  code      = "http://www.hutter1.net/ai/improb.cpp",
  isbn      = "978-80-86742-20-5",
  keywords  = "Prior near-ignorance, latent and manifest variables, observational processes, vacuous beliefs, imprecise probabilities.",
  abstract  = "It is well known that complete prior ignorance is not compatible with learning, at least in a coherent theory of (epistemic) uncertainty. What is less widely known, is that there is a state similar to full ignorance, that Walley calls \emph{near-ignorance}, that permits learning to take place. In this paper we provide new and substantial evidence that also near-ignorance cannot be really regarded as a way out of the problem of starting statistical inference in conditions of very weak beliefs. The key to this result is focusing on a setting characterized by a variable of interest that is \emph{latent}. We argue that such a setting is by far the most common case in practice, and we show, for the case of categorical latent variables (and general \emph{manifest} variables) that there is a sufficient condition that, if satisfied, prevents learning to take place under prior near-ignorance. This condition is shown to be easily satisfied in the most common statistical problems.",
  znote     = "Acceptance rate: 48/70 = 68\%",
}

@InProceedings{Hutter:07pcreg,
  author    = "Marcus Hutter",
  title     = "{B}ayesian Regression of Piecewise Constant Functions",
  booktitle = "Proc. ISBA 8th International Meeting on Bayesian Statistics",
  address   = "Benidorm, Spain",
  _editor   = "J.M. Bernardo and M.J. Bayarri and J.O. Berger and A.P. Dawid and D. Heckerman and A.F.M. Smith and M. West",
  publisher = "Oxford University Press",
  pages     = "607--612",
  _month    = jul,
  year      = "2007",
  bibtex    = "http://www.hutter1.net/official/bib.htm#pcreg",
  url       = "http://arxiv.org/abs/math.ST/0606315",
  pdf       = "http://www.hutter1.net/ai/pcreg.pdf",
  ps        = "http://www.hutter1.net/ai/pcreg.ps",
  latex     = "http://www.hutter1.net/ai/pcreg.tex",
  slides    = "http://www.hutter1.net/ai/spcreg.pdf",
  award     = "http://bayesian.org/project/lindley-prize/",
  project   = "http://www.hutter1.net/official/projects.htm#bayes",
  ccode     = "http://www.hutter1.net/ai/pcreg.cpp",
  rcode     = "http://www.hutter1.net/ai/cpcreg.zip",
  isbn      = "978-0-19-921465-5",
  abstract  = "We derive an exact and efficient Bayesian regression algorithm for piecewise constant functions of unknown segment number, boundary location, and levels. It works for any noise and segment level prior, e.g.\ Cauchy which can handle outliers. We derive simple but good estimates for the in-segment variance. We also propose a Bayesian regression curve as a better way of smoothing data without blurring boundaries. The Bayesian approach also allows straightforward determination of the evidence, break probabilities and error estimates, useful for model selection and significance and robustness studies. We briefly mention the performance on synthetic and real-world examples. The full version of the paper contains detailed derivations, more motivation and discussion, the complete algorithm, the experiments, and various extensions.",
  keywords  = "Bayesian regression, exact polynomial algorithm, non-parametric inference, piecewise constant function, dynamic programming, change point problem.",
  note      = "Lindley prize for innovative research in Bayesian statistics.",
  znote     = "Acceptance rate: 19/326 = 6\%.",
}

@InProceedings{Hutter:07pquest,
  author    = "Daniil Ryabko and Marcus Hutter",
  title     = "On Sequence Prediction for Arbitrary Measures",
  booktitle = "Proc. IEEE International Symposium on Information Theory ({ISIT'07})",
  pages     = "2346--2350",
  _editor   = "A. Goldsmith and M. Medard and A. Shokrollahi and R. Zamir",
  publisher = "IEEE",
  address   = "Nice, France",
  _month    = jun,
  year      = "2007",
  bibtex    = "http://www.hutter1.net/official/bib.htm#pquest",
  url       = "http://arxiv.org/abs/cs.LG/0606077",
  pdf       = "http://www.hutter1.net/ai/pquest.pdf",
  ps        = "http://www.hutter1.net/ai/pquest.ps",
  latex     = "http://www.hutter1.net/ai/pquest.tex",
  slides    = "http://www.hutter1.net/ai/spquest.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#bayes",
  doi       = "10.1109/ISIT.2007.4557570",
  isbn      = "1-4244-1429-6",
  keywords  = "sequence prediction, local absolute continuity, non-stationary measures, average/expected criteria, absolute/KL divergence, mixtures of measures.",
  abstract  = "Suppose we are given two probability measures on the set of one-way infinite finite-alphabet sequences. Consider the question when one of the measures predicts the other, that is, when conditional probabilities converge (in a certain sense), if one of the measures is chosen to generate the sequence. This question may be considered a refinement of the problem of sequence prediction in its most general formulation: for a given class of probability measures, does there exist a measure which predicts all of the measures in the class? To address this problem, we find some conditions on local absolute continuity which are sufficient for prediction and generalize several different notions that are known to be sufficient for prediction. We also formulate some open questions to outline a direction for finding the conditions on classes of measures for which prediction is possible.",
  support   = "SNF grant 200020-107616",
}

@InProceedings{Hutter:07idefs,
  author    = "Shane Legg and Marcus Hutter",
  title     = "A Collection of Definitions of Intelligence",
  booktitle = "Advances in Artificial General Intelligence: Concepts, Architectures and Algorithms",
  series    = "Frontiers in Artificial Intelligence and Applications",
  volume    = "157",
  pages     = "17--24",
  editor    = "B. Goertzel and P. Wang",
  publisher = "IOS Press",
  address   = "Amsterdam, NL",
  _month    = jun,
  year      = "2007",
  bibtex    = "http://www.hutter1.net/official/bib.htm#idefs",
  url       = "http://arxiv.org/abs/0706.3639",
  http      = "http://www.idsia.ch/~shane/intelligence.html",
  pdf       = "http://www.hutter1.net/ai/idefs.pdf",
  ps        = "http://www.hutter1.net/ai/idefs.ps",
  latex     = "http://www.hutter1.net/ai/idefs.tex",
  project   = "http://www.hutter1.net/official/projects.htm#uai",
  isbn      = "978-1-58603-758-1",
  issn      = "0922-6389",
  keywords  = "intelligence definitions, collective, psychologist, artificial, universal",
  abstract  = "This chapter is a survey of a large number of informal definitions of ``intelligence'' that the authors have collected over the years. Naturally, compiling a complete list would be impossible as many definitions of intelligence are buried deep inside articles and books. Nevertheless, the 70-odd definitions presented here are, to the authors' knowledge, the largest and most well referenced collection there is.",
  support   = "SNF grant 200020-107616",
}

@InProceedings{Hutter:07lorp,
  author    = "Marcus Hutter",
  title     = "The Loss Rank Principle for Model Selection",
  booktitle = "Proc. 20th Annual Conf. on Learning Theory ({COLT'07})",
  address   = "San Diego, USA",
  series    = "LNAI",
  volume    = "4539",
  _editor   = "N. Bshouty and C. Gentile",
  publisher = "Springer",
  pages     = "589--603",
  _month    = jun,
  year      = "2007",
  bibtex    = "http://www.hutter1.net/official/bib.htm#lorp",
  url       = "http://arxiv.org/abs/math.ST/0702804",
  pdf       = "http://www.hutter1.net/ai/lorp.pdf",
  ps        = "http://www.hutter1.net/ai/lorp.ps",
  latex     = "http://www.hutter1.net/ai/lorp.tex",
  slides    = "http://www.hutter1.net/ai/slorp.pdf",
  project   = "http://www.hutter1.net/official/projects.htm#mdl",
  doi       = "10.1007/978-3-540-72927-3_42",
  issn      = "0302-9743",
  keywords  = "Model selection, loss rank principle, non-parametric regression, classification, general loss function, k nearest neighbors.",
  abstract  = "We introduce a new principle for model selection in regression and classification. Many regression models are controlled by some smoothness or flexibility or complexity parameter c, e.g. the number of neighbors to be averaged over in k nearest neighbor (kNN) regression or the polynomial degree in regression with polynomials. Let f_D^c be the (best) regressor of complexity c on data D. A more flexible regressor can fit more data D' well than a more rigid one. If something (here small loss) is easy to achieve it's typically worth less. We define the loss rank of f_D^c as the number of other (fictitious) data D' that are fitted better by f_D'^c than D is fitted by f_D^c. We suggest selecting the model complexity c that has minimal loss rank (LoRP). Unlike most penalized maximum likelihood variants (AIC,BIC,MDL), LoRP only depends on the regression function and loss function. It works without a stochastic noise model, and is directly applicable to any non-parametric regressor, like kNN. In this paper we formalize, discuss, and motivate LoRP, study it for specific regression problems, in particular linear ones, and compare it to other model selection schemes.",
  znote     = "Acceptance rate: 41/92 = 45\%",
}

@Article{Hutter:07ait,
  author   = "Marcus Hutter",
  title    = "Algorithmic Information Theory: a brief non-technical guide to the field",
  journal  = "Scholarpedia",
  volume   = "2",
  number   = "3",
  pages    = "2519",
  _month   = mar,
  year     = "2007",
  bibtex   = "http://www.hutter1.net/official/bib.htm#ait",
  http     = "http://www.scholarpedia.org/article/Algorithmic_Information_Theory",
  url      = "http://arxiv.org/abs/cs.IT/0703024",
  pdf      = "http://www.hutter1.net/ai/ait.pdf",
  ps       = "http://www.hutter1.net/ai/ait.ps",
  latex    = "http://www.hutter1.net/ai/ait.zip",
  slides   = "http://www.hutter1.net/ai/sapplait.pdf",
  video    = "http://pirsa.org/displayFlash.php?id=18040109",
  project  = "http://www.hutter1.net/official/projects.htm#ait",
  doi      = "10.4249/scholarpedia.2519",
  issn     = "1941-6016",
  keywords = "Algorithmic information theory, algorithmic ``Kolmogorov'' complexity, algorithmic ``Solomonoff'' probability, universal ``Levin'' search, algorithmic ``Martin-Loef'' randomness, applications, history, references, notation, nomenclature, map.",
  abstract = "This article is a brief guide to the field of algorithmic information theory (AIT), its underlying philosophy, and the most important concepts. AIT arises by mixing information theory and computation theory to obtain an objective and absolute notion of information in an individual object, and in so doing gives rise to an objective and robust notion of randomness of individual objects. This is in contrast to classical information theory that is based on random variables and communication, and has no bearing on information and randomness of individual objects. After a brief overview, the major subfields, applications, history, and a map of the field are presented.",
}

@Article{Hutter:07postbndx,
  author   = "Alexey Chernov and Marcus Hutter and J{\"u}rgen Schmidhuber",
  title    = "Algorithmic Complexity Bounds on Future Prediction Errors",
  journal  = "Information and Computation",
  volume   = "205",
  number   = "2",
  pages    = "242--261",
  _month   = feb,
  year     = "2007",
  bibtex   = "http://www.hutter1.net/official/bib.htm#postbndx",
  url      = "http://arxiv.org/abs/cs.LG/0701120",
  conf     = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT05/alt05.jhtml",
  pdf      = "http://www.hutter1.net/ai/postbndx.pdf",
  ps       = "http://www.hutter1.net/ai/postbndx.ps",
  latex    = "http://www.hutter1.net/ai/postbndx.tex",
  slides   = "http://www.hutter1.net/ai/spostbnd.pdf",
  project  = "http://www.hutter1.net/official/projects.htm#ait",
  doi      = "10.1016/j.ic.2006.10.004",
  issn     = "0890-5401",
  keywords = "Kolmogorov complexity, posterior bounds, online sequential prediction, Solomonoff prior, monotone conditional complexity, total error, future loss, randomness deficiency",
  abstract = "We bound the future loss when predicting any (computably) stochastic sequence online. Solomonoff finitely bounded the total deviation of his universal predictor $M$ from the true distribution $\mu$ by the algorithmic complexity of $\mu$. Here we assume we are at a time $t>1$ and already observed $x=x_1...x_t$. We bound the future prediction performance on $x_{t+1}x_{t+2}...$ by a new variant of algorithmic complexity of $\mu$ given $x$, plus the complexity of the randomness deficiency of $x$. The new complexity is monotone in its condition in the sense that this complexity can only decrease if the condition is prolonged. We also briefly discuss potential generalizations to Bayesian model classes and to classification problems.",
  support  = "SNF grant 2000-61847",
}

@InCollection{Hutter:07aixigentle,
  author     = "Marcus Hutter",
  title      = "Universal Algorithmic Intelligence: A Mathematical Top$\rightarrow$Down Approach",
  booktitle  = "Artificial General Intelligence",
  _editor    = "B. Goertzel and C. Pennachin",
  publisher  = "Springer",
  address    = "Berlin",
  _series    = "Cognitive Technologies",
  pages      = "227--290",
  _month     = jan,
  year       = "2007",
  bibtex     = "http://www.hutter1.net/official/bib.htm#aixigentle",
  http       = "http://www.hutter1.net/ai/aixigentle.htm",
  url        = "http://arxiv.org/abs/cs.AI/0701125",
  pdf        = "http://www.hutter1.net/ai/aixigentle.pdf",
  ps         = "http://www.hutter1.net/ai/aixigentle.ps",
  latex      = "http://www.hutter1.net/ai/aixigentle.tex",
  slides     = "http://www.hutter1.net/ai/saixigentle.pdf",
  video      = "http://vimeo.com/14888930",
  project    = "http://www.hutter1.net/official/projects.htm#uai",
  press      = "http://www.hutter1.net/official/press.htm#uaibook",
  isbn       = "3-540-23733-X",
  categories = "I.2. [Artificial Intelligence]",
  keywords   = "Artificial intelligence; algorithmic probability; sequential decision theory; rational agents; value function; Solomonoff induction; Kolmogorov complexity; reinforcement learning; universal sequence prediction; strategic games; function minimization; supervised learning.",
  abstract   = "Decision theory formally solves the problem of rational agents in uncertain worlds if the true environmental prior probability distribution is known. Solomonoff's theory of universal induction formally solves the problem of sequence prediction for unknown prior distribution. We combine both ideas and get a parameter-free theory of universal Artificial Intelligence. We give strong arguments that the resulting AIXI model is the most intelligent unbiased agent possible. We outline for a number of problem classes, including sequence prediction, strategic games, function minimization, reinforcement and supervised learning, how the AIXI model can formally solve them. The major drawback of the AIXI model is that it is uncomputable. To overcome this problem, we construct a modified algorithm AIXI$tl$ that is still effectively more intelligent than any other time $t$ and length $l$ bounded agent. The computation time of AIXI$tl$ is of the order $t \cdot 2^l$. Other discussed topics are formal definitions of intelligence order relations, the horizon problem and relations of the AIXI theory to other AI approaches.",
}

## %-------------Publications-of-Marcus-Hutter-2006--------------%

@Article{Hutter:06unipriorx,
  author   = "Marcus Hutter",
  title    = "On Generalized Computable Universal Priors and their Convergence",
  journal  = "Theoretical Computer Science",
  volume   = "364",
  number   = "1",
  pages    = "27--41",
  _month   = nov,
  year     = "2006",
  bibtex   = "http://www.hutter1.net/official/bib.htm#unipriorx",
  url      = "http://arxiv.org/abs/cs.LG/0503026",
  pdf      = "http://www.hutter1.net/ai/unipriorx.pdf",
  ps       = "http://www.hutter1.net/ai/unipriorx.ps",
  latex    = "http://www.hutter1.net/ai/unipriorx.tex",
  slides   = "http://www.hutter1.net/ai/sunipriors.pdf",
  project  = "http://www.hutter1.net/official/projects.htm#ait",
  doi      = "10.1016/j.tcs.2006.07.039",
  issn     = "0304-3975",
  keywords = "Sequence prediction; Algorithmic Information Theory; Solomonoff's prior; universal probability; mixture distributions; posterior convergence; computability concepts; Martin-Loef randomness.",
  abstract = "Solomonoff unified Occam's razor and Epicurus' principle of multiple explanations to one elegant, formal, universal theory of inductive inference, which initiated the field of algorithmic information theory. His central result is that the posterior of the universal semimeasure M converges rapidly to the true sequence generating posterior mu, if the latter is computable. Hence, M is eligible as a universal predictor in case of unknown mu. The first part of the paper investigates the existence and convergence of computable universal (semi)measures for a hierarchy of computability classes: recursive, estimable, enumerable, and approximable. For instance, M is known to be enumerable, but not estimable, and to dominate all enumerable semimeasures. We present proofs for discrete and continuous semimeasures. The second part investigates more closely the types of convergence, possibly implied by universality: in difference and in ratio, with probability 1, in mean sum, and for Martin-Loef random sequences. We introduce a generalized concept of randomness for individual sequences and use it to exhibit difficulties regarding these issues. In particular, we show that convergence fails (holds) on generalized-random sequences in gappy (dense) Bernoulli classes.",
}

@Article{Hutter:06fuo,
  author = "Marcus Hutter and Shane Legg",
  title = "Fitness Uniform Optimization",
  journal = "IEEE Transactions on Evolutionary Computation",
  volume = "10",
  number = "5",
  pages = "568--589",
  _month = oct,
  year = "2006",
  bibtex = "http://www.hutter1.net/official/bib.htm#fuo",
  url = "http://arxiv.org/abs/cs.NE/0610126",
  pdf = "http://www.hutter1.net/ai/fuo.pdf",
  ps = "http://www.hutter1.net/ai/fuo.ps",
  latex = "http://www.hutter1.net/ai/fuo.zip",
  slides = "http://www.hutter1.net/ai/sfuss.pdf",
  project = "http://www.hutter1.net/official/projects.htm#optimize",
  press = "http://www.hutter1.net/official/press.htm#fuss",
  doi = "10.1109/TEVC.2005.863127",
  issn = "1089-778X",
  keywords = "Evolutionary algorithms, fitness uniform selection scheme, fitness uniform deletion scheme, preserve diversity, local optima, evolution, universal similarity relation, correlated recombination, fitness tree model, traveling salesman, set covering, satisfiability.",
  abstract = "In evolutionary algorithms, the fitness of a population increases with time by mutating and recombining individuals and by a biased selection of more fit individuals. The right selection pressure is critical in ensuring sufficient optimization progress on the one hand and in preserving genetic diversity to be able to escape from local optima on the other hand. Motivated by a universal similarity relation on the individuals, we propose a new selection scheme, which is uniform in the fitness values. It generates selection pressure toward sparsely populated fitness regions, not necessarily toward higher fitness, as is the case for all other selection schemes. We show analytically on a simple example that the new selection scheme can be much more effective than standard selection schemes. We also propose a new deletion scheme which achieves a similar result via deletion and show how such a scheme preserves genetic diversity more effectively than standard approaches. We compare the performance of the new schemes to tournament selection and random deletion on an artificial deceptive problem and a range of NP-hard problems: traveling salesman, set covering and satisfiability.",
}

@InProceedings{Hutter:06discount,
  author = "Marcus Hutter",
  title = "General Discounting versus Average Reward",
  booktitle = "Proc. 17th International Conf. on Algorithmic Learning Theory ({ALT'06})",
  address = "Barcelona, Spain",
  series = "LNAI",
  volume = "4264",
  _editor = "Jose L. Balcázar and Phil Long and Frank Stephan",
  publisher = "Springer",
  pages = "244--258",
  _month = oct,
  year = "2006",
  bibtex = "http://www.hutter1.net/official/bib.htm#discount",
  url = "http://arxiv.org/abs/cs.LG/0605040",
  conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT06/alt06.jhtml",
  pdf = "http://www.hutter1.net/ai/discount.pdf",
  ps = "http://www.hutter1.net/ai/discount.ps",
  latex = "http://www.hutter1.net/ai/discount.tex",
  slides = "http://www.hutter1.net/ai/sdiscount.pdf",
  project = "http://www.hutter1.net/official/projects.htm#rl",
  issn = "0302-9743",
  isbn = "3-540-46649-5",
  doi = "10.1007/11894841_21",
  keywords = "reinforcement learning; average value; discounted value; arbitrary environment; arbitrary discount sequence; effective horizon; increasing farsightedness; consistent behavior.",
  abstract = "Consider an agent interacting with an environment in cycles. In every interaction cycle the agent is rewarded for its performance. We compare the average reward U from cycle 1 to m (average value) with the future discounted reward V from cycle k to infinity (discounted value). We consider essentially arbitrary (non-geometric) discount sequences and arbitrary reward sequences (non-MDP environments). We show that asymptotically U for m->infinity and V for k->infinity are equal, provided both limits exist. Further, if the effective horizon grows linearly with k or faster, then existence of the limit of U implies that the limit of V exists. Conversely, if the effective horizon grows linearly with k or slower, then existence of the limit of V implies that the limit of U exists.",
  znote = "Acceptance rate: 24/53 = 45\%",
}

@InProceedings{Hutter:06actopt,
  author = "Daniil Ryabko and Marcus Hutter",
  title = "Asymptotic Learnability of Reinforcement Problems with Arbitrary Dependence",
  booktitle = "Proc. 17th International Conf. on Algorithmic Learning Theory ({ALT'06})",
  address = "Barcelona, Spain",
  series = "LNAI",
  volume = "4264",
  _editor = "Jose L. Balcázar and Phil Long and Frank Stephan",
  publisher = "Springer",
  pages = "334--347",
  _month = oct,
  year = "2006",
  bibtex = "http://www.hutter1.net/official/bib.htm#actopt",
  url = "http://arxiv.org/abs/cs.LG/0603110",
  conf = "http://www-alg.ist.hokudai.ac.jp/~thomas/ALT06/alt06.jhtml",
  pdf = "http://www.hutter1.net/ai/actopt.pdf",
  ps = "http://www.hutter1.net/ai/actopt.ps",
  latex = "http://www.hutter1.net/ai/actopt.tex",
  slides = "http://www.hutter1.net/ai/sactopt.pdf",
  project = "http://www.hutter1.net/official/projects.htm#universal",
  press = "http://www.hutter1.net/official/press.htm#universal",
  issn = "0302-9743",
  isbn = "3-540-46649-5",
  doi = "10.1007/11894841_27",
  keywords = "Reinforcement learning, asymptotic average value, self-optimizing policies, (non) Markov decision processes.",
  abstract = "We address the problem of reinforcement learning in which observations may exhibit an arbitrary form of stochastic dependence on past observations and actions, i.e. environments more general than (PO)MDPs. The task for an agent is to attain the best possible asymptotic reward where the true generating environment is unknown but belongs to a known countable family of environments. We find some sufficient conditions on the class of environments under which an agent exists which attains the best asymptotic reward for any environment in the class. We analyze how tight these conditions are and how they relate to different probabilistic assumptions known in reinforcement learning and related fields, such as Markov Decision Processes and mixing conditions.",
  znote = "Acceptance rate: 24/53 = 45\%",
}

@Misc{Hutter:06hprize,
  author = "Marcus Hutter",
  title = "Human Knowledge Compression Prize",
  _month = aug,
  year = "2006/2020",
  bibtex = "http://www.hutter1.net/official/bib.htm#hprize",
  project = "http://www.hutter1.net/prize/index.htm",
  press = "http://www.hutter1.net/official/press.htm#hprize",
  keywords = "Wikipedia; artificial intelligence; lossless data compression; 50'000€/500'000€.",
  abstract = "Being able to compress well is closely related to intelligence. While intelligence is a slippery concept, file sizes are hard numbers. The intention of this prize is to give incentives for advancing the field of Artificial Intelligence through the compression of human knowledge. The better one can compress the encyclopedia Wikipedia, the better one can predict; and being able to predict well is key for being able to act intelligently.",
  note = "open ended, http://prize.hutter1.net/",
  for = "080401(80%),080199(20%)",
}

@Article{Hutter:06mdlspeedx,
  author = "Jan Poland and Marcus Hutter",
  title = "{MDL} Convergence Speed for {B}ernoulli Sequences",
  journal = "Statistics and Computing",
  volume = "16",
  number = "2",
  pages = "161--175",
  _month = jun,
  year = "2006",
  bibtex = "http://www.hutter1.net/official/bib.htm#mdlspeedx",
  url = "http://arxiv.org/abs/math.ST/0602505",
  pdf = "http://www.hutter1.net/ai/mdlspeedx.pdf",
  ps = "http://www.hutter1.net/ai/mdlspeedx.ps",
  latex = "http://www.hutter1.net/ai/mdlspeedx.tex",
  slides = "http://www.hutter1.net/ai/smdlspeed.pdf",
  slidesppt = "http://www.hutter1.net/ai/smdlspeed.ppt",
  project = "http://www.hutter1.net/official/projects.htm#mdl",
  issn = "0960-3174",
  doi = "10.1007/s11222-006-6746-3",
  keywords = "MDL, Minimum Description Length, Convergence Rate, Prediction, Bernoulli, Discrete Model Class.",
  abstract = "The Minimum Description Length principle for online sequence estimation/prediction in a proper learning setup is studied. If the underlying model class is discrete, then the total expected square loss is a particularly interesting performance measure: (a) this quantity is finitely bounded, implying convergence with probability one, and (b) it additionally specifies the convergence speed. For MDL, in general one can only have loss bounds which are finite but exponentially larger than those for Bayes mixtures. We show that this is even the case if the model class contains only Bernoulli distributions. We derive a new upper bound on the prediction error for countable Bernoulli classes. This implies a small bound (comparable to the one for Bayes mixtures) for certain important model classes. We discuss the application to Machine Learning tasks such as classification and hypothesis testing, and generalization to countable classes of i.i.d. models.",
}

@InProceedings{Hutter:06usp,
  author = "Marcus Hutter",
  title = "On the Foundations of Universal Sequence Prediction",
  booktitle = "Proc. 3rd Annual Conference on Theory and Applications of Models of Computation ({TAMC'06})",
  volume = "3959",
  series = "LNCS",
  pages = "408--420",
  _editor = "J.-Y. Cai and S. B. Cooper and A. Li",
  publisher = "Springer",
  _address = "Beijing",
  _month = may,
  year = "2006",
  bibtex = "http://www.hutter1.net/official/bib.htm#usp",
  url = "http://arxiv.org/abs/cs.LG/0605009",
  conf = "http://gcl.iscas.ac.cn/accl06/TAMC06_Home.htm",
  pdf = "http://www.hutter1.net/ai/usp.pdf",
  ps = "http://www.hutter1.net/ai/usp.ps",
  latex = "http://www.hutter1.net/ai/usp.tex",
  slides = "http://www.hutter1.net/ai/susp.pdf",
  poster = "http://www.hutter1.net/ai/susps.pdf",
  project = "http://www.hutter1.net/official/projects.htm#ait",
  issn = "0302-9743",
  isbn = "3-540-34021-1",
  doi = "10.1007/11750321_39",
  keywords = "Sequence prediction, Bayes, Solomonoff prior, Kolmogorov complexity, Occam's razor, prediction bounds, model classes, philosophical issues, symmetry principle, confirmation theory, reparametrization invariance, old-evidence/updating problem, (non)computable environments.",
  abstract = "Solomonoff completed the Bayesian framework by providing a rigorous, unique, formal, and universal choice for the model class and the prior. We discuss in breadth how and in which sense universal (non-i.i.d.) sequence prediction solves various (philosophical) problems of traditional Bayesian sequence prediction. We show that Solomonoff's model possesses many desirable properties: Fast convergence and strong bounds, and in contrast to most classical continuous prior densities has no zero p(oste)rior problem, i.e. can confirm universal hypotheses, is reparametrization and regrouping invariant, and avoids the old-evidence and updating problem. It even performs well (actually better) in non-computable environments.",
  znote = "Acceptance rate: 76/400 = 19\%",
  alt = "Also 2-page abstract and poster at 9th ISBA World Meeting (2008)",
  abstract2p = "http://www.hutter1.net/ai/usps.pdf",
}

@InProceedings{Hutter:06aixifoe,
  author = "Jan Poland and Marcus Hutter",
  title = "Universal Learning of Repeated Matrix Games",
  booktitle = "Proc. 15th Annual Machine Learning Conf. of {B}elgium and {T}he {N}etherlands ({Benelearn'06})",
  pages = "7--14",
  address = "Ghent, Belgium",
  _editor = "Yvan Saeys and Bernard De Baets and Elena Tsiporkova and Yves Van de Peer",
  xpublisher = "",
  _month = may,
  year = "2006",
  isbn = "90-382-0948-7",
  bibtex = "http://www.hutter1.net/official/bib.htm#aixifoe",
  url = "http://arxiv.org/abs/cs.LG/0508073",
  conf = "http://bioinformatics.psb.ugent.be/benelearn2006/",
  pdf = "http://www.hutter1.net/ai/aixifoe.pdf",
  ps = "http://www.hutter1.net/ai/aixifoe.ps",
  latex = "http://www.hutter1.net/ai/aixifoe.zip",
  slides = "http://www.hutter1.net/ai/saixifoe.pdf",
  project = "http://www.hutter1.net/official/projects.htm#expert",
  abstract = "We study and compare the learning dynamics of two universal learning algorithms, one based on Bayesian learning and the other on prediction with expert advice. Both approaches have strong asymptotic performance guarantees. When confronted with the task of finding good long-term strategies in repeated 2 x 2 matrix games, they behave quite differently. We consider the case where the learning algorithms are not even informed about the game they are playing.",
}

@InProceedings{Hutter:06ior,
  author = "Shane Legg and Marcus Hutter",
  title = "A Formal Measure of Machine Intelligence",
  booktitle = "Proc. 15th Annual Machine Learning Conf. of {B}elgium and {T}he {N}etherlands ({Benelearn'06})",
  pages = "73--80",
  address = "Ghent, Belgium",
  _editor = "Yvan Saeys and Bernard De Baets and Elena Tsiporkova and Yves Van de Peer",
  _month = may,
  year = "2006",
  isbn = "90-382-0948-7",
  bibtex = "http://www.hutter1.net/official/bib.htm#ior",
  url = "http://arxiv.org/abs/cs.AI/0605024",
  conf = "http://bioinformatics.psb.ugent.be/benelearn2006/",
  pdf = "http://www.hutter1.net/ai/ior.pdf",
  ps = "http://www.hutter1.net/ai/ior.ps",
  latex = "http://www.hutter1.net/ai/ior.zip",
  slides = "http://www.hutter1.net/ai/sior.pdf",
  project = "http://www.hutter1.net/official/projects.htm#uai",
  press = "http://www.hutter1.net/official/press.htm#ior",
  abstract = "A fundamental problem in artificial intelligence is that nobody really knows what intelligence is. The problem is especially acute when we need to consider artificial systems which are significantly different to humans. In this paper we approach this problem in the following way: We take a number of well known informal definitions of human intelligence that have been given by experts, and extract their essential features. These are then mathematically formalised to produce a general measure of intelligence for arbitrary machines. We believe that this measure formally captures the concept of machine intelligence in the broadest reasonable sense.",
}

@InProceedings{Hutter:06robot,
  author = "Viktor Zhumatiy and Faustino Gomez and Marcus Hutter and J{\"u}rgen Schmidhuber",
  title = "Metric State Space Reinforcement Learning for a Vision-Capable Mobile Robot",
  booktitle = "Proc. 9th International Conf. on Intelligent Autonomous Systems ({IAS'06})",
  pages = "272--281",
  _editor = "Tamio Arai and Rolf Pfeifer and Tucker Balch and Hiroshi Yokoi",
  publisher = "IOS Press",
  _month = mar,
  year = "2006",
  bibtex = "http://www.hutter1.net/official/bib.htm#robot",
  url = "http://arxiv.org/abs/cs.RO/0603023",
  conf = "http://www.arai.pe.u-tokyo.ac.jp/IAS-9/",
  pdf = "http://www.hutter1.net/ai/robot.pdf",
  ps = "http://www.hutter1.net/ai/robot.ps",
  latex = "http://www.hutter1.net/ai/robot.zip",
  slides = "http://www.hutter1.net/ai/srobot.pdf",
  slidesppt = "http://www.hutter1.net/ai/srobot.ppt",
  isbn = "1-58603-595-9",
  keywords = "reinforcement learning; mobile robots.",
  abstract = "We address the problem of autonomously learning controllers for vision-capable mobile robots. We extend McCallum's (1995) Nearest-Sequence Memory algorithm to allow for general metrics over state-action trajectories. We demonstrate the feasibility of our approach by successfully running our algorithm on a real mobile robot. The algorithm is novel and unique in that it (a) explores the environment and learns directly on a mobile robot without using a hand-made computer model as an intermediate step, (b) does not require manual discretization of the sensor input space, (c) works in piecewise continuous perceptual spaces, and (d) copes with partial observability. Together this allows learning from much less experience compared to previous methods.",
  znote = "Acceptance rate: 112/146 = 77\%",
}

@Article{Hutter:06knapsack,
  author = "Monaldo Mastrolilli and Marcus Hutter",
  title = "Hybrid Rounding Techniques for Knapsack Problems",
  journal = "Discrete Applied Mathematics",
  volume = "154",
  number = "4",
  pages = "640--649",
  _month = mar,
  year = "2006",
  bibtex = "http://www.hutter1.net/official/bib.htm#knapsack",
  url = "http://arxiv.org/abs/cs.CC/0305002",
  pdf = "http://www.hutter1.net/ai/knapsack.pdf",
  ps = "http://www.hutter1.net/ai/knapsack.ps",
  latex = "http://www.hutter1.net/ai/knapsack.tex",
  project = "http://www.hutter1.net/official/projects.htm#optimize",
  issn = "0166-218X",
  doi = "10.1016/j.dam.2005.08.004",
  abstract = "We address the classical knapsack problem and a variant in which an upper bound is imposed on the number of items that can be selected. We show that appropriate combinations of rounding techniques yield novel and powerful ways of rounding. As an application of these techniques, we present faster polynomial time approximation schemes that compute an approximate solution of any fixed accuracy in linear time. These linear complexity bounds give a substantial improvement of the best previously known polynomial bounds.",
}

@Article{Hutter:06unimdlx,
  author = "Marcus Hutter",
  title = "Sequential Predictions based on Algorithmic Complexity",
  journal = "Journal of Computer and System Sciences",
  volume = "72",
  number = "1",
  pages = "95--117",
  _month = feb,
  year = "2006",
  bibtex = "http://www.hutter1.net/official/bib.htm#unimdlx",
  url = "http://arxiv.org/abs/cs.IT/0508043",
  pdf = "http://www.hutter1.net/ai/unimdlx.pdf",
  ps = "http://www.hutter1.net/ai/unimdlx.ps",
  latex = "http://www.hutter1.net/ai/unimdlx.tex",
  slides = "http://www.hutter1.net/ai/sunimdl.pdf",
  project = "http://www.hutter1.net/official/projects.htm#mdl",
  issn = "0022-0000",
  doi = "10.1016/j.jcss.2005.07.001",
  keywords = "Sequence prediction; Algorithmic Information Theory; Solomonoff's prior; Monotone Kolmogorov Complexity; Minimal Description Length; Convergence; Self-Optimizingness",
  abstract = "This paper studies sequence prediction based on the monotone Kolmogorov complexity $\Km=-\lb m$, i.e.\ based on universal MDL. $m$ is extremely close to Solomonoff's prior $M$, the latter being an excellent predictor in deterministic as well as probabilistic environments, where performance is measured in terms of convergence of posteriors or losses. Despite this closeness to $M$, it is difficult to assess the prediction quality of $m$, since little is known about the closeness of their posteriors, which are the important quantities for prediction. We show that for deterministic computable environments, the ``posterior'' and losses of $m$ converge, but rapid convergence could only be shown on-sequence; the off-sequence behavior is unclear. In probabilistic environments, neither the posterior nor the losses converge, in general.",
}

@Proceedings{Hutter:06kcdagabs,
  editor = "Marcus Hutter and Wolfgang Merkle and Paul M. B. Vit{\'a}nyi",
  title = "Kolmogorov Complexity and Applications",
  number = "06051",
  _month = jan # "/" # aug,
  year = "2006",
  series = "Dagstuhl Seminar Proceedings",
  url1 = "http://www.hutter1.net/dagstuhl/",
  url2 = "http://drops.dagstuhl.de/portals/06051",
  url3 = "http://drops.dagstuhl.de/opus/volltexte/2006/663",
  pdf = "http://www.hutter1.net/dagstuhl/kcdagabs.pdf",
  ps = "http://www.hutter1.net/dagstuhl/kcdagabs.ps",
  latex = "http://www.hutter1.net/dagstuhl/kcdagabs.tex",
  project = "http://www.hutter1.net/official/projects.htm#ait",
  issn = "1862-4405",
  publisher = "IBFI",
  _publisher = "Internationales Begegnungs- und Forschungszentrum fuer Informatik (IBFI), Schloss Dagstuhl, Germany",
  address = "Dagstuhl, Germany",
  keywords = "Information theory, Kolmogorov Complexity, effective randomness, algorithmic probability, recursion theory, computational complexity, machine learning",
  abstract = "From 29.01.06 to 03.02.06, the Dagstuhl Seminar 06051 ``Kolmogorov Complexity and Applications'' was held in the International Conference and Research Center (IBFI), Schloss Dagstuhl. During the seminar, several participants presented their current research, and ongoing work and open problems were discussed. Abstracts of the presentations given during the seminar as well as abstracts of seminar results and ideas are put together in this proceedings. The first section describes the seminar topics and goals in general. Links to extended abstracts or full papers are provided, if available.",
  note = "http://drops.dagstuhl.de/portals/06051",
}

## %-------------Publications-of-Marcus-Hutter-2005--------------%

@Article{Hutter:05mdl2px,
  author = "Jan Poland and Marcus Hutter",
  title = "Asymptotics of Discrete {MDL} for Online Prediction",
  journal = "IEEE Transactions on Information Theory",
  _month = nov,
  volume = "51",
  number = "11",
  pages = "3780--3795",
  year = "2005",
  bibtex = "http://www.hutter1.net/official/bib.htm#mdl2px",
  url = "http://arxiv.org/abs/cs.IT/0506022",
  pdf = "http://www.hutter1.net/ai/mdl2px.pdf",
  ps = "http://www.hutter1.net/ai/mdl2px.ps",
  latex = "http://www.hutter1.net/ai/mdl2px.zip",
  slides = "http://www.hutter1.net/ai/smdl2p.pdf",
  slidesppt = "http://www.hutter1.net/ai/smdl2p.ppt",
  project = "http://www.hutter1.net/official/projects.htm#mdl",
  doi = "10.1109/TIT.2005.856956",
  issn = "0018-9448",
  keywords = "Algorithmic Information Theory, Classification, Consistency, Discrete Model Class, Loss Bounds, Minimum Description Length, Regression, Sequence Prediction, Stabilization, Universal Induction.",
  abstract = "Minimum Description Length (MDL) is an important principle for induction and prediction, with strong relations to optimal Bayesian learning. This paper deals with learning non-i.i.d. processes by means of two-part MDL, where the underlying model class is countable. We consider the online learning framework, i.e. observations come in one by one, and the predictor is allowed to update his state of mind after each time step. We identify two ways of predicting by MDL for this setup, namely a static and a dynamic one. (A third variant, hybrid MDL, will turn out inferior.) We will prove that under the only assumption that the data is generated by a distribution contained in the model class, the MDL predictions converge to the true values almost surely. This is accomplished by proving finite bounds on the quadratic, the Hellinger, and the Kullback-Leibler loss of the MDL learner, which are however exponentially worse than for Bayesian prediction. We demonstrate that these bounds are sharp, even for model classes containing only Bernoulli distributions. We show how these bounds imply regret bounds for arbitrary loss functions. Our results apply to a wide range of setups, namely sequence prediction, pattern classification, regression, and universal induction in the sense of Algorithmic Information Theory among others.",
}

@Article{Hutter:05tree,
  author = "Marco Zaffalon and Marcus Hutter",
  title = "Robust Inference of Trees",
  journal = "Annals of Mathematics and Artificial Intelligence",
  volume = "45",
  pages = "215--239",
  _month = oct,
  year = "2005",
  _publisher = "Springer",
  bibtex = "http://www.hutter1.net/official/bib.htm#tree",
  url = "http://arxiv.org/abs/cs.LG/0511087",
  pdf = "http://www.hutter1.net/ai/tree.pdf",
  ps = "http://www.hutter1.net/ai/tree.ps",
  latex = "http://www.hutter1.net/ai/tree.zip",
  project = "http://www.hutter1.net/official/projects.htm#robust",
  doi = "10.1007/s10472-005-9007-9",
  issn = "1012-2443",
  categories = "I.2. [Artificial Intelligence]",
  keywords = "Robust inference, spanning trees, intervals, dependence, graphical models, mutual information, imprecise probabilities, imprecise Dirichlet model.",
  abstract = "This paper is concerned with the reliable inference of optimal tree-approximations to the dependency structure of an unknown distribution generating data. The traditional approach to the problem measures the dependency strength between random variables by the index called mutual information. In this paper reliability is achieved by Walley's imprecise Dirichlet model, which generalizes Bayesian learning with Dirichlet priors. Adopting the imprecise Dirichlet model results in posterior interval expectation for mutual information, and in a set of plausible trees consistent with the data. Reliable inference about the actual tree is achieved by focusing on the substructure common to all the plausible trees. We develop an exact algorithm that infers the substructure in time O(m^4), m being the number of random variables. The new algorithm is applied to a set of data sampled from a known distribution. The method is shown to reliably infer edges of the actual tree even when the data are very scarce, unlike the traditional approach. Finally, we provide lower and upper credibility limits for mutual information under the imprecise Dirichlet model. These enable the previous developments to be extended to a full inferential method for trees.",
}

@InProceedings{Hutter:05postbnd,
  author = "Alexey Chernov and Marcus Hutter",
  title = "Monotone Conditional Complexity Bounds on Future Prediction Errors",
  booktitle = "Proc. 16th International Conf. on Algorithmic Learning Theory ({ALT'05})",
  address = "Singapore",
  series = "LNAI",
  volume = "3734",
  _editor = "Sanjay Jain and Hans Ulrich Simon and Etsuji Tomita",
  publisher = "Springer",
  pages = "414--428",
  _month = oct,
  year = "2005",
  bibtex = "http://www.hutter1.net/official/bib.htm#postbnd",
  url = "http://arxiv.org/abs/cs.LG/0507041",
  pdf = "http://www.hutter1.net/ai/postbnd.pdf",
  ps = "http://www.hutter1.net/ai/postbnd.ps",
  latex = "http://www.hutter1.net/ai/postbnd.tex",
  slides = "http://www.hutter1.net/ai/spostbnd.pdf",
  project = "http://www.hutter1.net/official/projects.htm#ait",
  doi = "10.1007/11564089_32",
  issn = "0302-9743",
  isbn = "3-540-29242-X",
  keywords = "Kolmogorov complexity, posterior bounds, online sequential prediction, Solomonoff prior, monotone conditional complexity, total error, future loss, randomness deficiency.",
  abstract = "We bound the future loss when predicting any (computably) stochastic sequence online. Solomonoff finitely bounded the total deviation of his universal predictor M from the true distribution m by the algorithmic complexity of m. Here we assume we are at a time t>1 and already observed x=x_1...x_t. We bound the future prediction performance on x_{t+1}x_{t+2}... by a new variant of algorithmic complexity of m given x, plus the complexity of the randomness deficiency of x. The new complexity is monotone in its condition in the sense that this complexity can only decrease if the condition is prolonged. We also briefly discuss potential generalizations to Bayesian model classes and to classification problems.",
  support = "SNF grant 200020-100259 and 2100-67712",
  znote = "Acceptance rate: 30/98 = 30\%",
}

@InProceedings{Hutter:05actexp2,
  author = "Jan Poland and Marcus Hutter",
  title = "Defensive Universal Learning with Experts",
  booktitle = "Proc. 16th International Conf. on Algorithmic Learning Theory ({ALT'05})",
  address = "Singapore",
  series = "LNAI",
  volume = "3734",
  _editor = "Sanjay Jain and Hans Ulrich Simon and Etsuji Tomita",
  publisher = "Springer",
  _month = oct,
  pages = "356--370",
  year = "2005",
  bibtex = "http://www.hutter1.net/official/bib.htm#actexp2",
  url = "http://arxiv.org/abs/cs.LG/0507044",
  pdf = "http://www.hutter1.net/ai/actexp2.pdf",
  ps = "http://www.hutter1.net/ai/actexp2.ps",
  latex = "http://www.hutter1.net/ai/actexp2.tex",
  slides = "http://www.hutter1.net/ai/sactexp.pdf",
  slidesppt = "http://www.hutter1.net/ai/sactexp.ppt",
  project = "http://www.hutter1.net/official/projects.htm#expert",
  doi = "10.1007/11564089_28",
  issn = "0302-9743",
  isbn = "3-540-29242-X",
  keywords = "Prediction with expert advice, responsive environments, partial observation game, bandits, universal learning, asymptotic optimality.",
  abstract = "This paper shows how universal learning can be achieved with expert advice. To this aim, we specify an experts algorithm with the following characteristics: (a) it uses only feedback from the actions actually chosen (bandit setup), (b) it can be applied with countably infinite expert classes, and (c) it copes with losses that may grow in time appropriately slowly. We prove loss bounds against an adaptive adversary. From this, we obtain a master algorithm for ``reactive'' experts problems, which means that the master's actions may influence the behavior of the adversary. Our algorithm can significantly outperform standard experts algorithms on such problems. Finally, we combine it with a universal expert class. The resulting universal learner performs -- in a certain sense -- almost as well as any computable strategy, for any online decision problem. We also specify the (worst-case) convergence speed, which is very slow.",
  znote = "Acceptance rate: 30/98 = 30\%",
}

@InProceedings{Hutter:05iors,
  author = "Shane Legg and Marcus Hutter",
  title = "A Universal Measure of Intelligence for Artificial Agents",
  booktitle = "Proc. 21st International Joint Conf. on Artificial Intelligence ({IJCAI-2005})",
  pages = "1509--1510",
  _editor = "L. P. Kaelbling and A. Saffiotti",
  _publisher = "Professional Book Center",
  address = "Edinburgh, Scotland",
  _month = aug,
  year = "2005",
  bibtex = "http://www.hutter1.net/official/bib.htm#iors",
  http = "http://dl.acm.org/citation.cfm?id=1642293.1642533",
  pdf = "http://www.hutter1.net/ai/iors.pdf",
  ps = "http://www.hutter1.net/ai/iors.ps",
  slides = "http://www.hutter1.net/ai/siors.pdf",
  project = "http://www.hutter1.net/official/projects.htm#uai",
  press = "http://www.hutter1.net/official/press.htm#ior",
  isbn_print = "0-938075-93-4",
  isbn_cd = "0-938075-94-2",
  support = "SNF grant 2100-67712",
  znote = "Acceptance rate: 112/453 = 25\%",
}

@InProceedings{Hutter:05fuds,
  author = "Shane Legg and Marcus Hutter",
  title = "Fitness Uniform Deletion for Robust Optimization",
  booktitle = "Proc. Genetic and Evolutionary Computation Conference ({GECCO'05})",
  address = "Washington, DC, USA",
  editor = "H.-G. Beyer and others",
  publisher = "ACM SigEvo",
  _month = jun,
  year = "2005",
  pages = "1271--1278",
  bibtex = "http://www.hutter1.net/official/bib.htm#fuds",
  http = "http://www.hutter1.net/ai/fuds.htm",
  url = "http://arxiv.org/abs/cs.NE/0504035",
  pdf = "http://www.hutter1.net/ai/fuds.pdf",
  ps = "http://www.hutter1.net/ai/fuds.ps",
  latex = "http://www.hutter1.net/ai/fuds.zip",
  slides = "http://www.hutter1.net/ai/sfuds.pdf",
  slidesppt = "http://www.hutter1.net/ai/sfuds.ppt",
  project = "http://www.hutter1.net/official/projects.htm#optimize",
  press = "http://www.hutter1.net/official/press.htm#fuss",
  code1 = "http://www.hutter1.net/ai/fussdd.cpp",
  code2 = "http://www.hutter1.net/ai/fussdd.h",
  code3 = "http://www.hutter1.net/ai/fusstsp.cpp",
  code4 = "http://www.hutter1.net/ai/fusstsp.h",
  doi = "10.1145/1068009.1068216",
  isbn = "1-59593-010-8",
  keywords = "Evolutionary algorithm, deletion schemes, fitness evaluation, optimization, fitness landscapes, (self)adaptation.",
  abstract = "A commonly experienced problem with population based optimisation methods is the gradual decline in population diversity that tends to occur over time. This can slow a system's progress or even halt it completely if the population converges on a local optimum from which it cannot escape. In this paper we present the Fitness Uniform Deletion Scheme (FUDS), a simple but somewhat unconventional approach to this problem. Under FUDS the deletion operation is modified to only delete those individuals which are ``common'' in the sense that there exist many other individuals of similar fitness in the population. This makes it impossible for the population to collapse to a collection of highly related individuals with similar fitness. Our experimental results on a range of optimisation problems confirm this, in particular for deceptive optimisation problems the performance is significantly more robust to variation in the selection intensity.",
  znote = "Acceptance rate: 253/549 = 46\%",
}

@Article{Hutter:05expertx,
  author        = "Marcus Hutter and Jan Poland",
  title         = "Adaptive Online Prediction by Following the Perturbed Leader",
  volume        = "6",
  _month        = apr,
  year          = "2005",
  pages         = "639--660",
  journal       = "Journal of Machine Learning Research",
  publisher     = "Microtome",
  bibtex        = "http://www.hutter1.net/official/bib.htm#expertx",
  http          = "http://www.hutter1.net/ai/expertx.htm",
  url           = "http://arxiv.org/abs/cs.AI/0504078",
  url2          = "http://www.jmlr.org/papers/v6/hutter05a.html",
  pdf           = "http://www.hutter1.net/ai/expertx.pdf",
  ps            = "http://www.hutter1.net/ai/expertx.ps",
  latex         = "http://www.hutter1.net/ai/expertx.tex",
  slides        = "http://www.hutter1.net/ai/sexpert.pdf",
  project       = "http://www.hutter1.net/official/projects.htm#expert",
  issn          = "1532-4435",
  keywords      = "Prediction with Expert Advice, Follow the Perturbed Leader, general weights, adaptive learning rate, adaptive adversary, hierarchy of experts, expected and high probability bounds, general alphabet and loss, online sequential prediction.",
  abstract      = "When applying aggregating strategies to Prediction with Expert Advice, the learning rate must be adaptively tuned. The natural choice of sqrt(complexity/current loss) renders the analysis of Weighted Majority derivatives quite complicated. In particular, for arbitrary weights there have been no results proven so far. The analysis of the alternative ``Follow the Perturbed Leader'' (FPL) algorithm from Kalai \& Vempala (2003) (based on Hannan's algorithm) is easier. We derive loss bounds for adaptive learning rate and both finite expert classes with uniform weights and countable expert classes with arbitrary weights. For the former setup, our loss bounds match the best known results so far, while for the latter our results are new.",
}

@Article{Hutter:05mifs,
  author        = "Marcus Hutter and Marco Zaffalon",
  title         = "Distribution of Mutual Information from Complete and Incomplete Data",
  journal       = "Computational Statistics \& Data Analysis",
  volume        = "48",
  number        = "3",
  pages         = "633--657",
  _month        = mar,
  year          = "2005",
  publisher     = "Elsevier Science",
  bibtex        = "http://www.hutter1.net/official/bib.htm#mifs",
  http          = "http://www.hutter1.net/ai/mifs.htm",
  url           = "http://arxiv.org/abs/cs.LG/0403025",
  pdf           = "http://www.hutter1.net/ai/mifs.pdf",
  ps            = "http://www.hutter1.net/ai/mifs.ps",
  latex         = "http://www.hutter1.net/ai/mifs.zip",
  slides        = "http://www.hutter1.net/ai/smimiss.pdf",
  slidesppt     = "http://www.hutter1.net/ai/smimiss.ppt",
  project       = "http://www.hutter1.net/official/projects.htm#robust",
  code          = "http://www.hutter1.net/ai/mifs.cpp",
  doi           = "10.1016/j.csda.2004.03.010",
  issn          = "0167-9473",
  categories    = "I.2. [Artificial Intelligence]",
  keywords      = "Mutual information, cross entropy, Dirichlet distribution, second order distribution, expectation and variance of mutual information, feature selection, filters, naive Bayes classifier, Bayesian statistics.",
  abstract      = "Mutual information is widely used, in a descriptive way, to measure the stochastic dependence of categorical random variables. In order to address questions such as the reliability of the descriptive value, one must consider sample-to-population inferential approaches. This paper deals with the posterior distribution of mutual information, as obtained in a Bayesian framework by a second-order Dirichlet prior distribution. The exact analytical expression for the mean, and analytical approximations for the variance, skewness and kurtosis are derived. These approximations have a guaranteed accuracy level of the order O(1/n^3), where n is the sample size. Leading order approximations for the mean and the variance are derived in the case of incomplete samples. The derived analytical expressions allow the distribution of mutual information to be approximated reliably and quickly. In fact, the derived expressions can be computed with the same order of complexity needed for descriptive mutual information. This makes the distribution of mutual information become a concrete alternative to descriptive mutual information in many applications which would benefit from moving to the inductive side. Some of these prospective applications are discussed, and one of them, namely feature selection, is shown to perform significantly better when inductive mutual information is used.",
}

@InProceedings{Hutter:05mdlreg,
  author        = "Jan Poland and Marcus Hutter",
  title         = "Strong Asymptotic Assertions for Discrete {MDL} in Regression and Classification",
  booktitle     = "Proc. 14th {D}utch-{B}elgium Conf. on Machine Learning ({Benelearn'05})",
  address       = "Enschede",
  _editor       = "Martijn {van Otterlo} and Mannes Poel and Anton Nijholt",
  pages         = "67--72",
  _month        = feb,
  year          = "2005",
  _number       = "WP05-03",
  _series       = "CTIT Workshop Proceedings Series",
  _organization = "CTIT Research Institute, University of Twente",
  bibtex        = "http://www.hutter1.net/official/bib.htm#mdlreg",
  url           = "http://arxiv.org/abs/math.ST/0502315",
  conf          = "http://hmi.ewi.utwente.nl/conference/benelearn2005",
  pdf           = "http://www.hutter1.net/ai/mdlreg.pdf",
  ps            = "http://www.hutter1.net/ai/mdlreg.ps",
  latex         = "http://www.hutter1.net/ai/mdlreg.tex",
  slides        = "http://www.hutter1.net/ai/smdlreg.pdf",
  slidesppt     = "http://www.hutter1.net/ai/smdlreg.ppt",
  project       = "http://www.hutter1.net/official/projects.htm#mdl",
  issn          = "0929-0672",
  keywords      = "Regression, Classification, Sequence Prediction, Machine Learning, Minimum Description Length, Bayes Mixture, Marginalization, Convergence, Discrete Model Classes.",
  abstract      = "We study the properties of the MDL (or maximum penalized complexity) estimator for Regression and Classification, where the underlying model class is countable. We show in particular a finite bound on the Hellinger losses under the only assumption that there is a ``true'' model contained in the class. This implies almost sure convergence of the predictive distribution to the true one at a fast rate. It corresponds to Solomonoff's central theorem of universal induction, however with a bound that is exponentially larger.",
}

@InProceedings{Hutter:05actexp,
  author        = "Jan Poland and Marcus Hutter",
  title         = "Master Algorithms for Active Experts Problems based on Increasing Loss Values",
  booktitle     = "Proc. 14th {D}utch-{B}elgium Conf. on Machine Learning ({Benelearn'05})",
  address       = "Enschede",
  _editor       = "Martijn {van Otterlo} and Mannes Poel and Anton Nijholt",
  pages         = "59--66",
  _month        = feb,
  year          = "2005",
  _number       = "WP05-03",
  _series       = "CTIT Workshop Proceedings Series",
  _organization = "CTIT Research Institute, University of Twente",
  bibtex        = "http://www.hutter1.net/official/bib.htm#actexp",
  url           = "http://arxiv.org/abs/cs.LG/0502067",
  conf          = "http://hmi.ewi.utwente.nl/conference/benelearn2005",
  pdf           = "http://www.hutter1.net/ai/actexp.pdf",
  ps            = "http://www.hutter1.net/ai/actexp.ps",
  latex         = "http://www.hutter1.net/ai/actexp.tex",
  slides        = "http://www.hutter1.net/ai/sactexp.pdf",
  slidesppt     = "http://www.hutter1.net/ai/sactexp.ppt",
  project       = "http://www.hutter1.net/official/projects.htm#expert",
  issn          = "0929-0672",
  keywords      = "Prediction with expert advice, responsive environments, partial observation game, bandits, universal learning, asymptotic optimality.",
  abstract      = "We specify an experts algorithm with the following characteristics: (a) it uses only feedback from the actions actually chosen (bandit setup), (b) it can be applied with countably infinite expert classes, and (c) it copes with losses that may grow in time appropriately slowly. We prove loss bounds against an adaptive adversary. From this, we obtain master algorithms for ``active experts problems'', which means that the master's actions may influence the behavior of the adversary. Our algorithm can significantly outperform standard experts algorithms on such problems. Finally, we combine it with a universal expert class. This results in a (computationally infeasible) universal master algorithm which performs - in a certain sense - almost as well as any computable strategy, for any online problem.",
}

@Slides{Hutter:05predict,
  author        = "Marcus Hutter",
  title         = "How to predict with {Bayes}, {MDL}, and {Experts}",
  _month        = jan,
  year          = "2005",
  note          = "Presented at the Machine Learning Summer School (MLSS)",
  http          = "http://canberra05.mlss.cc/",
  url           = "http://www.idsia.ch/~marcus/ai/predict.htm",
  slides        = "http://www.idsia.ch/~marcus/ai/spredict.pdf",
}

@InProceedings{Hutter:05bayestree,
  author        = "Marcus Hutter",
  title         = "Fast Non-Parametric {B}ayesian Inference on Infinite Trees",
  booktitle     = "Proc. 10th International Conf. on Artificial Intelligence and Statistics ({AISTATS-2005})",
  _address      = "Barbados",
  _editor       = "R. G. Cowell and Z. Ghahramani",
  publisher     = "Society for Artificial Intelligence and Statistics",
  pages         = "144--151",
  _month        = jan,
  year          = "2005",
  bibtex        = "http://www.hutter1.net/official/bib.htm#bayestree",
  http          = "http://www.hutter1.net/ai/bayestree.htm",
  url           = "http://arxiv.org/abs/math.PR/0411515",
  pdf           = "http://www.hutter1.net/ai/bayestree.pdf",
  ps            = "http://www.hutter1.net/ai/bayestree.ps",
  latex         = "http://www.hutter1.net/ai/bayestree.zip",
  slides        = "http://www.hutter1.net/ai/sbayestree.pdf",
  project       = "http://www.hutter1.net/official/projects.htm#bayes",
  code          = "http://www.hutter1.net/ai/bayestree.c",
  isbn          = "0-9727358-1-X",
  keywords      = "Bayesian density estimation, exact linear time algorithm, non-parametric inference, adaptive infinite tree, Polya tree, scale invariance.",
  abstract      = "Given i.i.d. data from an unknown distribution, we consider the problem of predicting future items. An adaptive way to estimate the probability density is to recursively subdivide the domain to an appropriate data-dependent granularity. A Bayesian would assign a data-independent prior probability to ``subdivide'', which leads to a prior over infinite(ly many) trees. We derive an exact, fast, and simple inference algorithm for such a prior, for the data evidence, the predictive distribution, the effective model dimension, and other quantities.",
  znote         = "Acceptance rate: 57/150 = 38\%",
}

## %-------------Publications-of-Marcus-Hutter-2004--------------%

@TechReport{Hutter:04mdp,
  author        = "Shane Legg and Marcus Hutter",
  number        = "IDSIA-21-04",
  title         = "Ergodic {MDP}s Admit Self-Optimising Policies",
  year          = "2004",
  institution   = "{IDSIA}",
}

@TechReport{Hutter:04env,
  author        = "Shane Legg and Marcus Hutter",
  number        = "IDSIA-20-04",
  title         = "A Taxonomy for Abstract Environments",
  year          = "2004",
  institution   = "{IDSIA}",
}

@Book{Hutter:04uaibook,
  author        = "Marcus Hutter",
  title         = "Universal Artificial Intelligence: Sequential Decisions based on Algorithmic Probability",
  _series       = "EATCS",
  publisher     = "Springer",
  address       = "Berlin",
  year          = "2005",
  isbn          = "3-540-22139-5",
  isbn-online   = "978-3-540-26877-2",
  doi           = "10.1007/b138233",
  note          = "300 pages, http://www.hutter1.net/ai/uaibook.htm",
  url           = "http://www.hutter1.net/ai/uaibook.htm",
  review1       = "AIJ: http://dx.doi.org/10.1016/j.artint.2006.10.005",
  review2       = "ACM: http://www.reviews.com/review/review_review.cfm?review_id=131175",
  reviews       = "Amazon: http://www.amazon.com/exec/obidos/redirect?tag=homepageofm0a-20&path=ASIN/3540221395",
  slides        = "http://www.hutter1.net/ai/suaibook.pdf",
  video         = "http://vimeo.com/14888930",
  keywords      = "Artificial intelligence; algorithmic probability; sequential decision theory; Solomonoff induction; Kolmogorov complexity; Bayes mixture distributions; reinforcement learning; universal sequence prediction; tight loss and error bounds; Levin search; strategic games; function minimization; supervised learning.",
  abstract      = "This book presents sequential decision theory from a novel algorithmic information theory perspective. While the former theory is suited for active agents in known environments, the latter is suited for passive prediction of unknown environments. The book introduces these two well-known but very different ideas and removes the limitations by unifying them to one parameter-free theory of an optimal reinforcement learning agent interacting with an arbitrary unknown world. Most if not all AI problems can easily be formulated within this theory, which reduces the conceptual problems to pure computational ones. Considered problem classes include sequence prediction, strategic games, function minimization, reinforcement and supervised learning. Formal definitions of intelligence order relations, the horizon problem and relations to other approaches to AI are discussed. One intention of this book is to excite a broader AI audience about abstract algorithmic information theory concepts, and conversely to inform theorists about exciting applications to AI.",
  support       = "SNF grant 2000-61847",
}

@InProceedings{Hutter:04mlconvx,
  author        = "Marcus Hutter and Andrej A. Muchnik",
  title         = "Universal Convergence of Semimeasures on Individual Random Sequences",
  booktitle     = "Proc. 15th International Conf. on Algorithmic Learning Theory ({ALT'04})",
  address       = "Padova, Italy",
  series        = "LNAI",
  volume        = "3244",
  _editor       = "S. Ben-David and J. Case and A. Maruoka",
  publisher     = "Springer",
  pages         = "234--248",
  year          = "2004",
  doi           = "10.1007/978-3-540-30215-5_19",
  issn          = "0302-9743",
  isbn          = "3-540-23356-3",
  http          = "http://www.hutter1.net/ai/mlconvx.htm",
  url           = "http://arxiv.org/abs/cs.LG/0407057",
  keywords      = "Sequence prediction; Algorithmic Information Theory; universal enumerable semimeasure; mixture distributions; posterior convergence; Martin-L{\"o}f randomness; quasimeasures.",
  abstract      = "Solomonoff's central result on induction is that the posterior of a universal semimeasure M converges rapidly and with probability 1 to the true sequence generating posterior mu, if the latter is computable. Hence, M is eligible as a universal sequence predictor in case of unknown mu. Despite some nearby results and proofs in the literature, the stronger result of convergence for all (Martin-Loef) random sequences remained open. Such a convergence result would be particularly interesting and natural, since randomness can be defined in terms of M itself. We show that there are universal semimeasures M which do not converge for all random sequences, i.e. we give a partial negative answer to the open problem. We also provide a positive answer for some non-universal semimeasures. We define the incomputable measure D as a mixture over all computable measures and the enumerable semimeasure W as a mixture over all enumerable nearly-measures. We show that W converges to D and D to mu on all random sequences. The Hellinger distance measuring closeness of two distributions plays a central role.",
  znote         = "Acceptance rate: 29/91 = 32\%",
}

@InProceedings{Hutter:04expert,
  author        = "Marcus Hutter and Jan Poland",
  title         = "Prediction with Expert Advice by Following the Perturbed Leader for General Weights",
  booktitle     = "Proc. 15th International Conf. on Algorithmic Learning Theory ({ALT'04})",
  address       = "Padova, Italy",
  series        = "LNAI",
  volume        = "3244",
  _editor       = "S. Ben-David and J. Case and A. Maruoka",
  publisher     = "Springer",
  pages         = "279--293",
  year          = "2004",
  doi           = "10.1007/978-3-540-30215-5_22",
  issn          = "0302-9743",
  isbn          = "3-540-23356-3",
  http          = "http://www.hutter1.net/ai/expert.htm",
  url           = "http://arxiv.org/abs/cs.LG/0405043",
  keywords      = "Prediction with Expert Advice, Follow the Perturbed Leader, general weights, adaptive learning rate, hierarchy of experts, expected and high probability bounds, general alphabet and loss, online sequential prediction.",
  abstract      = "When applying aggregating strategies to Prediction with Expert Advice, the learning rate must be adaptively tuned. The natural choice of sqrt(complexity/current loss) renders the analysis of Weighted Majority derivatives quite complicated. In particular, for arbitrary weights there have been no results proven so far. The analysis of the alternative ``Follow the Perturbed Leader'' (FPL) algorithm from Kalai \& Vempala (2003) (based on Hannan's algorithm) is easier. We derive loss bounds for adaptive learning rate and both finite expert classes with uniform weights and countable expert classes with arbitrary weights. For the former setup, our loss bounds match the best known results so far, while for the latter our results are new.",
  znote         = "Acceptance rate: 29/91 = 32\%",
}

@InProceedings{Hutter:04mdlspeed,
  author        = "Jan Poland and Marcus Hutter",
  title         = "On the convergence speed of {MDL} predictions for {B}ernoulli sequences",
  booktitle     = "Proc. 15th International Conf. on Algorithmic Learning Theory ({ALT'04})",
  address       = "Padova, Italy",
  series        = "LNAI",
  volume        = "3244",
  _editor       = "S. Ben-David and J. Case and A. Maruoka",
  publisher     = "Springer",
  pages         = "294--308",
  year          = "2004",
  doi           = "10.1007/978-3-540-30215-5_23",
  issn          = "0302-9743",
  isbn          = "3-540-23356-3",
  http          = "http://www.hutter1.net/ai/mdlspeed.htm",
  url           = "http://arxiv.org/abs/cs.LG/0407039",
  keywords      = "MDL, Minimum Description Length, Convergence Rate, Prediction, Bernoulli, Discrete Model Class.",
  abstract      = "We consider the Minimum Description Length principle for online sequence prediction. If the underlying model class is discrete, then the total expected square loss is a particularly interesting performance measure: (a) this quantity is bounded, implying convergence with probability one, and (b) it additionally specifies a `rate of convergence'. Generally, for MDL only exponential loss bounds hold, as opposed to the linear bounds for a Bayes mixture. We show that this is even the case if the model class contains only Bernoulli distributions. We derive a new upper bound on the prediction error for countable Bernoulli classes. This implies a small bound (comparable to the one for Bayes mixtures) for certain important model classes. The results apply to many Machine Learning tasks including classification and hypothesis testing. We provide arguments that our theorems generalize to countable classes of i.i.d. models.",
  znote         = "Acceptance rate: 29/91 = 32\%",
}

@TechReport{Hutter:04bayespea,
  author        = "Marcus Hutter",
  title         = "Online Prediction -- {B}ayes versus Experts",
  institution   = "http://www.idsia.ch/$_{^\sim}$marcus/ai/bayespea.htm",
  _month        = jul,
  pages         = "4 pages",
  year          = "2004",
  note          = "Presented at the {\em EU PASCAL Workshop on Learning Theoretic and Bayesian Inductive Principles (LTBIP-2004)}",
  url           = "http://www.hutter1.net/ai/bayespea.htm",
  ps            = "http://www.hutter1.net/ai/bayespea.ps",
  pdf           = "http://www.hutter1.net/ai/bayespea.pdf",
  slides        = "http://www.hutter1.net/ai/sbayespea.pdf",
  keywords      = "Bayesian sequence prediction; Prediction with Expert Advice; general weights, alphabet and loss.",
  abstract      = "We derive a very general regret bound in the framework of prediction with expert advice, which challenges the best known regret bound for Bayesian sequence prediction. Both bounds of the form $\sqrt{\mbox{Loss}\times\mbox{complexity}}$ hold for any bounded loss-function, any prediction and observation spaces, arbitrary expert/environment classes and weights, and unknown sequence length.",
}

@InProceedings{Hutter:04mdl2p,
  author        = "Jan Poland and Marcus Hutter",
  title         = "Convergence of Discrete {MDL} for Sequential Prediction",
  booktitle     = "Proc. 17th Annual Conf. on Learning Theory ({COLT'04})",
  address       = "Banff, Canada",
  series        = "LNAI",
  volume        = "3120",
  _editor       = "J. Shawe-Taylor and Y. Singer",
  publisher     = "Springer",
  pages         = "300--314",
  year          = "2004",
  doi           = "10.1007/978-3-540-27819-1_21",
  isbn          = "3-540-22282-0",
  http          = "http://www.hutter1.net/ai/mdl2p.htm",
  url           = "http://arxiv.org/abs/cs.LG/0404057",
  keywords      = "Minimum Description Length, Sequence Prediction, Convergence, Discrete Model Classes, Universal Induction, Stabilization, Algorithmic Information Theory.",
  abstract      = "We study the properties of the Minimum Description Length principle for sequence prediction, considering a two-part MDL estimator which is chosen from a countable class of models. This applies in particular to the important case of universal sequence prediction, where the model class corresponds to all algorithms for some fixed universal Turing machine (this correspondence is by enumerable semimeasures, hence the resulting models are stochastic). We prove convergence theorems similar to Solomonoff's theorem of universal induction, which also holds for general Bayes mixtures. The bound characterizing the convergence speed for MDL predictions is exponentially larger as compared to Bayes mixtures. We observe that there are at least three different ways of using MDL for prediction. One of these has worse prediction properties, for which predictions only converge if the MDL estimator stabilizes. We establish sufficient conditions for this to occur. Finally, some immediate consequences for complexity relations and randomness criteria are proven.",
  znote         = "Acceptance rate: 44/107 = 41\%",
}

@InProceedings{Hutter:04fussexp,
  author        = "Shane Legg and Marcus Hutter and Akshat Kumar",
  title         = "Tournament versus Fitness Uniform Selection",
  booktitle     = "Proc. 2004 Congress on Evolutionary Computation ({CEC'04})",
  address       = "Portland, OR, USA",
  xeditor       = "??",
  publisher     = "IEEE",
  isbn          = "0-7803-8515-2",
  _month        = jun,
  year          = "2004",
  pages         = "2144--2151",
  keywords      = "Selection schemes, fitness evaluation, optimization, fitness landscapes, basic working principles of evolutionary computations, (self)adaptation, evolutionary algorithm, deceptive \& multimodal optimization problems.",
  http          = "http://www.hutter1.net/ai/fussexp.htm",
  url           = "http://arxiv.org/abs/cs.LG/0403038",
  doi           = "10.1109/CEC.2004.1331162",
  press         = "http://www.trnmag.com/Stories/032801/Diversity_trumps_fitness_032801.html",
  abstract      = "In evolutionary algorithms a critical parameter that must be tuned is that of selection pressure. If it is set too low then the rate of convergence towards the optimum is likely to be slow. Alternatively if the selection pressure is set too high the system is likely to become stuck in a local optimum due to a loss of diversity in the population. The recent Fitness Uniform Selection Scheme (FUSS) is a conceptually simple but somewhat radical approach to addressing this problem --- rather than biasing the selection towards higher fitness, FUSS biases selection towards sparsely populated fitness levels. In this paper we compare the relative performance of FUSS with the well known tournament selection scheme on a range of problems.",
  znote         = "Acceptance rate: 300/460 = 65\%",
}

## %-------------Publications-of-Marcus-Hutter-2003--------------%

@PhDThesis{Hutter:03habil,
  author        = "Marcus Hutter",
  school        = "Fakult{\"a}t f{\"u}r Informatik",
  address       = "TU M{\"u}nchen",
  title         = "Optimal Sequential Decisions based on Algorithmic Probability",
  year          = "2003",
  pages         = "1--288",
  http          = "http://www.hutter1.net/ai/habil.htm",
  url           = "http://arxiv.org/abs/cs.AI/0306091",
  keywords      = "Artificial intelligence; algorithmic probability; sequential decision theory; Solomonoff induction; Kolmogorov complexity; Bayes-mixture distributions; reinforcement learning; universal sequence prediction; tight loss and error bounds; Levin search; strategic games; function minimization; supervised learning.",
  abstract      = "Decision theory formally solves the problem of rational agents in uncertain worlds if the true environmental prior probability distribution is known. Solomonoff's theory of universal induction formally solves the problem of sequence prediction for unknown prior distribution. In this \thesis\ both ideas are unified to one parameter-free theory for universal Artificial Intelligence. We give strong arguments that the resulting AIXI model is the most intelligent unbiased agent possible. We outline for a number of problem classes, including sequence prediction, strategic games, function minimization, reinforcement and supervised learning, how the AIXI model can formally solve them. The major drawback of the AIXI model is that it is uncomputable. To overcome this problem, we construct a modified algorithm AIXI$tl$, which is still effectively more intelligent than any other time $t$ and length $l$ bounded agent. The computation time of AIXI$tl$ is of the order $t\cdot 2^l$. The discussion includes formal definitions of intelligence order relations, the horizon problem and relations of the AIXI theory to other AI approaches.",
}

@InProceedings{Hutter:03unimdl,
  author        = "Marcus Hutter",
  title         = "Sequence Prediction based on Monotone Complexity",
  booktitle     = "Proc. 16th Annual Conf. on Learning Theory ({COLT'03})",
  address       = "Washington, DC, USA",
  series        = "LNAI",
  volume        = "2777",
  _editor       = "B. Sch{\"o}lkopf and M. K. Warmuth",
  publisher     = "Springer",
  pages         = "506--521",
  year          = "2003",
  isbn          = "3-540-40720-0",
  doi           = "10.1007/978-3-540-45167-9_37",
  http          = "http://www.hutter1.net/ai/unimdl.htm",
  url           = "http://arxiv.org/abs/cs.AI/0306036",
  keywords      = "Sequence prediction; Algorithmic Information Theory; Solomonoff's prior; Monotone Kolmogorov Complexity; Minimal Description Length; Convergence; Self-Optimizingness",
  abstract      = "This paper studies sequence prediction based on the monotone Kolmogorov complexity $\Km=-\lb m$, i.e.\ based on universal MDL. $m$ is extremely close to Solomonoff's prior $M$, the latter being an excellent predictor in deterministic as well as probabilistic environments, where performance is measured in terms of convergence of posteriors or losses. Despite this closeness to $M$, it is difficult to assess the prediction quality of $m$, since little is known about the closeness of their posteriors, which are the important quantities for prediction. We show that for deterministic computable environments, the ``posterior'' and losses of $m$ converge, but rapid convergence could only be shown on-sequence; the off-sequence behavior is unclear. In probabilistic environments, neither the posterior nor the losses converge, in general.",
  znote         = "Acceptance rate: 49/92 = 53\%",
}

@InProceedings{Hutter:03unipriors,
  author        = "Marcus Hutter",
  title         = "On the Existence and Convergence of Computable Universal Priors",
  booktitle     = "Proc. 14th International Conf. on Algorithmic Learning Theory ({ALT'03})",
  address       = "Sapporo, Japan",
  _editor       = "Ricard Gavald{\`a} and Klaus P. Jantke and Eiji Takimoto",
  series        = "LNAI",
  volume        = "2842",
  publisher     = "Springer",
  pages         = "298--312",
  _month        = sep,
  year          = "2003",
  issn          = "0302-9743",
  isbn          = "3-540-20291-9",
  doi           = "10.1007/978-3-540-39624-6_24",
  http          = "http://www.hutter1.net/ai/uniprior.htm",
  url           = "http://arxiv.org/abs/cs.LG/0305052",
  keywords      = "Sequence prediction; Algorithmic Information Theory; Solomonoff's prior; universal probability; mixture distributions; posterior convergence; computability concepts; Martin-L{\"o}f randomness.",
  abstract      = "Solomonoff unified Occam's razor and Epicurus' principle of multiple explanations to one elegant, formal, universal theory of inductive inference, which initiated the field of algorithmic information theory. His central result is that the posterior of his universal semimeasure $M$ converges rapidly to the true sequence generating posterior $\mu$, if the latter is computable. Hence, $M$ is eligible as a universal predictor in case of unknown $\mu$. We investigate the existence, computability and convergence of universal (semi)measures for a hierarchy of computability classes: finitely computable, estimable, (co)enumerable, and approximable. For instance, $\MM(x)$ is known to be enumerable, but not finitely computable, and to dominate all enumerable semimeasures. We define seven classes of (semi)measures based on these four computability concepts. Each class may or may not contain a (semi)measure which dominates all elements of another class. The analysis of these 49 cases can be reduced to four basic cases, two of them being new. We present proofs for discrete and continuous semimeasures. We also investigate more closely the type of convergence, possibly implied by universality (in difference and in ratio, with probability 1, in mean sum, and for Martin-L{\"o}f random sequences).",
  znote         = "Acceptance rate: 19/37 = 51\%?",
}

@InProceedings{Hutter:03mlconv,
  author        = "Marcus Hutter",
  title         = "An Open Problem Regarding the Convergence of Universal A Priori Probability",
  booktitle     = "Proc. 16th Annual Conf. on Learning Theory ({COLT'03})",
  address       = "Washington, DC, USA",
  series        = "LNAI",
  volume        = "2777",
  _editor       = "B. Sch{\"o}lkopf and M. K. Warmuth",
  publisher     = "Springer",
  pages         = "738--740",
  year          = "2003",
  isbn          = "3-540-40720-0",
  doi           = "10.1007/978-3-540-45167-9_58",
  url           = "http://www.hutter1.net/ai/mlconv.htm",
  keywords      = "Sequence prediction; Algorithmic Information Theory; Solomonoff's prior; universal probability; posterior convergence; Martin-L{\"o}f randomness.",
  abstract      = "Is the textbook result that Solomonoff's universal posterior converges to the true posterior for all Martin-L{\"o}f random sequences true?",
}

@Article{Hutter:03optisp,
  author        = "Marcus Hutter",
  title         = "Optimality of Universal {B}ayesian Prediction for General Loss and Alphabet",
  _month        = nov,
  volume        = "4",
  year          = "2003",
  pages         = "971--1000",
  journal       = "Journal of Machine Learning Research",
  publisher     = "MIT Press",
  http          = "http://www.hutter1.net/ai/optisp.htm",
  url           = "http://arxiv.org/abs/cs.LG/0311014",
  url2          = "http://www.jmlr.org/papers/volume4/hutter03a/",
  url3          = "http://www.jmlr.org/papers/v4/hutter03a.html",
  issn          = "1532-4435",
  keywords      = "Bayesian sequence prediction; mixture distributions; Solomonoff induction; Kolmogorov complexity; learning; universal probability; tight loss and error bounds; Pareto-optimality; games of chance; classification.",
  abstract      = "Various optimality properties of universal sequence predictors based on Bayes-mixtures in general, and Solomonoff's prediction scheme in particular, will be studied. The probability of observing $x_t$ at time $t$, given past observations $x_1...x_{t-1}$ can be computed with the chain rule if the true generating distribution $\mu$ of the sequences $x_1x_2x_3...$ is known. If $\mu$ is unknown, but known to belong to a countable or continuous class $\M$ one can base one's prediction on the Bayes-mixture $\xi$ defined as a $w_\nu$-weighted sum or integral of distributions $\nu\in\M$. The cumulative expected loss of the Bayes-optimal universal prediction scheme based on $\xi$ is shown to be close to the loss of the Bayes-optimal, but infeasible prediction scheme based on $\mu$. We show that the bounds are tight and that no other predictor can lead to significantly smaller bounds. Furthermore, for various performance measures, we show Pareto-optimality of $\xi$ and give an Occam's razor argument that the choice $w_\nu\sim 2^{-K(\nu)}$ for the weights is optimal, where $K(\nu)$ is the length of the shortest program describing $\nu$. The results are applied to games of chance, defined as a sequence of bets, observations, and rewards. The prediction schemes (and bounds) are compared to the popular predictors based on expert advice. Extensions to infinite alphabets, partial, delayed and probabilistic prediction, classification, and more active systems are briefly discussed.",
  znote         = "Inofficial numbers: Acceptance rate: 27\%",
}

% Hutter:03idm -- conference paper (ISIPTA-2003 proceedings).
% _editor and znote are non-standard field names and are ignored by standard styles.
@InProceedings{Hutter:03idm,
  author    = "Marcus Hutter",
  title     = "Robust Estimators under the {Imprecise} {Dirichlet} {Model}",
  booktitle = "Proc. 3rd International Symposium on Imprecise Probabilities and Their Applications ({ISIPTA-2003})",
  _editor   = "Jean-Marc Bernard and Teddy Seidenfeld and Marco Zaffalon",
  publisher = "Carleton Scientific",
  series    = "Proceedings in Informatics",
  volume    = "18",
  address   = "Lugano, Switzerland",
  year      = "2003",
  pages     = "274--289",
  isbn      = "1-894145-17-8",
  http      = "http://www.hutter1.net/ai/idm.htm",
  url       = "http://arxiv.org/abs/math.PR/0305121",
  keywords  = "Imprecise Dirichlet Model; exact, conservative, approximate, robust, confidence interval estimates; entropy; mutual information.",
  abstract  = "Walley's Imprecise Dirichlet Model (IDM) for categorical data overcomes several fundamental problems which other approaches to uncertainty suffer from. Yet, to be useful in practice, one needs efficient ways for computing the imprecise=robust sets or intervals. The main objective of this work is to derive exact, conservative, and approximate, robust and credible interval estimates under the IDM for a large class of statistical estimators, including the entropy and mutual information.",
  znote     = "Unofficial numbers: Acceptance rate: 44/55 = 80\% ?",
}

% Hutter:03mimiss -- conference paper (KI-2003, LNAI 2821).
% Names in the (disabled) _editor list are separated by " and ", the only name separator BibTeX recognises.
@InProceedings{Hutter:03mimiss,
  author    = "Marcus Hutter and Marco Zaffalon",
  title     = "Bayesian Treatment of Incomplete Discrete Data applied to Mutual Information and Feature Selection",
  _month    = sep,
  year      = "2003",
  pages     = "396--406",
  series    = "LNAI",
  volume    = "2821",
  booktitle = "Proc. 26th German Conf. on Artificial Intelligence (KI-2003)",
  _editor   = "A. G{\"u}nter and R. Kruse and B. Neumann",
  address   = "Hamburg, Germany",
  publisher = "Springer",
  doi       = "10.1007/978-3-540-39451-8_29",
  issn      = "0302-9743",
  isbn      = "3-540-00168-9",
  http      = "http://www.hutter1.net/ai/mimiss.htm",
  url       = "http://arxiv.org/abs/cs.LG/0306126",
  keywords  = "Incomplete data, Bayesian statistics, expectation maximization, global optimization, Mutual Information, Cross Entropy, Dirichlet distribution, Second order distribution, Credible intervals, expectation and variance of mutual information, missing data, Robust feature selection, Filter approach, naive Bayes classifier.",
  abstract  = "Given the joint chances of a pair of random variables one can compute quantities of interest, like the mutual information. The Bayesian treatment of unknown chances involves computing, from a second order prior distribution and the data likelihood, a posterior distribution of the chances. A common treatment of incomplete data is to assume ignorability and determine the chances by the expectation maximization (EM) algorithm. The two different methods above are well established but typically separated. This paper joins the two approaches in the case of Dirichlet priors, and derives efficient approximations for the mean, mode and the (co)variance of the chances and the mutual information. Furthermore, we prove the unimodality of the posterior distribution, whence the important property of convergence of EM to the global maximum in the chosen framework. These results are applied to the problem of selecting features for incremental learning and naive Bayes classification. A fast filter based on the distribution of mutual information is shown to outperform the traditional filter based on empirical mutual information on a number of incomplete real data sets.",
  znote     = "Acceptance rate: 42/90 = 46\%",
}

% Hutter:03spupper -- journal article (IEEE Transactions on Information Theory 49(8), 2003).
@Article{Hutter:03spupper,
  author   = "Marcus Hutter",
  title    = "Convergence and Loss Bounds for {Bayesian} Sequence Prediction",
  _month   = aug,
  volume   = "49",
  number   = "8",
  year     = "2003",
  pages    = "2061--2067",
  address  = "Manno(Lugano), Switzerland",
  journal  = "IEEE Transactions on Information Theory",
  doi      = "10.1109/TIT.2003.814488",
  issn     = "0018-9448",
  http     = "http://www.hutter1.net/ai/spupper.htm",
  url      = "http://arxiv.org/abs/cs.LG/0301014",
  keywords = "Bayesian sequence prediction; general loss function and bounds; convergence; mixture distributions.",
  abstract = "The probability of observing $x_t$ at time $t$, given past observations $x_1...x_{t-1}$ can be computed with Bayes rule if the true generating distribution $\mu$ of the sequences $x_1x_2x_3...$ is known. If $\mu$ is unknown, but known to belong to a class $M$ one can base one's prediction on the Bayes mix $\xi$ defined as a weighted sum of distributions $\nu\in M$. Various convergence results of the mixture posterior $\xi_t$ to the true posterior $\mu_t$ are presented. In particular a new (elementary) derivation of the convergence $\xi_t/\mu_t\to 1$ is provided, which additionally gives the rate of convergence. A general sequence predictor is allowed to choose an action $y_t$ based on $x_1...x_{t-1}$ and receives loss $\ell_{x_t y_t}$ if $x_t$ is the next symbol of the sequence. No assumptions are made on the structure of $\ell$ (apart from being bounded) and $M$. The Bayes-optimal prediction scheme $\Lambda_\xi$ based on mixture $\xi$ and the Bayes-optimal informed prediction scheme $\Lambda_\mu$ are defined and the total loss $L_\xi$ of $\Lambda_\xi$ is bounded in terms of the total loss $L_\mu$ of $\Lambda_\mu$. It is shown that $L_\xi$ is bounded for bounded $L_\mu$ and $L_\xi/L_\mu\to 1$ for $L_\mu\to \infty$. Convergence of the instantaneous losses is also proven.",
}

## %-------------Publications-of-Marcus-Hutter-2002--------------%

% Hutter:02feature -- conference paper (UAI-2002).
% The publisher's city is kept in its own address field rather than inside publisher.
@InProceedings{Hutter:02feature,
  author     = "Marco Zaffalon and Marcus Hutter",
  title      = "Robust Feature Selection by Mutual Information Distributions",
  _month     = jun,
  year       = "2002",
  pages      = "577--584",
  booktitle  = "Proc. 18th International Conf. on Uncertainty in Artificial Intelligence (UAI-2002)",
  _editor    = "A. Darwiche and N. Friedman",
  publisher  = "Morgan Kaufmann",
  address    = "San Francisco, CA",
  isbn       = "1-55860-897-4",
  http       = "http://www.hutter1.net/ai/feature.htm",
  url        = "http://arxiv.org/abs/cs.AI/0206006",
  categories = "I.2. [Artificial Intelligence]",
  keywords   = "Robust feature selection, Filter approach, naive Bayes classifier, Mutual Information, Cross Entropy, Dirichlet distribution, Second order distribution, Bayesian statistics, Credible intervals, expectation and variance of mutual information, missing data.",
  abstract   = "Mutual information is widely used in artificial intelligence, in a descriptive way, to measure the stochastic dependence of discrete random variables. In order to address questions such as the reliability of the empirical value, one must consider sample-to-population inferential approaches. This paper deals with the distribution of mutual information, as obtained in a Bayesian framework by a second-order Dirichlet prior distribution. The exact analytical expression for the mean and an analytical approximation of the variance are reported. Asymptotic approximations of the distribution are proposed. The results are applied to the problem of selecting features for incremental learning and classification of the naive Bayes classifier. A fast, newly defined method is shown to outperform the traditional approach based on empirical mutual information on a number of real data sets. Finally, a theoretical development is reported that allows one to efficiently extend the above methods to incomplete samples in an easy and effective way.",
  znote      = "Acceptance rate: 66/192 = 34\%",
}

% Hutter:02selfopt -- conference paper (COLT'02, LNAI 2375).
% _month, _editor and znote are non-standard field names and are ignored by standard styles.
@InProceedings{Hutter:02selfopt,
  author    = "Marcus Hutter",
  title     = "Self-Optimizing and {P}areto-Optimal Policies in General Environments based on {B}ayes-Mixtures",
  _month    = jul,
  series    = "LNAI",
  volume    = "2375",
  year      = "2002",
  pages     = "364--379",
  address   = "Sydney, Australia",
  booktitle = "Proc. 15th Annual Conf. on Computational Learning Theory ({COLT'02})",
  _editor   = "J. Kivinen and R. H. Sloan",
  publisher = "Springer",
  isbn      = "978-3-540-43836-6",
  doi       = "10.1007/3-540-45435-7_25",
  http      = "http://www.hutter1.net/ai/selfopt.htm",
  url       = "http://arxiv.org/abs/cs.AI/0204040",
  keywords  = "Rational agents, sequential decision theory, reinforcement learning, value function, Bayes mixtures, self-optimizing policies, Pareto-optimality, unbounded effective horizon, (non) Markov decision processes.",
  abstract  = "The problem of making sequential decisions in unknown probabilistic environments is studied. In cycle $t$ action $y_t$ results in perception $x_t$ and reward $r_t$, where all quantities in general may depend on the complete history. The perception $x_t'$ and reward $r_t$ are sampled from the (reactive) environmental probability distribution $\mu$. This very general setting includes, but is not limited to, (partial observable, k-th order) Markov decision processes. Sequential decision theory tells us how to act in order to maximize the total expected reward, called value, if $\mu$ is known. Reinforcement learning is usually used if $\mu$ is unknown. In the Bayesian approach one defines a mixture distribution $\xi$ as a weighted sum of distributions $\nu\in\M$, where $\M$ is any class of distributions including the true environment $\mu$. We show that the Bayes-optimal policy $p^\xi$ based on the mixture $\xi$ is self-optimizing in the sense that the average value converges asymptotically for all $\mu\in\M$ to the optimal value achieved by the (infeasible) Bayes-optimal policy $p^\mu$ which knows $\mu$ in advance. We show that the necessary condition that $\M$ admits self-optimizing policies at all, is also sufficient. No other structural assumptions are made on $\M$. As an example application, we discuss ergodic Markov decision processes, which allow for self-optimizing policies. Furthermore, we show that $p^\xi$ is Pareto-optimal in the sense that there is no other policy yielding higher or equal value in {\em all} environments $\nu\in\M$ and a strictly higher value in at least one.",
  znote     = "Acceptance rate: 26/55 = 47\%",
}

% Hutter:01xentropy -- conference paper (Advances in Neural Information Processing Systems 14).
% The key carries "01" while the year field is 2002; the entry is listed in the 2002 section.
@InProceedings{Hutter:01xentropy,
  author     = "Marcus Hutter",
  title      = "Distribution of Mutual Information",
  _month     = dec,
  booktitle  = "Advances in Neural Information Processing Systems 14",
  _editor    = "T. G. Dietterich and S. Becker and Z. Ghahramani",
  publisher  = "MIT Press",
  address    = "Cambridge, MA, USA",
  pages      = "399--406",
  year       = "2002",
  isbn       = "0262042088",
  http       = "http://www.hutter1.net/ai/xentropy.htm",
  url        = "http://arxiv.org/abs/cs.AI/0112019",
  categories = "I.2. [Artificial Intelligence]",
  keywords   = "Mutual Information, Cross Entropy, Dirichlet distribution, Second order distribution, expectation and variance of mutual information.",
  abstract   = "The mutual information of two random variables i and j with joint probabilities t_ij is commonly used in learning Bayesian nets as well as in many other fields. The chances t_ij are usually estimated by the empirical sampling frequency n_ij/n leading to a point estimate I(n_ij/n) for the mutual information. To answer questions like ``is I(n_ij/n) consistent with zero?'' or ``what is the probability that the true mutual information is much larger than the point estimate?'' one has to go beyond the point estimate. In the Bayesian framework one can answer these questions by utilizing a (second order) prior distribution p(t) comprising prior information about t. From the prior p(t) one can compute the posterior p(t|n), from which the distribution p(I|n) of the mutual information can be calculated. We derive reliable and quickly computable approximations for p(I|n). We concentrate on the mean, variance, skewness, and kurtosis, and non-informative priors. For the mean we also give an exact expression. Numerical issues and the range of validity are discussed.",
  znote      = "Acceptance rate: 196/660 = 30\%",
}

% Hutter:02fuss -- conference paper (CEC-2002).
% Field names are case-insensitive in BibTeX, so issn is written in lower case like the other entries.
@InProceedings{Hutter:02fuss,
  author    = "Marcus Hutter",
  title     = "Fitness Uniform Selection to Preserve Genetic Diversity",
  booktitle = "Proc. 2002 Congress on Evolutionary Computation (CEC-2002)",
  address   = "Honolulu, HI, USA",
  publisher = "IEEE",
  issn      = "1098-7576",
  _month    = may,
  year      = "2002",
  pages     = "783--788",
  keywords  = "Evolutionary algorithms, fitness uniform selection strategy, preserve diversity, local optima, evolution, correlated recombination, crossover.",
  http      = "http://www.hutter1.net/ai/pfuss.htm",
  url       = "http://arxiv.org/abs/cs.AI/0103015",
  abstract  = "In evolutionary algorithms, the fitness of a population increases with time by mutating and recombining individuals and by a biased selection of more fit individuals. The right selection pressure is critical in ensuring sufficient optimization progress on the one hand and in preserving genetic diversity to be able to escape from local optima on the other. We propose a new selection scheme, which is uniform in the fitness values. It generates selection pressure towards sparsely populated fitness regions, not necessarily towards higher fitness, as is the case for all other selection schemes. We show that the new selection scheme can be much more effective than standard selection schemes.",
  znote     = "Acceptance rate: 264/372 = 71\%",
}

% Hutter:02fast -- journal article (International Journal of Foundations of Computer Science 13(3), 2002).
% press is a non-standard field holding a link to press coverage; standard styles ignore it.
@Article{Hutter:02fast,
  author    = "Marcus Hutter",
  title     = "The Fastest and Shortest Algorithm for All Well-Defined Problems",
  journal   = "International Journal of Foundations of Computer Science",
  publisher = "World Scientific",
  volume    = "13",
  number    = "3",
  pages     = "431--443",
  year      = "2002",
  keywords  = "Acceleration, Computational Complexity, Algorithmic Information Theory, Kolmogorov Complexity, Blum's Speed-up Theorem, Levin Search.",
  http      = "http://www.hutter1.net/ai/pfastprg.htm",
  url       = "http://arxiv.org/abs/cs.CC/0206022",
  abstract  = "An algorithm M is described that solves any well-defined problem p as quickly as the fastest algorithm computing a solution to p, save for a factor of 5 and low-order additive terms. M optimally distributes resources between the execution of provably correct p-solving programs and an enumeration of all proofs, including relevant proofs of program correctness and of time bounds on program runtimes. M avoids Blum's speed-up theorem by ignoring programs without correctness proof. M has broader applicability and can be faster than Levin's universal search, the fastest method for inverting functions save for a large multiplicative constant. An extension of Kolmogorov complexity and two novel natural measures of function complexity are used to show that the most efficient program computing some function f is also among the shortest programs provably computing f.",
  press     = "http://guide.supereva.it/c_/interventi/2001/04/38469.shtml",
}

% Hutter:02uspatent -- US patent recorded as an Article entry.
% NOTE(review): volume and journal hold pre-formatted text (patent number, page range, font switch),
% presumably to control how standard styles print the patent -- confirm before normalising.
@Article{Hutter:02uspatent,
  author  = "Marcus Hutter",
  title   = "System and method for analysing and displaying two- or three-dimensional sets of data",
  volume  = "number US2002041701, pages 1--15",
  journal = "{\rm BrainLAB}, US patent",
  year    = "2002",
  url     = "http://l2.espacenet.com/espacenet/bnsviewer?CY=ep&LG=en&DB=EPD&PN=US2002041701&ID=US2002041701A1+I+",
}

## %-------------Publications-of-Marcus-Hutter-2001--------------%

% Hutter:01eupatent -- EU patent recorded as an Article entry.
% NOTE(review): volume and journal hold pre-formatted text (patent number, page range, font switch),
% presumably to control how standard styles print the patent -- confirm before normalising.
@Article{Hutter:01eupatent,
  author  = "Marcus Hutter",
  title   = "{S}tufenfreie {D}arstellung von zwei- oder dreidimensionalen Datens{\"a}tzen durch kr{\"u}mmungsminimierende {V}erschiebung von {P}ixelwerten",
  volume  = "number EP1184812, pages 1--19",
  journal = "{\rm BrainLAB}, EU patent",
  year    = "2001",
  url     = "http://l2.espacenet.com/espacenet/bnsviewer?CY=ep&LG=en&DB=EPD&PN=EP1184812&ID=EP+++1184812A1+I+",
}

% Hutter:01market -- conference paper (ICANN-2001, LNCS 2130).
% _month, _journal, _editor and znote are non-standard field names and are ignored by standard styles.
@InProceedings{Hutter:01market,
  author     = "Ivo Kwee and Marcus Hutter and J{\"u}rgen Schmidhuber",
  title      = "Market-Based Reinforcement Learning in Partially Observable Worlds",
  address    = "Vienna",
  _month     = aug,
  year       = "2001",
  pages      = "865--873",
  booktitle  = "Proc. International Conf. on Artificial Neural Networks (ICANN-2001)",
  _journal   = "Artificial Neural Networks (ICANN-2001)",
  _editor    = "Georg Dorffner and Horst Bischof and Kurt Hornik",
  publisher  = "Springer",
  series     = "LNCS",
  volume     = "2130",
  http       = "http://www.hutter1.net/ai/pmarket.htm",
  url        = "http://arxiv.org/abs/cs.AI/0105025",
  categories = "I.2. [Artificial Intelligence]",
  keywords   = "Hayek system; reinforcement learning; partially observable environment",
  abstract   = "Unlike traditional reinforcement learning (RL), market-based RL is in principle applicable to worlds described by partially observable Markov Decision Processes (POMDPs), where an agent needs to learn short-term memories of relevant previous events in order to execute optimal actions. Most previous work, however, has focused on reactive settings (MDPs) instead of POMDPs. Here we reimplement a recent approach to market-based RL and for the first time evaluate it in a toy POMDP setting.",
  znote      = "Acceptance rate: 171/300 = 57\%",
}

% Hutter:01loss -- conference paper (ICML-2001).
% _editor and znote are non-standard field names and are ignored by standard styles.
@InProceedings{Hutter:01loss,
  author     = "Marcus Hutter",
  title      = "General Loss Bounds for Universal Sequence Prediction",
  year       = "2001",
  pages      = "210--217",
  booktitle  = "Proc. 18th International Conf. on Machine Learning (ICML-2001)",
  address    = "Williamstown, MA",
  _editor    = "Carla E. Brodley and Andrea Pohoreckyj Danyluk",
  publisher  = "Morgan Kaufmann",
  isbn       = "1-55860-778-1",
  issn       = "1049-1910",
  http       = "http://www.hutter1.net/ai/ploss.htm",
  url        = "http://arxiv.org/abs/cs.AI/0101019",
  categories = "I.2. [Artificial Intelligence], I.2.6. [Learning], I.2.8. [Problem Solving, Control Methods and Search], F.1.3. [Complexity Classes].",
  keywords   = "Bayesian and deterministic prediction; general loss function; Solomonoff induction; Kolmogorov complexity; learning; universal probability; loss bounds; games of chance; partial and delayed prediction; classification.",
  abstract   = "The Bayesian framework is ideally suited for induction problems. The probability of observing $x_k$ at time $k$, given past observations $x_1...x_{k-1}$ can be computed with Bayes rule if the true distribution $\mu$ of the sequences $x_1x_2x_3...$ is known. The problem, however, is that in many cases one does not even have a reasonable estimate of the true distribution. In order to overcome this problem a universal distribution $\xi$ is defined as a weighted sum of distributions $\mu_i\in M$, where $M$ is any countable set of distributions including $\mu$. This is a generalization of Solomonoff induction, in which $M$ is the set of all enumerable semi-measures. Systems which predict $y_k$, given $x_1...x_{k-1}$ and which receive loss $l_{x_k y_k}$ if $x_k$ is the true next symbol of the sequence are considered. It is proven that using the universal $\xi$ as a prior is nearly as good as using the unknown true distribution $\mu$. Furthermore, games of chance, defined as a sequence of bets, observations, and rewards are studied. The time needed to reach the winning zone is estimated. Extensions to arbitrary alphabets, partial and delayed prediction, and more active systems are discussed.",
  znote      = "Acceptance rate: 80/249 = 32\%",
}

% Hutter:01alpha -- conference paper (ECML-2001, LNAI 2167).
% _editor, _month and znote are non-standard field names and are ignored by standard styles.
@InProceedings{Hutter:01alpha,
  author    = "Marcus Hutter",
  title     = "Convergence and Error bounds for Universal Prediction of Nonbinary Sequences",
  booktitle = "Proc. 12th European Conf. on Machine Learning (ECML-2001)",
  address   = "Freiburg, Germany",
  _editor   = "Luc De Raedt and Peter Flach",
  publisher = "Springer",
  series    = "LNAI",
  volume    = "2167",
  isbn      = "3-540-42536-5",
  _month    = dec,
  year      = "2001",
  pages     = "239--250",
  http      = "http://www.hutter1.net/ai/palpha.htm",
  url       = "http://arxiv.org/abs/cs.LG/0106036",
  keywords  = "Induction; Solomonoff, Bayesian, deterministic prediction; Kolmogorov complexity; learning; Loss function; algorithmic information theory; universal probability",
  abstract  = "Solomonoff's uncomputable universal prediction scheme $\xi$ allows to predict the next symbol $x_k$ of a sequence $x_1...x_{k-1}$ for any Turing computable, but otherwise unknown, probabilistic environment $\mu$. This scheme will be generalized to arbitrary environmental classes, which, among others, allows the construction of computable universal prediction schemes $\xi$. Convergence of $\xi$ to $\mu$ in a conditional mean squared sense and with $\mu$ probability $1$ is proven. It is shown that the average number of prediction errors made by the universal $\xi$ scheme rapidly converges to those made by the best possible informed $\mu$ scheme. The schemes, theorems and proofs are given for general finite alphabet, which results in additional complications as compared to the binary case. Several extensions of the presented theory and results are outlined. They include general loss functions and bounds, games of chance, infinite alphabet, partial and delayed prediction, classification, and more active systems.",
  znote     = "Acceptance rate: 90/240 = 37\% (includes PKDD)",
}

% Hutter:01grep -- workshop paper (EWRL-5).
% _editor and _series are non-standard field names and are ignored by standard styles.
@InProceedings{Hutter:01grep,
  author     = "Ivo Kwee and Marcus Hutter and J{\"u}rgen Schmidhuber",
  title      = "Gradient-based Reinforcement Planning in Policy-Search Methods",
  year       = "2001",
  pages      = "27--29",
  address    = "Utrecht, The Netherlands",
  booktitle  = "Proc. 5th European Workshop on Reinforcement Learning (EWRL-5)",
  volume     = "27",
  _editor    = "Marco A. Wiering",
  publisher  = "Onderwijsinstituut CKI, Utrecht Univ.",
  _series    = "Cognitieve Kunstmatige Intelligentie",
  isbn       = "90-393-2874-9",
  issn       = "1389-5184",
  keywords   = "Artificial intelligence, reinforcement learning, direct policy search, planning, gradient descent.",
  http       = "http://www.hutter1.net/ai/pgrep.htm",
  url        = "http://arxiv.org/abs/cs.AI/0111060",
  categories = "I.2. [Artificial Intelligence], I.2.6. [Learning], I.2.8. [Problem Solving, Control Methods and Search]",
  abstract   = "We introduce a learning method called ``gradient-based reinforcement planning'' (GREP). Unlike traditional DP methods that improve their policy backwards in time, GREP is a gradient-based method that plans ahead and improves its policy {\em before} it actually acts in the environment. We derive formulas for the exact policy gradient that maximizes the expected future reward and confirm our ideas with numerical experiments.",
}

% Hutter:01decision -- workshop paper (EWRL-5).
% abstract2 is a non-standard field holding an alternative abstract; _editor and _series are likewise ignored by standard styles.
@InProceedings{Hutter:01decision,
  author     = "Marcus Hutter",
  title      = "Universal Sequential Decisions in Unknown Environments",
  year       = "2001",
  pages      = "25--26",
  address    = "Utrecht, The Netherlands",
  booktitle  = "Proc. 5th European Workshop on Reinforcement Learning (EWRL-5)",
  volume     = "27",
  _editor    = "Marco A. Wiering",
  publisher  = "Onderwijsinstituut CKI, Utrecht Univ.",
  _series    = "Cognitieve Kunstmatige Intelligentie",
  isbn       = "90-393-2874-9",
  issn       = "1389-5184",
  keywords   = "Artificial intelligence, Rational agents, sequential decision theory, universal Solomonoff induction, algorithmic probability, reinforcement learning, computational complexity, Kolmogorov complexity.",
  url        = "http://www.hutter1.net/ai/pdecision.htm",
  categories = "I.2. [Artificial Intelligence], I.2.6. [Learning], I.2.8. [Problem Solving, Control Methods and Search], F.1.3. [Complexity Classes], F.2. [Analysis of Algorithms and Problem Complexity]",
  abstract   = "We give a brief introduction to the AIXI model, which unifies and overcomes the limitations of sequential decision theory and universal Solomonoff induction. While the former theory is suited for active agents in known environments, the latter is suited for passive prediction of unknown environments.",
  abstract2  = "Decision theory formally solves the problem of rational agents in uncertain worlds if the true environmental probability distribution is known. Solomonoff's theory of universal induction formally solves the problem of sequence prediction for unknown distribution. We unify both theories and give strong arguments that the resulting universal AIXI model behaves optimally in any computable environment.",
}

% Hutter:01aixi -- conference paper (ECML-2001, LNAI 2167).
% _editor and znote are non-standard field names and are ignored by standard styles.
@InProceedings{Hutter:01aixi,
  author     = "Marcus Hutter",
  title      = "Towards a Universal Theory of Artificial Intelligence based on Algorithmic Probability and Sequential Decisions",
  year       = "2001",
  pages      = "226--238",
  booktitle  = "Proc. 12th European Conf. on Machine Learning (ECML-2001)",
  address    = "Freiburg, Germany",
  _editor    = "Luc De Raedt and Peter Flach",
  publisher  = "Springer",
  series     = "LNAI",
  volume     = "2167",
  isbn       = "3-540-42536-5",
  keywords   = "Artificial intelligence, Rational agents, sequential decision theory, universal Solomonoff induction, algorithmic probability, reinforcement learning, computational complexity, theorem proving, probabilistic reasoning, Kolmogorov complexity, Levin search.",
  http       = "http://www.hutter1.net/ai/paixi.htm",
  url        = "http://arxiv.org/abs/cs.AI/0012011",
  categories = "I.2. [Artificial Intelligence], I.2.3. [Deduction and Theorem Proving], I.2.6. [Learning], I.2.8. [Problem Solving, Control Methods and Search], F.1.3. [Complexity Classes], F.2. [Analysis of Algorithms and Problem Complexity]",
  abstract   = "Decision theory formally solves the problem of rational agents in uncertain worlds if the true environmental probability distribution is known. Solomonoff's theory of universal induction formally solves the problem of sequence prediction for unknown distribution. We unify both theories and give strong arguments that the resulting universal AIXI model behaves optimally in any computable environment. The major drawback of the AIXI model is that it is uncomputable. To overcome this problem, we construct a modified algorithm AIXI^tl, which is still superior to any other time t and space l bounded agent. The computation time of AIXI^tl is of the order t x 2^l.",
  znote      = "Acceptance rate: 90/240 = 37\% (includes PKDD)",
}

% Hutter:01errbnd -- journal article (Journal of Computer and System Sciences 62(4), 2001).
@Article{Hutter:01errbnd,
  author   = "Marcus Hutter",
  title    = "New Error Bounds for {Solomonoff} Prediction",
  year     = "2001",
  volume   = "62",
  number   = "4",
  pages    = "653--667",
  journal  = "Journal of Computer and System Sciences",
  address  = "Manno(Lugano), Switzerland",
  keywords = "Kolmogorov Complexity, Solomonoff Prediction, Error Bound, Induction, Learning, Algorithmic Information Theory, Bayes",
  http     = "http://www.hutter1.net/ai/perrbnd.htm",
  url      = "http://arxiv.org/abs/cs.AI/9912008",
  abstract = "Several new relations between Solomonoff prediction and Bayesian prediction and general probabilistic prediction schemes will be proved. Among others they show that the number of errors in Solomonoff prediction is finite for computable prior probability, if finite in the Bayesian case. Deterministic variants will also be studied. The most interesting result is that the deterministic variant of Solomonoff prediction is optimal compared to any other probabilistic or deterministic prediction scheme apart from additive square root corrections only. This makes it well suited even for difficult prediction problems, where it does not suffice when the number of errors is minimal to within some factor greater than one. Solomonoff's original bound and the ones presented here complement each other in a useful way.",
}

## %-------------Publications-of-Marcus-Hutter-2000--------------%

% Hutter:00speed -- workshop presentation (TAI-2001) recorded as an Article entry.
% year holds only the four-digit year; the page count lives in note so that
% year-based sorting and label generation see a clean value.
@Article{Hutter:00speed,
  author   = "Marcus Hutter",
  title    = "An effective Procedure for Speeding up Algorithms",
  year     = "2001",
  journal  = "Presented at the 3rd Workshop on Algorithmic Information Theory (TAI-2001)",
  note     = "10 pages",
  http     = "http://www.hutter1.net/ai/pspeed.htm",
  url      = "http://arxiv.org/abs/cs.CC/0102018",
  keywords = "Acceleration, Computational Complexity, Algorithmic Information Theory, Blum's Speed-up, Levin Search.",
  abstract = "The provably asymptotically fastest algorithm within a factor of 5 for formally described problems will be constructed. The main idea is to enumerate all programs provably equivalent to the original problem by enumerating all proofs. The algorithm could be interpreted as a generalization and improvement of Levin search, which is, within a multiplicative constant, the fastest algorithm for inverting functions. Blum's speed-up theorem is avoided by taking into account only programs for which a correctness proof exists. Furthermore, it is shown that the fastest program that computes a certain function is also one of the shortest programs provably computing this function. To quantify this statement, the definition of Kolmogorov complexity is extended, and two new natural measures for the complexity of a function are defined.",
}

% Hutter:00kcunai -- technical report / arXiv preprint cs.AI/0004001.
% NOTE(review): institution holds a city plus a page count rather than an institution name,
% presumably to shape the printed reference -- confirm the real institution before normalising.
@TechReport{Hutter:00kcunai,
  author      = "Marcus Hutter",
  title       = "A Theory of Universal Artificial Intelligence based on Algorithmic Complexity",
  number      = "cs.AI/0004001",
  _month      = apr,
  year        = "2000",
  institution = "M{\"u}nchen, 62 pages",
  keywords    = "Artificial intelligence, algorithmic complexity, sequential decision theory; induction; Solomonoff; Kolmogorov; Bayes; reinforcement learning; universal sequence prediction; strategic games; function minimization; supervised learning.",
  url         = "http://arxiv.org/abs/cs.AI/0004001",
  http        = "http://www.hutter1.net/ai/pkcunai.htm",
  abstract    = "Decision theory formally solves the problem of rational agents in uncertain worlds if the true environmental prior probability distribution is known. Solomonoff's theory of universal induction formally solves the problem of sequence prediction for unknown prior distribution. We combine both ideas and get a parameterless theory of universal Artificial Intelligence. We give strong arguments that the resulting AIXI model is the most intelligent unbiased agent possible. We outline for a number of problem classes, including sequence prediction, strategic games, function minimization, reinforcement and supervised learning, how the AIXI model can formally solve them. The major drawback of the AIXI model is that it is uncomputable. To overcome this problem, we construct a modified algorithm AIXI-tl, which is still effectively more intelligent than any other time t and space l bounded agent. The computation time of AIXI-tl is of the order tx2^l. Other discussed topics are formal definitions of intelligence order relations, the horizon problem and relations of the AIXI theory to other AI approaches.",
  note        = "http://arxiv.org/abs/cs.AI/0004001",
}

## %----------Publications-of-Marcus-Hutter-1987-1999------------%

% Hutter:97instanto -- journal article (Zeitschrift fuer Physik C 74(1), 1997).
@Article{Hutter:97instanto,
  author   = "Marcus Hutter",
  title    = "Instantons and Meson Correlators in {QCD}",
  year     = "1997",
  pages    = "131--143",
  journal  = "Zeitschrift f{\"u}r Physik C Particles and Fields",
  volume   = "74",
  number   = "1",
  issn     = "0170-9739",
  doi      = "10.1007/s002880050376",
  url      = "http://arxiv.org/abs/hep-ph/9501245",
  http     = "http://www.hutter1.net/physics/pinstant.htm",
  abstract = "Various QCD correlators are calculated in the instanton liquid model in zeromode approximation and $1/N_c$ expansion. Previous works are extended by including dynamical quark loops. In contrast to the original ``perturbative'' $1/N_c$ expansion not all quark loops are suppressed. In the flavor singlet meson correlators a chain of quark bubbles survives the $N_c\to\infty$ limit causing a massive $\eta^\prime$ in the pseudoscalar correlator while keeping massless pions in the triplet correlator. The correlators are plotted and meson masses and couplings are obtained from a spectral fit. They are compared to the values obtained from numerical studies of the instanton liquid and to experimental results.",
}

% Hutter:97family -- journal article (Nuclear Physics B 484, 1997).
% The series letter belongs to the journal name, so volume is a plain number.
@Article{Hutter:97family,
  author   = "Andreas Blumhofer and Marcus Hutter",
  title    = "Family Structure from Periodic Solutions of an Improved Gap Equation",
  journal  = "Nuclear Physics B",
  volume   = "484",
  year     = "1997",
  pages    = "80--96",
  doi      = "10.1016/S0550-3213(96)00644-X",
  issn     = "0550-3213",
  url      = "http://arxiv.org/abs/hep-ph/9605393",
  http     = "http://www.hutter1.net/physics/pfamily.htm",
  abstract = "Fermion mass models usually contain a horizontal symmetry and therefore fail to predict the exponential mass spectrum of the Standard Model in a natural way. In dynamical symmetry breaking there are different concepts to introduce a fermion mass spectrum, which automatically has the desired hierarchy. In constructing a specific model we show that in some modified gap equations periodic solutions with several fermion poles appear. The stability of these excitations and the application of this toy model are discussed. The mass ratios turn out to be approximately e^pi and e^2pi. Thus the model explains the large ratios of fermion masses between successive generations in the Standard Model without introducing large or small numbers by hand.",
  note     = "Missing figures in B494 (1997) 485",
}

% Hutter:96thesis -- PhD thesis (1996), English translation of the German
% original that is referenced in the note field.
% {QCD} is brace-protected so that sentence-casing styles keep the acronym
% in upper case, matching the other QCD titles in this file.
% The url value must not carry surrounding whitespace: it is emitted
% verbatim into links.
% http and abstract are non-standard fields ignored by standard styles.
@PhdThesis{Hutter:96thesis,
  author   = "Marcus Hutter",
  title    = "Instantons in {QCD}: Theory and application of the instanton liquid model",
  school   = "Faculty for Theoretical Physics, LMU Munich",
  year     = "1996",
  pages    = "1--100",
  url      = "http://arxiv.org/abs/hep-ph/0107098",
  http     = "http://www.hutter1.net/physics/pdise.htm",
  abstract = "Numerical and analytical studies of the instanton liquid model have allowed the determination of many hadronic parameters during the last 13 years. Most part of this thesis is devoted to the extension of the analytical methods. The meson correlation (polarization) functions are calculated in the instanton liquid model including dynamical quark loops. The correlators are plotted and masses and couplings of the sigma, rho, omega, a1 and f1 are obtained from a spectral fit. A separated analysis allows the determination of the eta' mass too. The results agree with the experimental values on a 10% level. Further I give some predictions for the proton form factors, which are related to the proton spin (problem). A gauge invariant gluon mass for small momenta is also calculated. At the end of the work some predictions are given, which do not rely on the instanton liquid model. A gauge invariant quark propagator is calculated in the one instanton background and is compared to the regular and singular propagator. An introduction to the skill of choosing a suitable gauge, especially a criterion for choosing regular or singular gauge, is given. An application is the derivation of a finite relation between the quark condensate and the QCD scale Lambda, where neither an infrared cutoff nor a specific instanton model has been used. In general the instanton liquid model exhibits an astonishing internal consistency and a good agreement with the experimental data.",
  note     = "Translated from the German original http://www.hutter1.net/physics/pdiss.htm",
}

% Hutter:96diss -- PhD thesis (1996), German original; the note field
% points to the English translation.
% The acronym and the German nouns in the title are brace-protected, using
% the same {X}word convention as the other German titles in this file, so
% that sentence-casing styles do not lowercase them.
% http and abstract are non-standard fields ignored by standard styles.
@PhdThesis{Hutter:96diss,
  author   = "Marcus Hutter",
  title    = "Instantonen in der {QCD}: {T}heorie und {A}nwendungen des {I}nstanton-{F}l{\"u}ssigkeit-{M}odells",
  school   = "Fakult{\"a}t f{\"u}r Theoretische Physik, LMU M{\"u}nchen",
  year     = "1996",
  pages    = "1--105",
  url      = "http://arxiv.org/abs/hep-ph/9603280",
  http     = "http://www.hutter1.net/physics/pdiss.htm",
  abstract = "Durch numerische Simulation des Instanton-Flüssigkeit-Modells konnten eine Reihe hadronischer Größen in den letzten 13 Jahren bestimmt werden. Der größte Teil dieser Arbeit ist der Erweiterung der analytischen Methoden gewidmet. Die Meson-Korrelatoren (auch Polarisations-Funktionen genannt) werden im Instanton-Flüssigkeits-Modell berechnet, wobei dynamische Quark-Schleifen berücksichtigt werden. Die Korrelatoren werden grafisch dargestellt und die Massen und Kopplungen der sigma, rho, omega, a1 und f1 Mesonen werden mit Hilfe eines spektralen Fits bestimmt. Eine gesonderte Betrachtung ermöglicht auch die Berechnung der eta' Masse. Die Ergebnisse stimmen auf 10% Niveau mit den experimentellen Werten überein. Weiterhin wird versucht, die axialen Formfaktoren des Protons zu bestimmen. Diese stehen in Zusammenhang mit dem Proton-Spin(-Problem). Eine eichinvariante Gluon-Masse wird für kleine Impulse berechnet. Die Arbeit wird abgeschlossen mit einigen Vorhersagen, die sich nicht speziell auf das Instanton-Flüssigkeits-Modell stützen. Im ein-Instanton-Vakuum wird ein eichinvarianter Quark-Propagator berechnet und mit dem regulären und dem singulären Propagator verglichen. Kriterien für die Wahl einer geeigneten Eichung, insbesondere für die Wahl der singulären oder der regulären Eichung, werden gegeben. Eine Anwendung ist die Herleitung einer endlichen Relation zwischen dem Quark-Kondensat und der QCD-Skala Lambda, wobei weder ein Infrarot-Cutoff noch ein spezifisches Instanton-Modell verwendet werden. 
Allgemein weist das Instanton-Flüssigkeits-Modell eine erstaunliche interne Konsistenz und gute Übereinstimmung mit experimentellen Daten auf.",
  note     = "English translation available at http://www.hutter1.net/physics/pdise.htm",
}

% Hutter:96eta -- single-author journal article (1996).
% The series letter "B" is part of the journal name, so the volume is the
% bare number; repeating the letter in the volume would print it twice.
% The math in the title is wrapped in braces so that style-driven case
% changes leave it alone.
% http and abstract are non-standard fields ignored by standard styles.
@Article{Hutter:96eta,
  author   = "Marcus Hutter",
  title    = "The mass of the {$\eta'$} in self-dual {QCD}",
  journal  = "Physics Letters B",
  volume   = "367",
  pages    = "275--278",
  year     = "1996",
  issn     = "0370-2693",
  doi      = "10.1016/0370-2693(95)01411-X",
  url      = "http://arxiv.org/abs/hep-ph/9509401",
  http     = "http://www.hutter1.net/physics/petamas.htm",
  abstract = "The QCD gauge field is modeled as an ensemble of statistically independent selfdual and antiselfdual regions. This model is motivated from instanton physics. The scale anomaly then allows to relate the topological susceptibility to the gluon condensate. With the help of Wittens formula for m_eta' and an estimate of the suppression of the gluon condensate due to light quarks the mass of the eta' can be related to f_pi and the physical gluon condensate. We get the quite satisfactory value m_eta'=884+-116 MeV. Using the physical eta' mass as an input it is in principle possible to get information about the interaction between instantons and anti-instantons.",
}

% Hutter:95spin -- technical report LMU-95-15 (1995).
% url points at the arXiv preprint, http at the project web page; the page
% count is kept in the note field.
% http and abstract are non-standard fields ignored by standard styles.
@TechReport{Hutter:95spin,
  author      = "Marcus Hutter",
  title       = "Proton Spin in the Instanton Background",
  institution = "Theoretische Physik, LMU M{\"u}nchen",
  number      = "LMU-95-15",
  year        = "1995",
  url         = "http://arxiv.org/abs/hep-ph/9509402",
  http        = "http://www.hutter1.net/physics/pspin.htm",
  abstract    = "The proton form factors are reduced to vacuum correlators of 4 quark fields by assuming independent constituent quarks. The axial singlet quark and gluonic form factors are calculated in the instanton liquid model. A discussion of gauge(in)dependence is given.",
  note        = "15 pages",
}

% Hutter:95prop -- technical report LMU-95-03 (1995).
% url points at the arXiv preprint, http at the project web page; the page
% count is kept in the note field.
% http and abstract are non-standard fields ignored by standard styles.
@TechReport{Hutter:95prop,
  author      = "Marcus Hutter",
  title       = "Gauge Invariant Quark Propagator in the Instanton Background",
  institution = "Theoretische Physik, LMU M{\"u}nchen",
  number      = "LMU-95-03",
  year        = "1995",
  url         = "http://arxiv.org/abs/hep-ph/9502361",
  http        = "http://www.hutter1.net/physics/pprop.htm",
  abstract    = "After a general discussion on the choice of gauge, we compare the quark propagator in the background of one instanton in regular and singular gauge with a gauge invariant propagator obtained by inserting a path-ordered gluon exponential. Using a gauge motivated by this analysis, we were able to obtain a finite result for the quark condensate without introducing an infrared cutoff nor invoking some instanton model.",
  note        = "15 pages",
}

% Hutter:93gluon -- technical report LMU-93-18 (1993).
% url points at the arXiv preprint, http at the project web page; the page
% count is kept in the note field.
% http and abstract are non-standard fields ignored by standard styles.
@TechReport{Hutter:93gluon,
  author      = "Marcus Hutter",
  title       = "Gluon Mass from Instantons",
  institution = "Theoretische Physik, LMU M{\"u}nchen",
  number      = "LMU-93-18",
  year        = "1993",
  url         = "http://arxiv.org/abs/hep-ph/9501335",
  http        = "http://www.hutter1.net/physics/pgluon.htm",
  abstract    = "The gluon propagator is calculated in the instanton background in a form appropriate for extracting the momentum dependent gluon mass. In background-xi-gauge we get for the mass 400 MeV for small p^2 independent of the gauge parameter xi.",
  note        = "13 pages",
}

% Hutter:91cfs -- master's (diploma) thesis (1991), written in German.
% The title protects the capitalised German nouns with {X}word braces.
% ps, pdf, code, codex, abstract and zusammenfassung are non-standard
% fields (document and source-code links, English abstract, German
% summary); standard styles ignore them.
@MastersThesis{Hutter:91cfs,
  author          = "Marcus Hutter",
  title           = "{I}mplementierung eines {K}lassifizierungs-{S}ystems",
  school          = "Theoretische Informatik, TU M{\"u}nchen",
  year            = "1991",
  url             = "http://www.hutter1.net/ai/pcfs.htm",
  ps              = "http://www.hutter1.net/ai/pcfs.ps",
  pdf             = "http://www.hutter1.net/ai/pcfs.pdf",
  code            = "http://www.hutter1.net/ai/cfssim.c",
  codex           = "http://www.hutter1.net/ai/cfsexmpl.c",
  abstract        = "A classifier system is a massively parallel rule based system, whose components (classifier) can exchange messages, whose behavior is assessed by a teacher (reinforcement), and which is able to learn by means of credit assignment and a genetic algorithm. For an introduction we have to refer to the, meanwhile extensive, literature; see especially Goldberg (1989). The concept of a classifier system was first developed by Holland (1986), but meanwhile a multitude of variants and extensions exist (Booker et al., 1989). So far it is impossible to compare these variants in their performance, statements on the quality of the various approaches are, hence, hard to impossible. The program developed in this diploma thesis allows, for the first time, a direct comparison of the most important variants. The thesis describes the program, in which we have taken special attention to an efficient implementation.",
  zusammenfassung = "Ein Klassifizierungssystem (CFS, engl. Classifiersystem) ist ein massiv paralleles regelbasiertes System, dessen Komponenten (Classifier) Nachrichten (Messages) austauschen können, dessen Verhalten von einem Lehrer beurteilt wird (Reinforcement) und das mittels Credit-Assignment und genetischen Algorithmen fähig ist zu lernen. Für eine einführende Darstellung muß auf die inzwischen sehr umfangreiche Literatur, insbesondere Goldberg (1989), verwiesen werden. Das Konzept des CFS wurde zuerst von Holland (1986) entwickelt, inzwischen gibt es aber eine Vielzahl von Varianten und Erweiterungen (Booker et al., 1989). 
Bisher ist es nicht möglich, diese Varianten in ihrer Performance zu vergleichen, eine Aussage über die Güte der verschiedenen Ansätze ist somit kaum oder überhaupt nicht möglich. Das in dieser Diplomarbeit erstellte Programm gestattet erstmals bzgl. der wichtigsten Varianten einen direkten Vergleich. In den folgenden Kapiteln wird dieses Programm, bei dem besonders auf eine effiziente Implementierung geachtet wurde, beschrieben.",
  note            = "72 pages with C listing, in German",
}

% Hutter:90faka -- technical report (1990), in German, with no report number.
% The type field replaces the default report label with the name of the
% event series; the ampersand in the institution is escaped for LaTeX.
@TechReport{Hutter:90faka,
  author      = "Marcus Hutter",
  title       = "{P}arallele {A}lgorithmen in der {S}tr{\"o}mungsmechanik",
  type        = "{F}erienakademie: {N}umerische {M}ethoden der {S}tr{\"o}mungsmechanik",
  institution = "Universit{\"a}t Erlangen-N{\"u}rnberg \& Technische Universit{\"a}t M{\"u}nchen",
  year        = "1990",
  url         = "http://www.hutter1.net/official/faka.htm",
  note        = "10 pages, in German",
}

% Hutter:90fopra -- technical report (1990) with no report number.
% The type field replaces the default report label.
% ftp, pdf, code and abstract are non-standard fields (download links and
% abstract text); standard styles ignore them.
@TechReport{Hutter:90fopra,
  author      = "Marcus Hutter",
  title       = "A Reinforcement Learning {H}ebb Net",
  type        = "Fortgeschrittenenpraktikum",
  institution = "Theoretische Informatik, TU M{\"u}nchen",
  year        = "1990",
  url         = "http://www.hutter1.net/ai/fopra.htm",
  ftp         = "http://www.hutter1.net/ai/fopra.ps.zip",
  pdf         = "http://www.hutter1.net/ai/fopra.pdf",
  code        = "http://www.hutter1.net/ai/fopra.pas",
  abstract    = "This Fopra is motivated by the following observations about human learning and about human neural information processing. On the one hand humans are able to learn supervised, unsupervised and by reinforcement, on the other hand there is no neural distinction between informative, uninformative and evaluative feedback. Furthermore, the Hebb learning rule is the only biological inspired learning mechanism. If the human brain is indeed a Hebb net this would imply that Hebb nets are able to learn by reinforcement. The goal of this Fopra is to investigate whether and how Hebb nets could be used for reinforcement learning. It is shown that Hebb nets with a suitable prior net topology can indeed learn, at least simple tasks, by reinforcement.",
  note        = "30 pages with Pascal listing, in German",
}

% Hutter:87cad -- magazine article (1987), in German.
% Product names in the title are brace-protected as whole words so that
% styles keep their capitalisation; the ampersand in the publisher name is
% escaped for LaTeX.
% abstract is a non-standard field ignored by standard styles.
@Article{Hutter:87cad,
  author    = "Marcus Hutter",
  title     = "Fantastische {3D-Graphik} mit dem {CPC-Giga-CAD}",
  journal   = "7. Schneider Sonderheft, Happy Computer, Sonderheft 16",
  publisher = "Markt\&Technik",
  year      = "1987",
  pages     = "41--92",
  url       = "http://www.hutter1.net/gigacad/gigacad.htm",
  abstract  = "CAD steht für Computer Aided Design. Bis heute war dieses Gebiet hauptsächlich Domäne der Großrechner. Mit $\gg$CPC-Giga-CAD$\ll$ wird auch auf dem Schneider CPC automatisiertes und computergestütztes Zeichnen und Konstruieren zum Kinderspiel.",
}

© 2000 by ... | [home] [search] [science] [contact] [up] | ... Marcus Hutter |