@inproceedings{Ferd1706:Sparse,
  author    = {Ferdowsi, Sohrab and Voloshynovskiy, Sviatoslav and Kostadinov, Dimche and Holotyak, Taras},
  title     = {{Sparse Ternary Codes} for Similarity Search Have Higher Coding Gain than Dense Binary Codes},
  booktitle = {2017 {IEEE} International Symposium on Information Theory ({ISIT})},
  address   = {Aachen, Germany},
  days      = {24},
  month     = jun,
  year      = {2017},
  keywords  = {Approximate Nearest Neighbor search; content identification; binary hashing; coding gain; sparse representation},
  abstract  = {This paper addresses the problem of Approximate Nearest Neighbor (ANN) search in pattern recognition where feature vectors in a database are encoded as compact codes in order to speed-up the similarity search in large-scale databases. Considering the ANN problem from an information-theoretic perspective, we interpret it as an encoding which maps the original feature vectors to a less-entropic sparse representation while requiring them to be as informative as possible. We then define the coding gain for ANN search using information-theoretic measures. We next show that the classical approach to this problem which consists of binarization of the projected vectors is sub-optimal. Instead, we show that a recently proposed ternary encoding achieves higher coding gains.},
}