% Conference paper (FutureTech 2017) published in the Lecture Notes in
% Electrical Engineering series. Names are stored in "Last, First" form and
% the DOI is stored bare (no LaTeX escapes) so any style can format them.
% The abstract and note are kept verbatim from the publisher export.
@inproceedings{b0735fcc10344739894bf4451e83e4d7,
  author    = {Jang, Miyoung and Lokhande, Archana B. and Baek, Naeun and Chang, Jae Woo},
  title     = {An efficient partition-based filtering for similarity joins on {MapReduce} framework},
  booktitle = {Advanced Multimedia and Ubiquitous Engineering - MUE/FutureTech 2017},
  editor    = {Park, James J. and Chen, Shu-Ching and Choo, Kim-Kwang Raymond},
  series    = {Lecture Notes in Electrical Engineering},
  publisher = {Springer Verlag},
  year      = {2017},
  pages     = {528--533},
  doi       = {10.1007/978-981-10-5041-1_84},
  isbn      = {9789811050404},
  language  = {English},
  keywords  = {Join matrix, Load balancing, MapReduce-based similarity join algorithm, Parallel join processing},
  note      = {Publisher Copyright: {\textcopyright} Springer Nature Singapore Pte Ltd. 2017.; 12th International Conference on Future Information Technology, FutureTech 2017 ; Conference date: 22-05-2017 Through 24-05-2017},
  abstract  = {Similarity join is an important operation in MapReduce framework to find pairs of similar objects like images, video and time series. Since MapReduce basics do not support efficient join processing, the duplicate reduction of candidates and load-balancing among partitions are the major challenges. Recently, many partition based similarity join algorithms have been proposed to solve such problems. However, the existing algorithms still have limitations for supporting efficient join processing over large-scale data set. In this paper, we proposed a similarity join algorithm with an efficient filtering technique on MapReduce to overcome the limitations of traditional partitioning method in two ways: (1) the number of outputs records generated by the filtering matrix reduces duplicates and (2) the estimated join cost generated by using a partition matrix leads to a better load-balance among reducers. Moreover, we have conducted experimental evaluations using sequential data to show the speed-up and scale-up of proposed method.},
}