Elasticsearch文档去重
过程
安装logstash
vim ./logstash/config/duplicate.conf
# De-duplication pipeline: copy every document from "index1" into "index1_new",
# using a fingerprint of the chosen fields as the document ID. Documents whose
# col1/col2/col3 values are identical get the same ID, so later copies overwrite
# earlier ones instead of creating duplicates.
#
# NOTE: each setting must stay on its own line. A "#" starts a comment that runs
# to the end of the line, so collapsing this file onto one line comments out
# everything after the first "#".

input {
  # Read all documents from Elasticsearch
  elasticsearch {
    hosts => "localhost"
    index => "index1"
    query => '{ "sort": [ "_doc" ] }'
  }
}

# This filter has been updated on February 18, 2019
filter {
  fingerprint {
    key => "1234ABCD"
    method => "SHA256"
    # Fields whose combined values define what counts as a duplicate.
    source => ["col1", "col2", "col3"]
    # Stored under @metadata so the fingerprint is not written into the document.
    target => "[@metadata][generated_id]"
    concatenate_sources => true # <-- New line added since original post date
  }
}

output {
  # Print one dot per event as a progress indicator.
  stdout { codec => dots }
  elasticsearch {
    index => "index1_new"
    # Use the fingerprint as the document ID so identical source values map to one document.
    document_id => "%{[@metadata][generated_id]}"
  }
}
./logstash/bin/logstash -f ./logstash/config/duplicate.conf