1c377920eSShuo Chen#include "file.h" 2da39c979SShuo Chen#include "input.h" 3c377920eSShuo Chen#include "timer.h" 4c377920eSShuo Chen 5c377920eSShuo Chen#include "absl/container/flat_hash_set.h" 6c377920eSShuo Chen 7c377920eSShuo Chenint main(int argc, char* argv[]) 8c377920eSShuo Chen{ 9c377920eSShuo Chen setlocale(LC_NUMERIC, ""); 10c377920eSShuo Chen 11da39c979SShuo Chen bool combine = false; 12c377920eSShuo Chen bool sequential = false; 136f2e1683SShuo Chen int buffer_size = kBufferSize; 14c377920eSShuo Chen int opt; 15da39c979SShuo Chen while ((opt = getopt(argc, argv, "b:cs")) != -1) 16c377920eSShuo Chen { 17c377920eSShuo Chen switch (opt) 18c377920eSShuo Chen { 19c377920eSShuo Chen case 'b': 20c377920eSShuo Chen buffer_size = atoi(optarg); 21c377920eSShuo Chen break; 22da39c979SShuo Chen case 'c': 23da39c979SShuo Chen combine = true; 24da39c979SShuo Chen break; 25c377920eSShuo Chen case 's': 26c377920eSShuo Chen sequential = true; 27c377920eSShuo Chen break; 28c377920eSShuo Chen } 29c377920eSShuo Chen } 30c377920eSShuo Chen 31da39c979SShuo Chen LOG_INFO << "Reading " << argc - optind << (combine ? " segment " : "") << " files " 32c377920eSShuo Chen << (sequential ? "sequentially" : "randomly") 33c377920eSShuo Chen << ", buffer size " << buffer_size; 34c377920eSShuo Chen Timer timer; 35c377920eSShuo Chen int64_t total = 0; 36c377920eSShuo Chen int64_t lines = 0; 37da39c979SShuo Chen int64_t count = 0; 38da39c979SShuo Chen 39da39c979SShuo Chen if (combine) 40da39c979SShuo Chen { 41da39c979SShuo Chen std::vector<std::unique_ptr<SegmentInput>> inputs; 42da39c979SShuo Chen inputs.reserve(argc - optind); 43da39c979SShuo Chen for (int i = optind; i < argc; ++i) 44da39c979SShuo Chen { 45da39c979SShuo Chen inputs.emplace_back(new SegmentInput(argv[i], buffer_size)); 46da39c979SShuo Chen } 47da39c979SShuo Chen 48da39c979SShuo Chen if (sequential) 49da39c979SShuo Chen { 50da39c979SShuo Chen for (const auto& input : inputs) 51da39c979SShuo Chen { 52da39c979SShuo Chen Timer t; 53da39c979SShuo Chen //std::string line; 54da39c979SShuo Chen while (input->next()) 55da39c979SShuo Chen { 56da39c979SShuo Chen count += input->current_count(); 57da39c979SShuo Chen ++lines; 58da39c979SShuo Chen } 59da39c979SShuo Chen int64_t len = input->tell(); 60da39c979SShuo Chen LOG_INFO << "Done " << input->filename() << " " << t.report(len); 61da39c979SShuo Chen total += len; 62da39c979SShuo Chen } 63da39c979SShuo Chen } 64da39c979SShuo Chen else 65da39c979SShuo Chen { 66da39c979SShuo Chen } 67da39c979SShuo Chen } 68da39c979SShuo Chen else 69da39c979SShuo Chen { 70da39c979SShuo Chen std::vector<std::unique_ptr<InputFile>> files; 71da39c979SShuo Chen files.reserve(argc - optind); 72c377920eSShuo Chen for (int i = optind; i < argc; ++i) 73c377920eSShuo Chen { 74c377920eSShuo Chen files.emplace_back(new InputFile(argv[i], buffer_size)); 75c377920eSShuo Chen } 76c377920eSShuo Chen 77c377920eSShuo Chen if (sequential) 78c377920eSShuo Chen { 79c377920eSShuo Chen for (const auto& file : files) 80c377920eSShuo Chen { 81c377920eSShuo Chen Timer t; 82c377920eSShuo Chen std::string line; 83c377920eSShuo Chen while (file->getline(&line)) 84c377920eSShuo Chen { 85c377920eSShuo Chen ++lines; 86c377920eSShuo Chen } 87c377920eSShuo Chen int64_t len = file->tell(); 88c377920eSShuo Chen LOG_DEBUG << "Done " << file->filename() << " " << t.report(len); 89c377920eSShuo Chen total += len; 90c377920eSShuo Chen } 91c377920eSShuo Chen } 92c377920eSShuo Chen else 93c377920eSShuo Chen { 94c377920eSShuo Chen std::string line; 95c377920eSShuo Chen absl::flat_hash_set<InputFile*> toRemove; 96c377920eSShuo Chen while (!files.empty()) 97c377920eSShuo Chen { 98c377920eSShuo Chen toRemove.clear(); 99c377920eSShuo Chen // read one line from each file 100c377920eSShuo Chen for (const auto& file : files) 101c377920eSShuo Chen { 102c377920eSShuo Chen if (file->getline(&line)) 103c377920eSShuo Chen { 104c377920eSShuo Chen ++lines; 105c377920eSShuo Chen } 106c377920eSShuo Chen else 107c377920eSShuo Chen { 108c377920eSShuo Chen toRemove.insert(file.get()); 109c377920eSShuo Chen } 110c377920eSShuo Chen } 111c377920eSShuo Chen if (!toRemove.empty()) 112c377920eSShuo Chen { 1136f2e1683SShuo Chen for (auto* f : toRemove) 114c377920eSShuo Chen { 115c377920eSShuo Chen total += f->tell(); 116c377920eSShuo Chen LOG_DEBUG << "Done " << f->filename(); 117c377920eSShuo Chen } 118c377920eSShuo Chen // std::partition? 119c377920eSShuo Chen auto it = std::remove_if(files.begin(), files.end(), 120c377920eSShuo Chen [&toRemove] (const auto& f) { return toRemove.count(f.get()) > 0; }); 121c377920eSShuo Chen assert(files.end() - it == toRemove.size()); 122c377920eSShuo Chen files.erase(it, files.end()); 123c377920eSShuo Chen } 124c377920eSShuo Chen } 125c377920eSShuo Chen } 126da39c979SShuo Chen } 127c377920eSShuo Chen 128c377920eSShuo Chen LOG_INFO << "All done " << timer.report(total) << " " 129da39c979SShuo Chen << muduo::Fmt("%'ld", lines) << " lines " 130da39c979SShuo Chen << muduo::Fmt("%'ld", count) << " count"; 131c377920eSShuo Chen} 132