1#include "file.h" 2#include "input.h" 3#include "timer.h" 4 5#include "absl/container/flat_hash_set.h" 6 7int main(int argc, char* argv[]) 8{ 9 setlocale(LC_NUMERIC, ""); 10 11 bool combine = false; 12 bool sequential = false; 13 int buffer_size = kBufferSize; 14 int opt; 15 while ((opt = getopt(argc, argv, "b:cs")) != -1) 16 { 17 switch (opt) 18 { 19 case 'b': 20 buffer_size = atoi(optarg); 21 break; 22 case 'c': 23 combine = true; 24 break; 25 case 's': 26 sequential = true; 27 break; 28 } 29 } 30 31 LOG_INFO << "Reading " << argc - optind << (combine ? " segment " : "") << " files " 32 << (sequential ? "sequentially" : "randomly") 33 << ", buffer size " << buffer_size; 34 Timer timer; 35 int64_t total = 0; 36 int64_t lines = 0; 37 int64_t count = 0; 38 39 if (combine) 40 { 41 std::vector<std::unique_ptr<SegmentInput>> inputs; 42 inputs.reserve(argc - optind); 43 for (int i = optind; i < argc; ++i) 44 { 45 inputs.emplace_back(new SegmentInput(argv[i], buffer_size)); 46 } 47 48 if (sequential) 49 { 50 for (const auto& input : inputs) 51 { 52 Timer t; 53 //std::string line; 54 while (input->next()) 55 { 56 count += input->current_count(); 57 ++lines; 58 } 59 int64_t len = input->tell(); 60 LOG_INFO << "Done " << input->filename() << " " << t.report(len); 61 total += len; 62 } 63 } 64 else 65 { 66 } 67 } 68 else 69 { 70 std::vector<std::unique_ptr<InputFile>> files; 71 files.reserve(argc - optind); 72 for (int i = optind; i < argc; ++i) 73 { 74 files.emplace_back(new InputFile(argv[i], buffer_size)); 75 } 76 77 if (sequential) 78 { 79 for (const auto& file : files) 80 { 81 Timer t; 82 std::string line; 83 while (file->getline(&line)) 84 { 85 ++lines; 86 } 87 int64_t len = file->tell(); 88 LOG_DEBUG << "Done " << file->filename() << " " << t.report(len); 89 total += len; 90 } 91 } 92 else 93 { 94 std::string line; 95 absl::flat_hash_set<InputFile*> toRemove; 96 while (!files.empty()) 97 { 98 toRemove.clear(); 99 // read one line from each file 100 for (const auto& file : files) 101 { 102 if (file->getline(&line)) 103 { 104 ++lines; 105 } 106 else 107 { 108 toRemove.insert(file.get()); 109 } 110 } 111 if (!toRemove.empty()) 112 { 113 for (auto* f : toRemove) 114 { 115 total += f->tell(); 116 LOG_DEBUG << "Done " << f->filename(); 117 } 118 // std::partition? 119 auto it = std::remove_if(files.begin(), files.end(), 120 [&toRemove] (const auto& f) { return toRemove.count(f.get()) > 0; }); 121 assert(files.end() - it == toRemove.size()); 122 files.erase(it, files.end()); 123 } 124 } 125 } 126 } 127 128 LOG_INFO << "All done " << timer.report(total) << " " 129 << muduo::Fmt("%'ld", lines) << " lines " 130 << muduo::Fmt("%'ld", count) << " count"; 131} 132