1#include "file.h"
2#include "input.h"
3#include "timer.h"
4
5#include "absl/container/flat_hash_set.h"
6
7int main(int argc, char* argv[])
8{
9  setlocale(LC_NUMERIC, "");
10
11  bool combine = false;
12  bool sequential = false;
13  int buffer_size = kBufferSize;
14  int opt;
15  while ((opt = getopt(argc, argv, "b:cs")) != -1)
16  {
17    switch (opt)
18    {
19      case 'b':
20        buffer_size = atoi(optarg);
21        break;
22      case 'c':
23        combine = true;
24        break;
25      case 's':
26        sequential = true;
27        break;
28    }
29  }
30
31  LOG_INFO << "Reading " << argc - optind << (combine ? " segment " : "") << " files "
32      << (sequential ? "sequentially" : "randomly")
33      << ", buffer size " << buffer_size;
34  Timer timer;
35  int64_t total = 0;
36  int64_t lines = 0;
37  int64_t count = 0;
38
39  if (combine)
40  {
41  std::vector<std::unique_ptr<SegmentInput>> inputs;
42  inputs.reserve(argc - optind);
43  for (int i = optind; i < argc; ++i)
44  {
45    inputs.emplace_back(new SegmentInput(argv[i], buffer_size));
46  }
47
48  if (sequential)
49  {
50    for (const auto& input : inputs)
51    {
52      Timer t;
53      //std::string line;
54      while (input->next())
55      {
56        count += input->current_count();
57        ++lines;
58      }
59      int64_t len = input->tell();
60      LOG_INFO << "Done " << input->filename() << " " << t.report(len);
61      total += len;
62    }
63  }
64  else
65  {
66  }
67  }
68  else
69  {
70  std::vector<std::unique_ptr<InputFile>> files;
71  files.reserve(argc - optind);
72  for (int i = optind; i < argc; ++i)
73  {
74    files.emplace_back(new InputFile(argv[i], buffer_size));
75  }
76
77  if (sequential)
78  {
79    for (const auto& file : files)
80    {
81      Timer t;
82      std::string line;
83      while (file->getline(&line))
84      {
85        ++lines;
86      }
87      int64_t len = file->tell();
88      LOG_DEBUG << "Done " << file->filename() << " " << t.report(len);
89      total += len;
90    }
91  }
92  else
93  {
94    std::string line;
95    absl::flat_hash_set<InputFile*> toRemove;
96    while (!files.empty())
97    {
98      toRemove.clear();
99      // read one line from each file
100      for (const auto& file : files)
101      {
102        if (file->getline(&line))
103        {
104          ++lines;
105        }
106        else
107        {
108          toRemove.insert(file.get());
109        }
110      }
111      if (!toRemove.empty())
112      {
113        for (auto* f : toRemove)
114        {
115          total += f->tell();
116          LOG_DEBUG << "Done " << f->filename();
117        }
118        // std::partition?
119        auto it = std::remove_if(files.begin(), files.end(),
120                                 [&toRemove] (const auto& f) { return toRemove.count(f.get()) > 0; });
121        assert(files.end() - it == toRemove.size());
122        files.erase(it, files.end());
123      }
124    }
125  }
126  }
127
128  LOG_INFO << "All done " << timer.report(total) << " "
129      << muduo::Fmt("%'ld", lines) << " lines "
130      << muduo::Fmt("%'ld", count) << " count";
131}
132