OILS / vendor / souffle / profile / Reader.h View on Github | oilshell.org

487 lines, 312 significant
1/*
2 * Souffle - A Datalog Compiler
3 * Copyright (c) 2016, The Souffle Developers. All rights reserved
4 * Licensed under the Universal Permissive License v 1.0 as shown at:
5 * - https://opensource.org/licenses/UPL
6 * - <souffle root>/licenses/SOUFFLE-UPL.txt
7 */
8
9#pragma once
10
11#include "souffle/profile/Iteration.h"
12#include "souffle/profile/ProfileDatabase.h"
13#include "souffle/profile/ProfileEvent.h"
14#include "souffle/profile/ProgramRun.h"
15#include "souffle/profile/Relation.h"
16#include "souffle/profile/Rule.h"
17#include "souffle/profile/StringUtils.h"
18#include <cassert>
19#include <chrono>
20#include <cstdlib>
21#include <ctime>
22#include <fstream>
23#include <iostream>
24#include <memory>
25#include <string>
26#include <thread>
27#include <unordered_map>
28#include <utility>
29#include <vector>
30#ifndef _MSC_VER
31#include <dirent.h>
32#endif
33#include <sys/stat.h>
34
35namespace souffle {
36namespace profile {
37
38namespace {
39template <typename T>
40class DSNVisitor : public Visitor {
41public:
42 DSNVisitor(T& base) : base(base) {}
43 void visit(TextEntry& text) override {
44 if (text.getKey() == "source-locator") {
45 base.setLocator(text.getText());
46 }
47 }
48 void visit(DurationEntry& duration) override {
49 if (duration.getKey() == "runtime") {
50 base.setStarttime(duration.getStart());
51 base.setEndtime(duration.getEnd());
52 }
53 }
54 void visit(SizeEntry& size) override {
55 if (size.getKey() == "num-tuples") {
56 base.setNumTuples(size.getSize());
57 }
58 }
59 void visit(DirectoryEntry& /* ruleEntry */) override {}
60
61protected:
62 T& base;
63};
64
65/**
66 * Visit ProfileDB atom frequencies.
67 * atomrule : {atom: {num-tuples: num}}
68 */
69class AtomFrequenciesVisitor : public Visitor {
70public:
71 AtomFrequenciesVisitor(Rule& rule) : rule(rule) {}
72 void visit(DirectoryEntry& directory) override {
73 const std::string& clause = directory.getKey();
74
75 for (auto& key : directory.getKeys()) {
76 auto* level = as<SizeEntry>(directory.readDirectoryEntry(key)->readEntry("level"));
77 auto* frequency = as<SizeEntry>(directory.readDirectoryEntry(key)->readEntry("num-tuples"));
78 // Handle older logs
79 std::size_t intFreq = frequency == nullptr ? 0 : frequency->getSize();
80 std::size_t intLevel = level == nullptr ? 0 : level->getSize();
81 rule.addAtomFrequency(clause, key, intLevel, intFreq);
82 }
83 }
84
85private:
86 Rule& rule;
87};
88
89/**
90 * Visit ProfileDB recursive rule.
91 * ruleversion: {DSN}
92 */
93class RecursiveRuleVisitor : public DSNVisitor<Rule> {
94public:
95 RecursiveRuleVisitor(Rule& rule) : DSNVisitor(rule) {}
96 void visit(DirectoryEntry& directory) override {
97 if (directory.getKey() == "atom-frequency") {
98 AtomFrequenciesVisitor atomFrequenciesVisitor(base);
99 for (auto& key : directory.getKeys()) {
100 directory.readDirectoryEntry(key)->accept(atomFrequenciesVisitor);
101 }
102 }
103 }
104};
105
106/**
107 * Visit ProfileDB non-recursive rules.
108 * rule: {versionNum : {DSN}, versionNum+1: {DSN}}
109 */
110class RecursiveRulesVisitor : public Visitor {
111public:
112 RecursiveRulesVisitor(Iteration& iteration, Relation& relation)
113 : iteration(iteration), relation(relation) {}
114 void visit(DirectoryEntry& ruleEntry) override {
115 for (const auto& key : ruleEntry.getKeys()) {
116 auto& versions = *ruleEntry.readDirectoryEntry(key);
117 auto rule = std::make_shared<Rule>(
118 ruleEntry.getKey(), std::stoi(key), relation.createRecID(ruleEntry.getKey()));
119 RecursiveRuleVisitor visitor(*rule);
120 for (const auto& versionKey : versions.getKeys()) {
121 versions.readEntry(versionKey)->accept(visitor);
122 }
123 // To match map keys defined in Iteration::addRule()
124 std::string ruleKey = key + rule->getLocator() + key;
125 iteration.addRule(ruleKey, rule);
126 }
127 }
128
129protected:
130 Iteration& iteration;
131 Relation& relation;
132};
133
134/**
135 * Visit ProfileDB non-recursive rule.
136 * rule: {DSN}
137 */
138class NonRecursiveRuleVisitor : public DSNVisitor<Rule> {
139public:
140 NonRecursiveRuleVisitor(Rule& rule) : DSNVisitor(rule) {}
141 void visit(DirectoryEntry& directory) override {
142 if (directory.getKey() == "atom-frequency") {
143 AtomFrequenciesVisitor atomFrequenciesVisitor(base);
144 for (auto& key : directory.getKeys()) {
145 directory.readDirectoryEntry(key)->accept(atomFrequenciesVisitor);
146 }
147 }
148 }
149};
150
151/**
152 * Visit ProfileDB non-recursive rules.
153 * non-recursive-rule: {rule1: {DSN}, ...}
154 */
155class NonRecursiveRulesVisitor : public Visitor {
156public:
157 NonRecursiveRulesVisitor(Relation& relation) : relation(relation) {}
158 void visit(DirectoryEntry& ruleEntry) override {
159 auto rule = std::make_shared<Rule>(ruleEntry.getKey(), relation.createID());
160 NonRecursiveRuleVisitor visitor(*rule);
161 for (const auto& key : ruleEntry.getKeys()) {
162 ruleEntry.readEntry(key)->accept(visitor);
163 }
164 relation.addRule(rule);
165 }
166
167protected:
168 Relation& relation;
169};
170
171/**
172 * Visit a ProfileDB relation iteration.
173 * iterationNumber: {DSN, recursive-rule: {}}
174 */
175class IterationVisitor : public DSNVisitor<Iteration> {
176public:
177 IterationVisitor(Iteration& iteration, Relation& relation) : DSNVisitor(iteration), relation(relation) {}
178 void visit(DurationEntry& duration) override {
179 if (duration.getKey() == "copytime") {
180 auto copytime = (duration.getEnd() - duration.getStart());
181 base.setCopytime(copytime);
182 }
183 DSNVisitor::visit(duration);
184 }
185 void visit(DirectoryEntry& directory) override {
186 if (directory.getKey() == "recursive-rule") {
187 RecursiveRulesVisitor rulesVisitor(base, relation);
188 for (const auto& key : directory.getKeys()) {
189 directory.readEntry(key)->accept(rulesVisitor);
190 }
191 }
192 if (directory.getKey() == "maxRSS") {
193 auto* preMaxRSS = as<SizeEntry>(directory.readEntry("pre"));
194 auto* postMaxRSS = as<SizeEntry>(directory.readEntry("post"));
195 relation.setPreMaxRSS(preMaxRSS->getSize());
196 relation.setPostMaxRSS(postMaxRSS->getSize());
197 }
198 }
199
200protected:
201 Relation& relation;
202};
203
204/**
205 * Visit ProfileDB iterations.
206 * iteration: {num: {}, num2: {}, ...}
207 */
208class IterationsVisitor : public Visitor {
209public:
210 IterationsVisitor(Relation& relation) : relation(relation) {}
211 void visit(DirectoryEntry& ruleEntry) override {
212 auto iteration = std::make_shared<Iteration>();
213 relation.addIteration(iteration);
214 IterationVisitor visitor(*iteration, relation);
215 for (const auto& key : ruleEntry.getKeys()) {
216 ruleEntry.readEntry(key)->accept(visitor);
217 }
218 }
219
220protected:
221 Relation& relation;
222};
223
224/**
225 * Visit ProfileDB relations.
226 * relname: {DSN, non-recursive-rule: {}, iteration: {...}}
227 */
228class RelationVisitor : public DSNVisitor<Relation> {
229public:
230 RelationVisitor(Relation& relation) : DSNVisitor(relation) {}
231 void visit(DurationEntry& duration) override {
232 if (duration.getKey() == "loadtime") {
233 base.setLoadtime(duration.getStart(), duration.getEnd());
234 } else if (duration.getKey() == "savetime") {
235 auto savetime = (duration.getEnd() - duration.getStart());
236 base.setSavetime(savetime);
237 }
238 DSNVisitor::visit(duration);
239 }
240 void visit(DirectoryEntry& directory) override {
241 if (directory.getKey() == "iteration") {
242 IterationsVisitor iterationsVisitor(base);
243 for (const auto& key : directory.getKeys()) {
244 directory.readEntry(key)->accept(iterationsVisitor);
245 }
246 } else if (directory.getKey() == "non-recursive-rule") {
247 NonRecursiveRulesVisitor rulesVisitor(base);
248 for (const auto& key : directory.getKeys()) {
249 directory.readEntry(key)->accept(rulesVisitor);
250 }
251 } else if (directory.getKey() == "maxRSS") {
252 auto* preMaxRSS = as<SizeEntry>(directory.readEntry("pre"));
253 auto* postMaxRSS = as<SizeEntry>(directory.readEntry("post"));
254 base.setPreMaxRSS(preMaxRSS->getSize());
255 base.setPostMaxRSS(postMaxRSS->getSize());
256 }
257 }
258 void visit(SizeEntry& size) override {
259 if (size.getKey() == "reads") {
260 base.addReads(size.getSize());
261 } else {
262 DSNVisitor::visit(size);
263 }
264 }
265};
266} // namespace
267
268/*
269 * Input reader and processor for log files
270 */
271class Reader {
272private:
273 std::string file_loc;
274 std::streampos gpos;
275 const ProfileDatabase& db = ProfileEventSingleton::instance().getDB();
276 bool loaded = false;
277 bool online{true};
278
279 std::unordered_map<std::string, std::shared_ptr<Relation>> relationMap{};
280 std::unordered_map<std::string, std::unordered_map<std::string, double>> countRecursiveJoinSizeMap{};
281 std::unordered_map<std::string, double> countNonRecursiveJoinSizeMap{};
282 int rel_id{0};
283
284public:
285 std::shared_ptr<ProgramRun> run;
286
287 Reader(std::string filename, std::shared_ptr<ProgramRun> run)
288 : file_loc(std::move(filename)), run(std::move(run)) {
289 try {
290 ProfileEventSingleton::instance().setDBFromFile(file_loc);
291 } catch (const std::exception& e) {
292 fatal("exception whilst reading profile DB: %s", e.what());
293 }
294 }
295
296 Reader(std::shared_ptr<ProgramRun> run) : run(std::move(run)) {}
297 /**
298 * Read the contents from file into the class
299 */
300 void processFile() {
301 rel_id = 0;
302 relationMap.clear();
303 auto programDuration = as<DurationEntry>(db.lookupEntry({"program", "runtime"}));
304 if (programDuration == nullptr) {
305 auto startTimeEntry = as<TimeEntry>(db.lookupEntry({"program", "starttime"}));
306 if (startTimeEntry != nullptr) {
307 run->setStarttime(startTimeEntry->getTime());
308 run->setEndtime(std::chrono::duration_cast<microseconds>(now().time_since_epoch()));
309 loaded = true;
310 }
311 } else {
312 run->setStarttime(programDuration->getStart());
313 run->setEndtime(programDuration->getEnd());
314 online = false;
315 }
316
317 auto prefix = as<DirectoryEntry>(db.lookupEntry({"program", "statistics", "relation"}));
318 if (prefix != nullptr) {
319 for (const auto& rel : prefix->getKeys()) {
320 auto prefixWithRel = as<DirectoryEntry>(
321 db.lookupEntry({"program", "statistics", "relation", rel, "attributes"}));
322 if (prefixWithRel != nullptr) {
323 for (const auto& attributes : prefixWithRel->getKeys()) {
324 auto prefixWithAttributes = as<DirectoryEntry>(db.lookupEntry({"program",
325 "statistics", "relation", rel, "attributes", attributes, "constants"}));
326 if (prefixWithAttributes == nullptr) {
327 continue;
328 }
329 for (const auto& constants : prefixWithAttributes->getKeys()) {
330 auto fullKey = as<TextEntry>(db.lookupEntry({"program", "statistics", "relation",
331 rel, "attributes", attributes, "constants", constants}));
332 if (fullKey != nullptr) {
333 double joinSize = std::stod(fullKey->getText());
334 std::string key = rel + " " + attributes + " " + constants;
335 countNonRecursiveJoinSizeMap[key] = joinSize;
336 }
337 }
338 }
339 }
340
341 auto prefixWithRecursiveRel = as<DirectoryEntry>(
342 db.lookupEntry({"program", "statistics", "relation", rel, "iteration"}));
343 if (prefixWithRecursiveRel != nullptr) {
344 for (const auto& iteration : prefixWithRecursiveRel->getKeys()) {
345 auto prefixWithIteration = as<DirectoryEntry>(db.lookupEntry({"program", "statistics",
346 "relation", rel, "iteration", iteration, "attributes"}));
347 if (prefixWithIteration == nullptr) {
348 continue;
349 }
350
351 for (const auto& attributes : prefixWithIteration->getKeys()) {
352 auto prefixWithAttributes = as<DirectoryEntry>(
353 db.lookupEntry({"program", "statistics", "relation", rel, "iteration",
354 iteration, "attributes", attributes, "constants"}));
355 if (prefixWithAttributes == nullptr) {
356 continue;
357 }
358 for (const auto& constants : prefixWithAttributes->getKeys()) {
359 auto fullKey = as<TextEntry>(db.lookupEntry(
360 {"program", "statistics", "relation", rel, "iteration", iteration,
361 "attributes", attributes, "constants", constants}));
362 double joinSize = std::stod(fullKey->getText());
363 if (fullKey != nullptr) {
364 std::string key = rel + " " + attributes + " " + constants;
365 countRecursiveJoinSizeMap[key][iteration] = joinSize;
366 }
367 }
368 }
369 }
370 }
371 }
372 }
373
374 auto relations = as<DirectoryEntry>(db.lookupEntry({"program", "relation"}));
375 if (relations == nullptr) {
376 // Souffle hasn't generated any profiling information yet
377 // or program is empty.
378 return;
379 }
380 for (const auto& cur : relations->getKeys()) {
381 auto relation = as<DirectoryEntry>(db.lookupEntry({"program", "relation", cur}));
382 if (relation != nullptr) {
383 addRelation(*relation);
384 }
385 }
386 for (const auto& relation : relationMap) {
387 for (const auto& rule : relation.second->getRuleMap()) {
388 for (const auto& atom : rule.second->getAtoms()) {
389 std::string relationName = extractRelationNameFromAtom(atom);
390 relationMap[relationName]->addReads(atom.frequency);
391 }
392 }
393 for (const auto& iteration : relation.second->getIterations()) {
394 for (const auto& rule : iteration->getRules()) {
395 for (const auto& atom : rule.second->getAtoms()) {
396 std::string relationName = extractRelationNameFromAtom(atom);
397 if (relationName.substr(0, 6) == "@delta") {
398 relationName = relationName.substr(7);
399 }
400 if (relationName.substr(0, 4) == "@new") {
401 relationName = relationName.substr(5);
402 }
403 assert(relationMap.count(relationName) > 0 || "Relation name for atom not found");
404 relationMap[relationName]->addReads(atom.frequency);
405 }
406 }
407 }
408 }
409 run->setRelationMap(this->relationMap);
410 loaded = true;
411 }
412
413 void save(std::string f_name);
414
415 inline bool isLive() {
416 return online;
417 }
418
419 bool hasAutoSchedulerStats() {
420 return !countNonRecursiveJoinSizeMap.empty() || !countRecursiveJoinSizeMap.empty();
421 }
422
423 double getNonRecursiveEstimateJoinSize(
424 const std::string& rel, const std::string& attributes, const std::string& constants) {
425 auto key = rel + " " + attributes + " " + constants;
426 return countNonRecursiveJoinSizeMap.at(key);
427 }
428
429 std::size_t getIterations(const std::string& rel) {
430 for (auto& [key, m] : countRecursiveJoinSizeMap) {
431 std::string token = key.substr(0, key.find(" "));
432 if (token == rel) {
433 return m.size();
434 }
435 }
436 assert(false);
437 return 0;
438 }
439
440 double getRecursiveEstimateJoinSize(const std::string& rel, const std::string& attributes,
441 const std::string& constants, const std::string& iteration) {
442 auto key = rel + " " + attributes + " " + constants;
443 auto& m = countRecursiveJoinSizeMap.at(key);
444 return m.at(iteration);
445 }
446
447 void addRelation(const DirectoryEntry& relation) {
448 const std::string& name = cleanRelationName(relation.getKey());
449
450 relationMap.emplace(name, std::make_shared<Relation>(name, createId()));
451 auto& rel = *relationMap[name];
452 RelationVisitor relationVisitor(rel);
453
454 for (const auto& key : relation.getKeys()) {
455 relation.readEntry(key)->accept(relationVisitor);
456 }
457 }
458
459 inline bool isLoaded() {
460 return loaded;
461 }
462
463 std::string RelationcreateId() {
464 return "R" + std::to_string(++rel_id);
465 }
466
467 std::string createId() {
468 return "R" + std::to_string(++rel_id);
469 }
470
471protected:
472 std::string cleanRelationName(const std::string& relationName) {
473 std::string cleanName = relationName;
474 for (auto& cur : cleanName) {
475 if (cur == '-') {
476 cur = '.';
477 }
478 }
479 return cleanName;
480 }
481 std::string extractRelationNameFromAtom(const Atom& atom) {
482 return cleanRelationName(atom.identifier.substr(0, atom.identifier.find('(')));
483 }
484};
485
486} // namespace profile
487} // namespace souffle