From 690ff4c3e44e7063ebde2557b7c0087ab720b894 Mon Sep 17 00:00:00 2001 From: Julian Andres Klode Date: Wed, 14 Aug 2019 14:38:26 +0200 Subject: Add initial support for parsing patterns into parse trees Introduce a parser for patterns that generates a parse tree. The language understood by the parser is: pattern = '?'TERM | '?'TERM '(' pattern (',' pattern)* ','? ')' | WORD | QUOTED-WORD TERM = [0-9a-zA-Z-] WORD = [0-9a-ZA-Z-.*^$\[\]_\\] QUOTED_WORD = "..." # you know what I mean This language is context free, which is a massive simplification from aptitude's language, where ?foo(bar) could have two different meanings depending on whether ?foo takes an argument or not. --- apt-pkg/cachefilter-patterns.cc | 205 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 apt-pkg/cachefilter-patterns.cc (limited to 'apt-pkg/cachefilter-patterns.cc') diff --git a/apt-pkg/cachefilter-patterns.cc b/apt-pkg/cachefilter-patterns.cc new file mode 100644 index 000000000..3c958ebae --- /dev/null +++ b/apt-pkg/cachefilter-patterns.cc @@ -0,0 +1,205 @@ +/* + * cachefilter-patterns.cc - Parser for aptitude-style patterns + * + * Copyright (c) 2019 Canonical Ltd + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include + +namespace APT +{ +namespace Internal +{ + +template +std::string rstrprintf(Args... args) +{ + std::string str; + strprintf(str, std::forward(args)...); + return str; +} + +// Parse a complete pattern, make sure it's the entire input +std::unique_ptr PatternTreeParser::parseTop() +{ + skipSpace(); + auto node = parse(); + skipSpace(); + + if (node->end != sentence.size()) + { + Node node2; + + node2.start = node->end; + node2.end = sentence.size(); + throw Error{node2, "Expected end of file"}; + } + + return node; +} + +// Parse any pattern +std::unique_ptr PatternTreeParser::parse() +{ + std::unique_ptr node; + if ((node = parsePattern()) != nullptr) + return node; + if ((node = parseQuotedWord()) != nullptr) + return node; + if ((node = parseWord()) != nullptr) + return node; + + Node eNode; + eNode.end = eNode.start = state.offset; + throw Error{eNode, "Expected pattern, quoted word, or word"}; +} + +// Parse a list pattern (or function call pattern) +std::unique_ptr PatternTreeParser::parsePattern() +{ + static const APT::StringView CHARS("0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "-"); + if (sentence[state.offset] != '?') + return nullptr; + + auto node = std::make_unique(); + node->end = node->start = state.offset; + state.offset++; + + while (CHARS.find(sentence[state.offset]) != APT::StringView::npos) + { + ++state.offset; + } + + node->term = sentence.substr(node->start, state.offset - node->start); + + node->end = skipSpace(); + // We don't have any arguments, return node; + if (sentence[state.offset] != '(') + return node; + node->end = ++state.offset; + skipSpace(); + + node->haveArgumentList = true; + + // Empty argument list, return + if (sentence[state.offset] == ')') + { + node->end = ++state.offset; + return node; + } + + node->arguments.push_back(parse()); + skipSpace(); + while (sentence[state.offset] == ',') + { + ++state.offset; + skipSpace(); + // This was a trailing comma - allow it and break the loop + if (sentence[state.offset] == ')') + break; + node->arguments.push_back(parse()); + skipSpace(); + } + + if (sentence[state.offset] != ')') + throw Error{*node, rstrprintf("Expected closing parenthesis, received %d", sentence[state.offset])}; + + node->end = ++state.offset; + return node; +} + +// Parse a quoted word atom +std::unique_ptr PatternTreeParser::parseQuotedWord() +{ + if (sentence[state.offset] != '"') + return nullptr; + + auto node = std::make_unique(); + node->start = state.offset; + + // Eat beginning of string + state.offset++; + + while (sentence[state.offset] != '"' && sentence[state.offset] != '\0') + state.offset++; + + // End of string + if (sentence[state.offset] != '"') + throw Error{*node, "Could not find end of string"}; + state.offset++; + + node->end = state.offset; + node->word = sentence.substr(node->start + 1, node->end - node->start - 2); + + return node; +} + +// Parse a bare word atom +std::unique_ptr PatternTreeParser::parseWord() +{ + static const APT::StringView CHARS("0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "-.*^$[]_\\"); + if (CHARS.find(sentence[state.offset]) == APT::StringView::npos) + return nullptr; + + auto node = std::make_unique(); + node->start = state.offset; + + while (CHARS.find(sentence[state.offset]) != APT::StringView::npos) + state.offset++; + + node->end = state.offset; + node->word = sentence.substr(node->start, node->end - node->start); + return node; +} + +// Rendering of the tree in JSON for debugging +std::ostream &PatternTreeParser::PatternNode::render(std::ostream &os) +{ + os << "{" + << "\"term\": \"" << term.to_string() << "\",\n" + << "\"arguments\": [\n"; + for (auto &node : arguments) + node->render(os) << "," << std::endl; + os << "null]\n"; + os << "}\n"; + return os; +} + +std::ostream &PatternTreeParser::WordNode::render(std::ostream &os) +{ + os << '"' << word.to_string() << '"'; + return os; +} + +std::nullptr_t PatternTreeParser::Node::error(std::string message) +{ + throw Error{*this, message}; +} + +bool PatternTreeParser::PatternNode::matches(APT::StringView name, int min, int max) +{ + if (name != term) + return false; + if (max != 0 && !haveArgumentList) + error(rstrprintf("%s expects an argument list", term.to_string().c_str())); + if (max == 0 && haveArgumentList) + error(rstrprintf("%s does not expect an argument list", term.to_string().c_str())); + if (min >= 0 && min == max && (arguments.size() != size_t(min))) + error(rstrprintf("%s expects %d arguments, but received %d arguments", term.to_string().c_str(), min, arguments.size())); + if (min >= 0 && arguments.size() < size_t(min)) + error(rstrprintf("%s expects at least %d arguments, but received %d arguments", term.to_string().c_str(), min, arguments.size())); + if (max >= 0 && arguments.size() > size_t(max)) + error(rstrprintf("%s expects at most %d arguments, but received %d arguments", term.to_string().c_str(), max, arguments.size())); + return true; +} + +} // namespace Internal +} // namespace APT -- cgit v1.2.3