Index: omqueryinternal.cc =================================================================== --- omqueryinternal.cc (revision 7525) +++ omqueryinternal.cc (working copy) @@ -657,12 +657,38 @@ } } -/// Change, eg, A NEAR (B AND C) to (A NEAR B) AND (A NEAR C) +/// NEAR and PHRASE are distributive over Boolean operators. I.e.: +/// A NEAR (B AND C) is equivalent to (A NEAR B) AND (A NEAR C) +/// +/// As the NEAR/PHRASE matcher currently only accepts leaf terms, we +/// transform an original NEAR/PHRASE query by executing the +/// distribution until all subqueries of NEAR/PHRASE queries are leaves. +/// +/// The distributing operation is performed one subquery at a time, by +/// building a new query which then replaces (is swapped with) the +/// original. The new object is not a NEAR/PHRASE query any more, but +/// its subqueries are, and the process is repeated by recursively +/// calling flatten_subqs() on them. +/// +/// The new query is built in the following fashion: +/// - It has an operation which is the one from the distributed +/// subquery (i.e. AND/OR) +/// - Its list of subqueries is obtained by successively replacing in +/// the parent object's subquery list the distributed subquery by +/// each of its elements. +/// void Xapian::Query::Internal::flatten_subqs() { - Assert(op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE); + if (!(op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE)) + return; + //static string tabs; + //fprintf(stderr, "%sflatten_subqs():%s: %s\n", tabs.c_str(), + // get_op_name(op).c_str(), get_description().c_str()); + //tabs += " "; + + // Look for a non-leaf subquery. If none found, there is nothing to do. subquery_list::iterator sq; for (sq = subqs.begin(); sq != subqs.end(); sq++) { if (!is_leaf((*sq)->op)) break; @@ -675,17 +701,23 @@ throw Xapian::UnimplementedError("Can't use NEAR/PHRASE with a subexpression containing NEAR or PHRASE"); } + // Got one non-leaf subquery.
Make a copy of it, and replace + // it with a hole in my subquery list AutoPtr flattenme(*sq); *sq = 0; + // Note: this object is invalid at this point: there is a hole in subqs! - // New query to build up. + // New query to build up. Its operator is the one from the + // subquery we're distributing over Xapian::Query::Internal newq(flattenme->op, 0); + // Successively place the subquery's subqueries in the empty + // space, and add the resulting query (me) as a subquery of + // the one we are building subquery_list::iterator j; for (j = flattenme->subqs.begin(); j != flattenme->subqs.end(); ++j) { *sq = *j; *j = 0; - flatten_subqs(); newq.add_subquery(*this); delete *sq; *sq = 0; @@ -695,14 +727,32 @@ Assert(newq2); this->swap(*newq2); } + + // I am now a boolean query combining possibly non-flat + // NEAR/PHRASE subqueries. Repeat the process on them. Note that + // while the tree has deepened, the NEAR layer has also gone down + // one step, which it does at each recursion. As the tree will + // stop deepening when all the subqueries are leaves, the recursion + // is finite. + for (sq = subqs.begin(); sq != subqs.end(); sq++) { + (*sq)->flatten_subqs(); + } + + //tabs.erase(tabs.length() - 2); + //fprintf(stderr, "%sdone flatten_subqs()\n", tabs.c_str()); } +/** + * Add a subquery. If the subquery is of the same AND, OR, XOR type as + * me, I can add its own subqueries to mine, which avoids deepening the + * tree. Else just append it to my list. + */ void Xapian::Query::Internal::add_subquery(const Xapian::Query::Internal & subq) { Assert(!is_leaf(op)); if (op == subq.op && (op == OP_AND || op == OP_OR || op == OP_XOR)) { - // Distribute the subquery. + // Flatten: concatenate the subquery's subqueries to mine for (subquery_list::const_iterator i = subq.subqs.begin(); i != subq.subqs.end(); i++) { add_subquery(**i);