summaryrefslogtreecommitdiffstats
path: root/xapian-core/build/xapNearDistrib-1.0.patch
blob: 7eadae038954968f48c23484040b07c1fbc3e471 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
--- omqueryinternal.cc.orig	2007-07-05 02:39:41.000000000 +0200
+++ omqueryinternal.cc	2007-08-30 12:47:03.000000000 +0200
@@ -763,12 +763,38 @@
     }
 }
 
-/// Change, eg, A NEAR (B AND C) to (A NEAR B) AND (A NEAR C)
+/// NEAR and PHRASE are distributive over Boolean operators. Ie:
+/// A NEAR (B AND C) is equivalent to (A NEAR B) AND (A NEAR C)
+///
+/// As the NEAR/PHRASE matcher currently only accepts leaf terms, we
+/// transform an original NEAR/PHRASE query by executing the
+/// distribution until all subqueries of NEAR/PHRASE queries are leaves.
+///
+/// The distributing operation is performed one subquery at a time, by
+/// building a new query which then replaces (is swapped with) the
+/// original. The new object is not a NEAR/PHRASE query any more, but
+/// its subqueries are, and the process is repeated by recursively
+/// calling flatten_subqs() on them.
+///
+/// The new query is built in the following fashion:
+///  - It has an operation which is the one from the distributed
+///    subquery (ie AND/OR)
+///  - Its list of subqueries is obtained by successively replacing in
+///    the parent object's subquery list the distributed subquery by
+///    each of its elements.
+/// 
 void
 Xapian::Query::Internal::flatten_subqs()
 {
-    Assert(op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE);
+    if (!(op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE))
+	return;
 
+    //static string tabs;
+    //fprintf(stderr, "%sflatten_subqs():%s: %s\n", tabs.c_str(),
+    //  get_op_name(op).c_str(), get_description().c_str());
+    //tabs += "  ";
+
+    // Look for a non-leaf subquery. If none is found, there is nothing to do.
     subquery_list::iterator sq;
     for (sq = subqs.begin(); sq != subqs.end(); sq++) {
 	if (!is_leaf((*sq)->op)) break;
@@ -781,17 +807,23 @@
 	    throw Xapian::UnimplementedError("Can't use NEAR/PHRASE with a subexpression containing NEAR or PHRASE");
 	}
 
+	// Got one non-leaf subquery. Move it into an AutoPtr, and leave
+	// a hole in its place in my subquery list
 	AutoPtr<Xapian::Query::Internal> flattenme(*sq);
 	*sq = 0;
+	// Note: this object is invalid here: there is a hole in subqs!
 
-	// New query to build up.
+	// New query to build up. Its operator is the one from the
+	// subquery we're distributing over
 	Xapian::Query::Internal newq(flattenme->op, 0);
 
+	// Successively place the subquery's subqueries in the empty
+	// space, and add the resulting query (me) as a subquery of
+	// the one we are building
 	subquery_list::iterator j;
 	for (j = flattenme->subqs.begin(); j != flattenme->subqs.end(); ++j) {
 	    *sq = *j;
 	    *j = 0;
-	    flatten_subqs();
 	    newq.add_subquery(this);
 	    delete *sq;
 	    *sq = 0;
@@ -801,6 +833,19 @@
 	Assert(newq2);
 	this->swap(*newq2);
     }
+
+    // I am now a boolean query combining possibly non-flat
+    // NEAR/PHRASE subqueries. Repeat the process on them. Note that
+    // while the tree has deepened, the NEAR layer also has gone down
+    // one step, which it does at each recursion. As the tree will
+    // stop deepening when all the subqueries are leaves, the recursion
+    // is finite.
+    for (sq = subqs.begin(); sq != subqs.end(); sq++) {
+    	(*sq)->flatten_subqs();
+    }
+    
+    //tabs.erase(tabs.length() - 2);
+    //fprintf(stderr, "%sdone flatten_subqs()\n", tabs.c_str());
 }
 
 void