001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018
019package org.apache.commons.net.nntp;
020
021/**
022 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
023 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
024 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
025 *  
026 * @author rwinston <rwinston@checkfree.com>
027 *
028 */
029
030import java.util.HashMap;
031import java.util.Iterator;
032
033public class Threader {
034    private ThreadContainer root;
035    private HashMap<String,ThreadContainer> idTable;
036    private int bogusIdCount = 0;
037
038    /**
039     * The main threader entry point - The client passes in an array of Threadable objects, and 
040     * the Threader constructs a connected 'graph' of messages
041     * @param messages
042     * @return
043     */
044    public Threadable thread(Threadable[] messages) {
045        if (messages == null)
046            return null;
047
048        idTable = new HashMap<String,ThreadContainer>();
049
050        // walk through each Threadable element
051        for (int i = 0; i < messages.length; ++i) {
052            if (!messages[i].isDummy())
053                buildContainer(messages[i]);
054        }
055
056        root = findRootSet();
057        idTable.clear();
058        idTable = null;
059
060        pruneEmptyContainers(root);
061
062        root.reverseChildren();
063        gatherSubjects();
064
065        if (root.next != null)
066            throw new RuntimeException("root node has a next:" + root);
067
068        for (ThreadContainer r = root.child; r != null; r = r.next) {
069            if (r.threadable == null)
070                r.threadable = r.child.threadable.makeDummy();
071        }
072
073        Threadable result = (root.child == null ? null : root.child.threadable);
074        root.flush();
075        root = null;
076
077        return result;
078    }
079
080    /**
081     * 
082     * @param threadable
083     */
084    private void buildContainer(Threadable threadable) {
085        String id = threadable.messageThreadId();
086        ThreadContainer container = idTable.get(id);
087
088        // A ThreadContainer exists for this id already. This should be a forward reference, but may 
089        // be a duplicate id, in which case we will need to generate a bogus placeholder id
090        if (container != null) {
091            if (container.threadable != null) { // oops! duplicate ids...
092                id = "<Bogus-id:" + (bogusIdCount++) + ">";
093                container = null;
094            } else {
095                // The container just contained a forward reference to this message, so let's
096                // fill in the threadable field of the container with this message
097                container.threadable = threadable;
098            }
099        }
100
101        // No container exists for that message Id. Create one and insert it into the hash table.
102        if (container == null) {
103            container = new ThreadContainer();
104            container.threadable = threadable;
105            idTable.put(id, container);
106        }
107
108        // Iterate through all of the references and create ThreadContainers for any references that
109        // don't have them.
110        ThreadContainer parentRef = null;
111        {
112            String[] references = threadable.messageThreadReferences();
113            for (int i = 0; i < references.length; ++i) {
114                String refString = references[i];
115                ThreadContainer ref = idTable.get(refString);
116
117                // if this id doesnt have a container, create one
118                if (ref == null) {
119                    ref = new ThreadContainer();
120                    idTable.put(refString, ref);
121                }
122
123                // Link references together in the order they appear in the References: header,
124                // IF they dont have a have a parent already &&
125                // IF it will not cause a circular reference
126                if ((parentRef != null)
127                    && (ref.parent == null)
128                    && (parentRef != ref)
129                    && !(parentRef.findChild(ref))) {
130                    // Link ref into the parent's child list
131                    ref.parent = parentRef;
132                    ref.next = parentRef.child;
133                    parentRef.child = ref;
134                }
135                parentRef = ref;
136            }
137        }
138
139        // parentRef is now set to the container of the last element in the references field. make that 
140        // be the parent of this container, unless doing so causes a circular reference
141        if (parentRef != null
142            && (parentRef == container || container.findChild(parentRef)))
143            parentRef = null;
144
145        // if it has a parent already, its because we saw this message in a References: field, and presumed
146        // a parent based on the other entries in that field. Now that we have the actual message, we can
147        // throw away the old parent and use this new one
148        if (container.parent != null) {
149            ThreadContainer rest, prev;
150
151            for (prev = null, rest = container.parent.child;
152                rest != null;
153                prev = rest, rest = rest.next) {
154                if (rest == container)
155                    break;
156            }
157
158            if (rest == null) {
159                throw new RuntimeException(
160                    "Didnt find "
161                        + container
162                        + " in parent"
163                        + container.parent);
164            }
165
166            // Unlink this container from the parent's child list
167            if (prev == null)
168                container.parent.child = container.next;
169            else
170                prev.next = container.next;
171
172            container.next = null;
173            container.parent = null;
174        }
175
176        // If we have a parent, link container into the parents child list
177        if (parentRef != null) {
178            container.parent = parentRef;
179            container.next = parentRef.child;
180            parentRef.child = container;
181        }
182    }
183
184    /**
185     * Find the root set of all existing ThreadContainers
186     * @return root the ThreadContainer representing the root node
187     */
188    private ThreadContainer findRootSet() {
189        ThreadContainer root = new ThreadContainer();
190        Iterator<String> iter = idTable.keySet().iterator();
191
192        while (iter.hasNext()) {
193            Object key = iter.next();
194            ThreadContainer c = idTable.get(key);
195            if (c.parent == null) {
196                if (c.next != null)
197                    throw new RuntimeException(
198                        "c.next is " + c.next.toString());
199                c.next = root.child;
200                root.child = c;
201            }
202        }
203        return root;
204    }
205
206    /**
207     * Delete any empty or dummy ThreadContainers
208     * @param parent
209     */
210    private void pruneEmptyContainers(ThreadContainer parent) {
211        ThreadContainer container, prev, next;
212        for (prev = null, container = parent.child, next = container.next;
213            container != null;
214            prev = container,
215                container = next,
216                next = (container == null ? null : container.next)) {
217
218            // Is it empty and without any children? If so,delete it 
219            if (container.threadable == null && container.child == null) {
220                if (prev == null)
221                    parent.child = container.next;
222                else
223                    prev.next = container.next;
224
225                // Set container to prev so that prev keeps its same value the next time through the loop
226                container = prev;
227            }
228
229            // Else if empty, with kids, and (not at root or only one kid)
230            else if (
231                container.threadable == null
232                    && container.child != null
233                    && (container.parent != null
234                        || container.child.next == null)) {
235                // We have an invalid/expired message with kids. Promote the kids to this level. 
236                ThreadContainer tail;
237                ThreadContainer kids = container.child;
238
239                // Remove this container and replace with 'kids'.
240                if (prev == null)
241                    parent.child = kids;
242                else
243                    prev.next = kids;
244
245                // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
246                // i.e. splice kids into the list in place of container
247                for (tail = kids; tail.next != null; tail = tail.next)
248                    tail.parent = container.parent;
249
250                tail.parent = container.parent;
251                tail.next = container.next;
252
253                // next currently points to the item after the inserted items in the chain - reset that so we process the newly
254                // promoted items next time round
255                next = kids;
256
257                // Set container to prev so that prev keeps its same value the next time through the loop
258                container = prev;
259            } else if (container.child != null) {
260                // A real message , with kids
261                // Iterate over the children
262                pruneEmptyContainers(container);
263            }
264        }
265    }
266
267    /**
268     *  If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 
269     */
270    private void gatherSubjects() {
271
272        int count = 0;
273
274        for (ThreadContainer c = root.child; c != null; c = c.next)
275            count++;
276
277        // TODO verify this will avoid rehashing
278        HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
279        count = 0;
280
281        for (ThreadContainer c = root.child; c != null; c = c.next) {
282            Threadable threadable = c.threadable;
283
284            // No threadable? If so, it is a dummy node in the root set.
285            // Only root set members may be dummies, and they alway have at least 2 kids
286            // Take the first kid as representative of the subject
287            if (threadable == null)
288                threadable = c.child.threadable;
289
290            String subj = threadable.simplifiedSubject();
291
292            if (subj == null || subj == "")
293                continue;
294
295            ThreadContainer old = subjectTable.get(subj);
296
297            // Add this container to the table iff:
298            // - There exists no container with this subject
299            // - or this is a dummy container and the old one is not - the dummy one is
300            // more interesting as a root, so put it in the table instead
301            // - The container in the table has a "Re:" version of this subject, and 
302            // this container has a non-"Re:" version of this subject. The non-"Re:" version
303            // is the more interesting of the two.
304            if (old == null
305                || (c.threadable == null && old.threadable != null)
306                || (old.threadable != null
307                    && old.threadable.subjectIsReply()
308                    && c.threadable != null
309                    && !c.threadable.subjectIsReply())) {
310                subjectTable.put(subj, c);
311                count++;
312            }
313        }
314
315        // If the table is empty, we're done
316        if (count == 0)
317            return;
318
319        // subjectTable is now populated with one entry for each subject which occurs in the 
320        // root set. Iterate over the root set, and gather together the difference.
321        ThreadContainer prev, c, rest;
322        for (prev = null, c = root.child, rest = c.next;
323            c != null;
324            prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
325            Threadable threadable = c.threadable;
326
327            // is it a dummy node?
328            if (threadable == null)
329                threadable = c.child.threadable;
330
331            String subj = threadable.simplifiedSubject();
332
333            // Dont thread together all subjectless messages
334            if (subj == null || subj == "")
335                continue;
336
337            ThreadContainer old = subjectTable.get(subj);
338
339            if (old == c) // That's us
340                continue;
341
342            // We have now found another container in the root set with the same subject
343            // Remove the "second" message from the root set
344            if (prev == null)
345                root.child = c.next;
346            else
347                prev.next = c.next;
348            c.next = null;
349
350            if (old.threadable == null && c.threadable == null) {
351                // both dummies - merge them
352                ThreadContainer tail;
353                for (tail = old.child;
354                    tail != null && tail.next != null;
355                    tail = tail.next);
356
357                tail.next = c.child;
358
359                for (tail = c.child; tail != null; tail = tail.next)
360                    tail.parent = old;
361
362                c.child = null;
363            } else if (
364                old.threadable == null
365                    || (c.threadable != null
366                        && c.threadable.subjectIsReply()
367                        && !old.threadable.subjectIsReply())) {
368                // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
369                c.parent = old;
370                c.next = old.child;
371                old.child = c;
372            } else {
373                // else make the old and new messages be children of a new dummy container.
374                // We create a new container object for old.msg and empty the old container
375                ThreadContainer newc = new ThreadContainer();
376                newc.threadable = old.threadable;
377                newc.child = old.child;
378
379                for (ThreadContainer tail = newc.child;
380                    tail != null;
381                    tail = tail.next)
382                    tail.parent = newc;
383
384                old.threadable = null;
385                old.child = null;
386
387                c.parent = old;
388                newc.parent = old;
389
390                // Old is now a dummy- give it 2 kids , c and newc
391                old.child = c;
392                c.next = newc;
393            }
394            // We've done a merge, so keep the same prev
395            c = prev;
396        }
397
398        subjectTable.clear();
399        subjectTable = null;
400
401    }
402}
403
404/**
405 * A placeholder utility class, used for constructing a tree of Threadables
406 * Originall implementation by Jamie Zawinski. 
407 * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a>
408 * Threadable objects
409 * @author Rory Winston <rwinston@checkfree.com>
410 */
411class ThreadContainer {
412    Threadable threadable;
413    ThreadContainer parent;
414    ThreadContainer prev;
415    ThreadContainer next;
416    ThreadContainer child;
417
418    /**
419     * 
420     * @param container
421     * @return true if child is under self's tree. Detects circular references
422     */
423    boolean findChild(ThreadContainer target) {
424        if (child == null)
425            return false;
426
427        else if (child == target)
428            return true;
429        else
430            return child.findChild(target);
431    }
432
433    // Copy the ThreadContainer tree structure down into the underlying Threadable objects
434    // (Make the Threadable tree look like the ThreadContainer tree)
435    void flush() {
436        if (parent != null && threadable == null)
437            throw new RuntimeException("no threadable in " + this.toString());
438
439        parent = null;
440
441        if (threadable != null)
442            threadable.setChild(child == null ? null : child.threadable);
443
444        if (child != null) {
445            child.flush();
446            child = null;
447        }
448
449        if (threadable != null)
450            threadable.setNext(next == null ? null : next.threadable);
451
452        if (next != null) {
453            next.flush();
454            next = null;
455        }
456
457        threadable = null;
458    }
459
460    /**
461     * Reverse the entire set of children
462     *
463     */
464    void reverseChildren() {
465        if (child != null) {
466            ThreadContainer kid, prev, rest;
467            for (prev = null, kid = child, rest = kid.next;
468                kid != null;
469                prev = kid,
470                    kid = rest,
471                    rest = (rest == null ? null : rest.next))
472                kid.next = prev;
473
474            child = prev;
475
476            // Do it for the kids 
477            for (kid = child; kid != null; kid = kid.next)
478                kid.reverseChildren();
479        }
480    }
481}