uBlock/js/liquid-dict.js

/*******************************************************************************

    µBlock - a Chromium browser extension to block requests.
    Copyright (C) 2014 Raymond Hill

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see {http://www.gnu.org/licenses/}.

    Home: https://github.com/gorhill/uBlock
*/

/******************************************************************************/

µBlock.LiquidDict = (function() {

/******************************************************************************/

var LiquidDict = function() {
    this.dict = {};
    this.count = 0;
    this.bucketCount = 0;
    this.frozenBucketCount = 0;

    // Somewhat arbitrary: I need to come up with hard data to know at which
    // point binary search is better than indexOf.
    this.cutoff = 500;
};

/******************************************************************************/

var meltBucket = function(ldict, len, bucket) {
    ldict.frozenBucketCount -= 1;
    var map = {};
    if ( bucket.charAt(0) === ' ' ) {
        bucket.trim().split(' ').map(function(k) {
            map[k] = true;
        });
    } else {
        var offset = 0;
        while ( offset < bucket.length ) {
            map[bucket.substring(offset, len)] = true;
            offset += len;
        }
    }
    return map;
};

/******************************************************************************/

var melt = function(ldict) {
    var buckets = ldict.dict;
    var bucket;
    for ( var key in buckets ) {
        bucket = buckets[key];
        if ( typeof bucket === 'string' ) {
            buckets[key] = meltBucket(ldict, key.charCodeAt(0) & 0xFF, bucket);
        }
    }
};

/******************************************************************************/

var freezeBucket = function(ldict, bucket) {
    ldict.frozenBucketCount += 1;
    var words = Object.keys(bucket);
    var wordLen = words[0].length;
    if ( wordLen * words.length < ldict.cutoff ) {
        return ' ' + words.join(' ') + ' ';
    }
    return words.sort().join('');
};

/******************************************************************************/

// How the key is derived dictates the number and size of buckets.
//
// http://jsperf.com/makekey-concat-vs-join/3
//
// Question: Why is using a prototyped function better than a standalone
// helper function?

LiquidDict.prototype.makeKey = function(word) {
    var len = word.length;
    if ( len > 255 ) {
        len = 255;
    }
    var i = len >> 2;
    return String.fromCharCode(
        (word.charCodeAt(    0) & 0x03) << 14 |
        (word.charCodeAt(    i) & 0x03) << 12 |
        (word.charCodeAt(  i+i) & 0x03) << 10 |
        (word.charCodeAt(i+i+i) & 0x03) <<  8 |
        len
    );
};

/******************************************************************************/

LiquidDict.prototype.test = function(word) {
    var key = this.makeKey(word);
    var bucket = this.dict[key];
    if ( bucket === undefined ) {
        return false;
    }
    if ( typeof bucket === 'object' ) {
        return bucket[word] !== undefined;
    }
    if ( bucket.charAt(0) === ' ' ) {
        return bucket.indexOf(' ' + word + ' ') >= 0;
    }
    // binary search
    var len = word.length;
    var left = 0;
    // http://jsperf.com/or-vs-floor/3
    var right = ~~(bucket.length / len + 0.5);
    var i, needle;
    while ( left < right ) {
        i = left + right >> 1;
        needle = bucket.substr( len * i, len );
        if ( word < needle ) {
            right = i;
        } else if ( word > needle ) {
            left = i + 1;
        } else {
            return true;
        }
    }
    return false;
};

/******************************************************************************/

LiquidDict.prototype.add = function(word) {
    var key = this.makeKey(word);
    if ( key === undefined ) {
        return false;
    }
    var bucket = this.dict[key];
    if ( bucket === undefined ) {
        this.dict[key] = bucket = {};
        this.bucketCount += 1;
        bucket[word] = true;
        this.count += 1;
        return true;
    } else if ( typeof bucket === 'string' ) {
        this.dict[key] = bucket = meltBucket(this, word.len, bucket);
    }
    if ( bucket[word] === undefined ) {
        bucket[word] = true;
        this.count += 1;
        return true;
    }
    return false;
};

/******************************************************************************/

LiquidDict.prototype.freeze = function() {
    var buckets = this.dict;
    var bucket;
    for ( var key in buckets ) {
        bucket = buckets[key];
        if ( typeof bucket === 'object' ) {
            buckets[key] = freezeBucket(this, bucket);
        }
    }
};

/******************************************************************************/

LiquidDict.prototype.reset = function() {
    this.dict = {};
    this.count = 0;
    this.bucketCount = 0;
    this.frozenBucketCount = 0;
};

/******************************************************************************/

return LiquidDict;

/******************************************************************************/

})();

/******************************************************************************/