You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
181 lines
4.6 KiB
181 lines
4.6 KiB
// Copyright (c) 2012, Christopher Ramey | http://github.com/cramey/tidx/blob/master/LICENCE
|
|
// Tidx is a small in-memory search index. Documents are identified by an id
// and indexed per field; searching returns ids ordered by descending weight
// (the number of times the searched terms were indexed for that id).
//
// Usage:
//   var t = new Tidx();
//   t.index(true, 1, 'title', 'Hello world');
//   t.search(true, 'title:hello world');   // -> ['1']
var Tidx = function()
{
  var hasOwn = Object.prototype.hasOwnProperty;

  // Lowercased string form of a field name or value, so that indexing and
  // searching always agree on case (this also covers "NaN" / "Infinity").
  function normalize(x)
  {
    return String(x).toLowerCase();
  }

  // Adds one occurrence of (term, id) to a field's term dictionary.
  function record(terms, term, id)
  {
    if(!hasOwn.call(terms, term)){ terms[term] = Object.create(null); }

    var ids = terms[term];
    ids[id] = (ids[id] === undefined) ? 1 : ids[id] + 1;
  }

  // Returns the {w, c} accumulator for id inside result, creating it if needed.
  function entryFor(result, id)
  {
    if(!hasOwn.call(result, id) || result[id] === undefined){
      result[id] = {'w': 0, 'c': 0};
    }
    return result[id];
  }

  // _index is the weighted, field-based reversed search term index:
  //   _index[field][term][id] = number of occurrences (weight).
  // Every level is a prototype-less dictionary so that terms such as
  // "__proto__" or "constructor" are ordinary keys instead of touching
  // Object.prototype.
  this._index = Object.create(null);

  // regex used to find terms inside a value: words, decimals or integers
  this.v_rx = /[a-z_-]+|\d*\.\d+|\d+/g;

  // regex used to break out terms within a search. Capture groups:
  //   1: optional field name (from a "field:" prefix)
  //   2: quoted phrase, 3: word, 4: decimal number, 5: integer
  this.s_rx = /(?:([a-z\d_-]+):)?(?:"(.+?)"|([a-z_-]+)|(\d*\.\d+)|(\d+))/ig;


  // Indexes data inside this object.
  //   tokenize: when exactly true, value is split into terms using v_rx and
  //             each term is indexed; otherwise the whole lowercased value
  //             is indexed as a single term.
  //   id:       document identifier (used as an object key, so it is
  //             effectively a string).
  //   field:    field name; undefined fields are ignored.
  //   value:    value to index; undefined, null, objects and empty strings
  //             are ignored.
  this.index = function(tokenize, id, field, value)
  {
    // Don't allow undefined fields
    if(field === undefined){ return; }

    // Don't allow undefined, null or object values
    if(value === undefined || value === null || typeof value === 'object'){
      return;
    }

    var f = normalize(field);
    var v = normalize(value);

    // Don't index empty fields
    if(v.length === 0){ return; }

    // Add field to field list, as needed
    if(!hasOwn.call(this._index, f)){ this._index[f] = Object.create(null); }
    var terms = this._index[f];

    if(tokenize !== true){
      record(terms, v, id);
      return;
    }

    // v_rx is a shared global regex; make sure the scan starts at the
    // beginning even if an earlier scan was interrupted.
    this.v_rx.lastIndex = 0;

    // Iterate over discrete searchable terms, adding the id under each
    // one (or incrementing its weight when already present).
    var m;
    while((m = this.v_rx.exec(v)) !== null){
      record(terms, m[0], id);
    }
  };


  // Conducts a global index scan for a string (value), iterating
  // through all fields. Returns the number of terms it scanned for
  // (0 for an empty/missing value, otherwise 1).
  //
  // For every id found, result[id].w grows by the weight found in each
  // field, while result[id].c grows by exactly one per call, no matter how
  // many fields matched, so that c counts matched search terms.
  this.global_scan = function(value, result)
  {
    // Refuse empty searches
    if(value === undefined || value === null){ return 0; }

    var v = normalize(value);
    if(v.length === 0){ return 0; }

    // ids already counted during this scan
    var counted = Object.create(null);

    // Loop over every field
    for(var f in this._index){
      if(!hasOwn.call(this._index, f)){ continue; }

      // Look for the specified search term
      var terms = this._index[f];
      if(!hasOwn.call(terms, v)){ continue; }

      var ids = terms[v];
      for(var id in ids){
        if(!hasOwn.call(ids, id)){ continue; }

        var entry = entryFor(result, id);
        entry['w'] += ids[id];

        if(counted[id] === undefined){
          counted[id] = true;
          entry['c']++;
        }
      }
    }

    return 1;
  };


  // Conducts a field specific scan for a string (value), returns
  // the number of terms it scanned for (0 for an undefined field or an
  // empty/missing value, otherwise 1 - even when nothing matched).
  this.field_scan = function(field, value, result)
  {
    // Don't allow undefined fields
    if(field === undefined){ return 0; }
    if(value === undefined || value === null){ return 0; }

    var f = normalize(field);
    var v = normalize(value);
    if(v.length === 0){ return 0; }

    // An unknown field or term still counts as a scanned term so that
    // AND queries correctly come back empty.
    if(!hasOwn.call(this._index, f) || !hasOwn.call(this._index[f], v)){
      return 1;
    }

    var ids = this._index[f][v];
    for(var id in ids){
      if(!hasOwn.call(ids, id)){ continue; }

      var entry = entryFor(result, id);
      entry['w'] += ids[id];
      entry['c']++;
    }

    return 1;
  };


  // Raw searching function useful for chaining searches. Parses search
  // with s_rx, accumulates matches into result (id -> {w, c}) and returns
  // the number of terms that were scanned for.
  this.raw_search = function(search, result)
  {
    var tc = 0;
    var m;

    // s_rx is a shared global regex; always start from the beginning.
    this.s_rx.lastIndex = 0;

    while((m = this.s_rx.exec(search)) !== null){
      var field = m[1];

      // Exactly one of the value groups participates in a match
      var value = m[2] || m[3] || m[4] || m[5];

      if(field !== undefined && field.length !== 0){
        // Field specific scan for term
        tc += this.field_scan(field, value, result);
      } else {
        // Global scan for term
        tc += this.global_scan(value, result);
      }
    }

    return tc;
  };


  // Multi-term searching function. This is what you should use
  // to search with in most cases. If and_query is true, returned
  // results must match every term. Otherwise, any ID matching any
  // term is returned. Returns an array of ids (as strings) ordered by
  // descending weight.
  this.search = function(and_query, search)
  {
    var r = Object.create(null);
    var tc = this.raw_search(search, r);

    return this.order(r, (and_query ? tc : 0));
  };


  // Orders the result of a query. Accepts an object as a result, and
  // the minimum floor count. In the event of an AND query, floor should
  // be equal to the number of search terms. Otherwise it should be zero
  // (a missing floor is treated as zero).
  this.order = function(result, floor)
  {
    var ranked = [];

    for(var id in result){
      if(!hasOwn.call(result, id)){ continue; }

      if(!floor || result[id]['c'] >= floor){
        ranked.push([id, result[id]['w']]);
      }
    }

    // Heaviest first
    ranked.sort(function(a, b){ return b[1] - a[1]; });

    var ids = [];
    for(var k = 0; k < ranked.length; k++){ ids.push(ranked[k][0]); }

    return ids;
  };
};
|