A full-text indexing library for JavaScript.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

139 lines
3.3 KiB

  // Copyright (c) 2012, Christopher Ramey | http://github.com/cramey/tidx/blob/master/LICENSE
  /**
   * Tidx - a small in-memory full text index.
   *
   * Data is stored in a weighted, field-based reverse index shaped as
   *   _index[field][term][id] = number of times `term` occurs for `id`
   * Field names and terms are stored lower-cased, so lookups are
   * case-insensitive.
   *
   * Usage:
   *   var idx = new Tidx();
   *   idx.index(1, 'title', 'Hello world');
   *   idx.search('hello');        // => ['1']
   *   idx.search('title:world');  // => ['1']
   */
  var Tidx = function()
  {
    // Own-property test that also works on prototype-less dictionaries
    var has = Object.prototype.hasOwnProperty;

    // Returns dict[key] only when it is an own property, so inherited
    // members such as 'constructor' or 'toString' are never mistaken
    // for indexed data.
    var own = function(dict, key)
    {
      return has.call(dict, key) ? dict[key] : undefined;
    };

    // Normalises a field name: strings are lower-cased, anything else is
    // stringified. Returns undefined for an undefined field.
    var fieldKey = function(field)
    {
      switch(typeof field){
        case 'undefined': return undefined;
        case 'string': return field.toLowerCase();
        default: return String(field);
      }
    };

    // Normalises a search term to a lower-cased string; null and
    // undefined become the empty string (which callers refuse).
    var termKey = function(value)
    {
      if(value == null){ return ''; }
      return String(value).toLowerCase();
    };

    // Adds every id/weight pair in `hits` to the `result` accumulator
    var accumulate = function(result, hits)
    {
      for(var id in hits){
        if(!has.call(hits, id)){ continue; }
        // Only trust a previous weight that the accumulator really owns
        var seen = has.call(result, id) && result[id] != null;
        result[id] = (seen ? result[id] : 0) + hits[id];
      }
    };

    // _index is the weighted, field-based reversed search term index.
    // It has no prototype, so terms like '__proto__' or 'constructor'
    // are stored as ordinary keys instead of touching Object.prototype.
    this._index = Object.create(null);

    // regex used to find terms inside a value
    this.v_rx = new RegExp('[\\w-]+', 'mg');

    // regex used to break out terms with a search: group 1 is the term
    // (or the field name), group 3 is the term of a "field:term" pair.
    // NOTE(review): '(:?:' looks like a typo for '(?::'; as written it
    // also accepts "field::term". Left unchanged to keep query syntax.
    this.s_rx = new RegExp('([\\w-]+)(:?:([\\w-]+)){0,1}', 'mg');

    /**
     * Adds data to the index.
     *
     * @param {*} id     Identifier to file the terms under (used as an
     *                   object key, so it comes back as a string).
     * @param {*} field  Field name; undefined is ignored, strings are
     *                   lower-cased, other types are stringified.
     * @param {*} value  Text to index. undefined, null, objects and
     *                   empty strings are ignored; other types are
     *                   stringified.
     */
    this.index = function(id, field, value)
    {
      var f = fieldKey(field);
      // Don't allow undefined fields
      if(f === undefined){ return; }

      // Don't allow undefined, null or object values
      // (typeof null is 'object', so one test covers both)
      var kind = typeof value;
      if(kind === 'undefined' || kind === 'object'){ return; }

      // Don't index empty fields
      var text = String(value);
      if(text.length === 0){ return; }

      // Add field to field list, as needed
      var terms = own(this._index, f);
      if(terms === undefined){
        terms = this._index[f] = Object.create(null);
      }

      // v_rx is a shared global regex; always start from the beginning
      this.v_rx.lastIndex = 0;

      // Iterate over discrete searchable terms
      var re;
      while((re = this.v_rx.exec(text)) !== null){
        var term = re[0].toLowerCase();

        // If the field in question doesn't have this term already, add it
        var ids = own(terms, term);
        if(ids === undefined){
          ids = terms[term] = Object.create(null);
        }

        // Add this id to the reverse index under this field and term;
        // if it already exists, increment the weight
        var weight = own(ids, id);
        ids[id] = (weight === undefined ? 0 : weight) + 1;
      }
    };

    /**
     * Conducts a global search for a single term, iterating through all
     * fields, and adds the weights found to `result`.
     *
     * @param {Object} result  Accumulator of id => weight (mutated).
     * @param {*}      value   Term to look for (case-insensitive).
     * @returns {Array|undefined} [] when the term is empty, otherwise
     *                            nothing; matches are reported via `result`.
     */
    this.gsearch = function(result, value)
    {
      var v = termKey(value);
      // Refuse empty searches
      if(v.length === 0){ return []; }

      // Loop over every field, looking for the specified search term
      for(var f in this._index){
        if(!has.call(this._index, f)){ continue; }
        var hits = own(this._index[f], v);
        if(hits !== undefined){ accumulate(result, hits); }
      }
    };

    /**
     * Conducts a field specific search for a single term and adds the
     * weights found to `result`.
     *
     * @param {Object} result  Accumulator of id => weight (mutated).
     * @param {*}      field   Field to search; undefined is ignored.
     * @param {*}      value   Term to look for (case-insensitive).
     * @returns {Array|undefined} [] when the term is empty or nothing
     *                            matches, otherwise nothing; matches are
     *                            reported via `result`.
     */
    this.fsearch = function(result, field, value)
    {
      var f = fieldKey(field);
      // Don't allow undefined fields
      if(f === undefined){ return; }

      var v = termKey(value);
      if(v.length === 0){ return []; }

      var terms = own(this._index, f);
      if(terms === undefined){ return []; }

      var hits = own(terms, v);
      if(hits === undefined){ return []; }

      accumulate(result, hits);
    };

    /**
     * Multi-term searching function - this is what you should use to
     * search with. Each "term" is searched in every field, each
     * "field:term" only in that field; weights of all matches are summed.
     *
     * @param {*} search  Query string; null/undefined yields no results.
     * @returns {string[]} Found ids, ordered by descending weight.
     */
    this.search = function(search)
    {
      if(search == null){ return []; }
      var query = String(search);

      // Prototype-less accumulator so any id is a safe key
      var r = Object.create(null);

      // s_rx is a shared global regex; always start from the beginning
      this.s_rx.lastIndex = 0;

      var re;
      while((re = this.s_rx.exec(query)) !== null){
        // Global term
        if(re[3] === undefined || re[3] === ''){ this.gsearch(r, re[1]); }
        // Field specific term
        else { this.fsearch(r, re[1], re[3]); }
      }

      return this.sortresult(r);
    };

    /**
     * Turns an id => weight object into an array of ids.
     *
     * @param {Object} o  Accumulator of id => weight.
     * @returns {string[]} The ids of `o`, ordered by descending weight.
     */
    this.sortresult = function(o)
    {
      var pairs = [];
      for(var id in o){
        if(has.call(o, id)){ pairs.push([id, o[id]]); }
      }

      pairs.sort(function(a, b){ return b[1] - a[1]; });

      var r = [];
      for(var n = 0; n < pairs.length; n++){ r.push(pairs[n][0]); }
      return r;
    };
  };