a full text indexing library for javascript
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

181 lines
4.6 KiB

  1. // Copyright (c) 2012, Christopher Ramey | http://github.com/cramey/tidx/blob/master/LICENCE
  2. var Tidx = function()
  3. {
  4. // _index is the weighted, field-based reversed search term index
  5. this._index = {};
  6. // regex used to find terms inside a value
  7. this.v_rx = new RegExp('(:?[a-z_-]+)|(\\d*\\.\\d+)|(\\d+)', 'mg');
  8. // regex used to break out terms within a search
  9. this.s_rx = new RegExp('(:?([a-z\\d-_]+):){0,1}(:?(:?"(.+?)")|([a-z_-]+)|(\\d*\\.\\d+)|(\\d+))', 'img')
  10. // Indexes data inside this object.
  11. this.index = function(tokenize, id, field, value)
  12. {
  13. var f;
  14. switch(typeof field){
  15. // Don't allow undefined fields
  16. case 'undefined': return;
  17. case 'string': f = field.toLowerCase(); break;
  18. default: f = String(field);
  19. }
  20. var v;
  21. switch(typeof value){
  22. // Don't allow undefined, null or object values
  23. case 'null': case 'undefined': case 'object': return;
  24. case 'string':
  25. v = value.toLowerCase();
  26. // Don't index empty fields
  27. if(v.length === 0){ return; }
  28. break;
  29. default: v = String(value);
  30. }
  31. // Add field to field list, as needed
  32. if(this._index[f] === undefined){ this._index[f] = {}; }
  33. if(tokenize === true){
  34. // Iterate over discrete searchable terms
  35. var re;
  36. while((re = this.v_rx.exec(v)) !== null){
  37. var t = re[0];
  38. // If the field in question doesn't have this term already,
  39. // add it.
  40. if(this._index[f][t] === undefined){ this._index[f][t] = {}; }
  41. // Add this id to the reverse index under specific field (f)
  42. // and term (v), if it already exists, increment the weight
  43. if(this._index[f][t][id] === undefined){ this._index[f][t][id] = 1; }
  44. else { this._index[f][t][id]++; }
  45. }
  46. } else {
  47. if(this._index[f][v] === undefined){ this._index[f][v] = {}; }
  48. if(this._index[f][v][id] === undefined){ this._index[f][v][id] = 1; }
  49. else { this._index[f][v][id]++; }
  50. }
  51. };
  52. // Conducts a global index scan for a string (value), iterating
  53. // through all fields. Returns the number of terms it scanned for.
  54. this.global_scan = function(value, result)
  55. {
  56. // Refuse empty searches
  57. if(value.length === 0){ return 0; }
  58. var v = value.toLowerCase();
  59. // Loop over every field
  60. for(var f in this._index){
  61. // Look for the specified search term
  62. if(this._index[f][v] !== undefined){
  63. // If it exists, add the result to r, adding in the weight and count
  64. for(var i in this._index[f][v]){
  65. if(result[i] === undefined){ result[i] = {'w': 0, 'c': 0}; }
  66. result[i]['w'] += this._index[f][v][i];
  67. result[i]['c']++;
  68. }
  69. }
  70. }
  71. return 1;
  72. };
  73. // Conducts a field specific scan for a string (value), returns
  74. // the number of terms it scanned for.
  75. this.field_scan = function(field, value, result)
  76. {
  77. var f;
  78. switch(typeof field){
  79. // Don't allow undefined fields
  80. case 'undefined': return 0;
  81. case 'string': f = field.toLowerCase(); break;
  82. default: f = String(field);
  83. }
  84. if(value.length === 0){ return 0; }
  85. var v = value.toLowerCase();
  86. if(this._index[f] === undefined || this._index[f][v] === undefined){
  87. return 1;
  88. }
  89. for(var i in this._index[f][v]){
  90. if(result[i] === undefined){ result[i] = {'w': 0, 'c': 0}; }
  91. result[i]['w'] += this._index[f][v][i];
  92. result[i]['c']++;
  93. }
  94. return 1;
  95. };
  96. // Raw searching function useful for chaining seaches
  97. this.raw_search = function(search, result)
  98. {
  99. var re, tc=0;
  100. while((re = this.s_rx.exec(search)) !== null){
  101. var field = re[2];
  102. var value;
  103. for(var i=5; i < 9; i++){
  104. if(re[i] !== undefined && re[i].length !== 0){
  105. value = re[i];
  106. break;
  107. }
  108. }
  109. // Field specific scan for term
  110. if(field !== undefined && field.length !== 0){
  111. tc += this.field_scan(field, value, result);
  112. // Global scan for term
  113. } else {
  114. tc += this.global_scan(value, result);
  115. }
  116. }
  117. return tc;
  118. }
  119. // Multi-term searching function. This is what you should use
  120. // to search with in most cases. If and_query is true, returned
  121. // results must match every term. Otherwise, any ID matching any
  122. // term is returned.
  123. this.search = function(and_query, search)
  124. {
  125. var r = {};
  126. var tc = this.raw_search(search, r);
  127. return this.order(r, (and_query ? tc : 0));
  128. };
  129. // Orders the result of a query. Accepts an object as a result, and
  130. // the minimum floor count. In the event of an AND query, floor should
  131. // be quality to the number of search terms. Otherwise it should be zero.
  132. this.order = function(result, floor)
  133. {
  134. var t = [];
  135. for(var i in result){
  136. if(floor == 0 || result[i]['c'] >= floor){
  137. t.push([i, result[i]['w']]);
  138. }
  139. }
  140. t.sort(function(a,b){ return b[1] - a[1]; });
  141. var r = [];
  142. for(var i in t){ r.push(t[i][0]); }
  143. return r;
  144. };
  145. };