OracleText: Difference between revisions

From Request Tracker Wiki
Jump to navigation Jump to search
m (2 revisions imported)
 
(No difference)

Latest revision as of 15:15, 6 April 2016

This article will describe, in short, what I have done to speed up queries which searches inside email bodies/attachments and RTFM articles which contain large amounts of WikiText.

There are 3 things that need to be done:

  • patch SearchBuilder, to remove pre/post pend of %, change LIKE to contain(Content,'searchstring')>0
  • change RTFM Search.html, remove pre/post pend of % on WikiText Customfields
  • add Oracle Text indexes for Attachments.content and ObjectCustomFieldValues.largecontent

Attached are the file needed todo this, that functionality isn't there anymore ;-( So here they come inline

Start Searchbuilder patch no. 1

--- SearchBuilder.pm.orig	2007-07-07 22:45:00.000000000 +0200
+++ SearchBuilder.pm	2007-07-10 08:49:00.000000000 +0200
@@ -767,9 +767,15 @@

     if ( $args{'FIELD'} ) {

-        #If it's a like, we supply the %s around the search term
+        #If it's a like, we supply the %s around the search term only if its not Oracle
+        #because for Oracle we'll use where contains(content,'text')>1
         if ( $args{'OPERATOR'} =~ /LIKE/i ) {
-            $args{'VALUE'} = "%" . $args{'VALUE'} . "%";
+            if ( $RT::DatabaseType eq 'Oracle') {
+                 $args{'VALUE'} = $args{'VALUE'}
+            }
+            else {
+                $args{'VALUE'} = "%" . $args{'VALUE'} . "%";
+            }
         }
         elsif ( $args{'OPERATOR'} =~ /STARTSWITH/i ) {
             $args{'VALUE'}    = $args{'VALUE'} . "%";
@@ -932,6 +938,49 @@
         value => $args{'VALUE'},
     };

+# Keep the original clause and modify if dbtype is Oracle
+# This patch makes searching for ticket content real fast when you have the appropriate
+# index on Attachments.Content and ObjectCustomFieldValues.Largecontent for RTFM
+# CREATE INDEX CNT ON ATTACHMENTS (CONTENT) INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS('memory 4M');
+# CREATE INDEX FM_LARGE_CNT ON ObjectCustomFieldValues(Largecontent) INDEXTYPE IS CTXSYS.CONTEXT PARAMETERS('memory 4M');
+# parameters is tunable, see Oracle docs ;-)
+# fixed the static replace Attachments_2.Content by dynamic version for both RT and RTFM Content fields
+# also changed RTFM/Article/Search.html NOT to pre and post pend '%'
+# other things needed is a FMT column for Attachments and ObjectCustomFieldValues
+# which takes care of not including base64 text into the index
+# does this by using a trigger which fills the FMT column and using the appropriate
+# options when creating the Text index. (saved 25% of unneeded index room ~1Gb out of ~4Gb)
+    if ( $RT::DatabaseType eq 'Oracle' ) {
+        my ($tmpfield);
+		# First fix RT to use OracleText when searching Ticket Content
+        if ($QualifiedField =~ /lower\(Attachments_\d+\.Content\)/  and $args{'OPERATOR'} eq 'LIKE') {
+			# get rid of the lower since you can't have a functional context index.
+			# don't care about the remaining (), they don't have a significant effect on performance
+			# you can however tell Context to index casesensitive or caseinsensitive.
+			# this is probably not very elegant, excuse my perl ;-)
+     	    $tmpfield = $QualifiedField;
+    	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+        elsif ($QualifiedField =~ /lower\(Attachments_\d+\.Content\)/  and $args{'OPERATOR'} eq 'NOT LIKE') {
+     	    $tmpfield = $QualifiedField;
+    	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(not contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+		# Second fix RTFM to use OracleText when searching Article Content and LargeContent
+        elsif ($QualifiedField =~ /ObjectCustomFieldValues_\d+\.Largecontent/ and $args{'OPERATOR'} eq 'LIKE') {
+     	    $tmpfield = $QualifiedField;
+    	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+		elsif ($QualifiedField =~ /ObjectCustomFieldValues_\d+\.Largecontent/ and $args{'OPERATOR'} eq 'NOT LIKE') {
+     	    $tmpfield = $QualifiedField;
+	   	    $tmpfield =~ s/(lower)?//g;
+            $clause = '(not contains(' . $tmpfield . ',' . $args{'VALUE'} . ') >0)';
+        }
+
+    }
+
     # Juju because this should come _AFTER_ the EA
     my @prefix;
     if ( $self->{_open_parens}{ $ClauseId } ) {

End Searchbuilder patch no. 1

Start Searchbuilder patch no. 2

--- Handle.pm.orig	2007-04-10 22:48:42.000000000 +0200
+++ Handle.pm	2007-07-12 11:13:00.000000000 +0200
@@ -1101,6 +1101,14 @@
     # of here as we can't parse subclauses
     return 1 if grep $_ ne 'generic_restrictions', keys %{ $args{'SearchBuilder'}->{'subclauses'} };

+# JvdW 2007-07-12
+# Get out of this function if were dealing with either Attachments or ObjectCustomFieldValues
+# since these are modified and parser trips over '(contains(content,'test')>0)
+# haven't seen any side effect but that doesn't mean there aren't any.
+# feel free to fix this lousy perl of mine ;-)
+	return 1 if $args{'ALIAS'} =~ /Attachments_\d+/ ;
+ 	return 1 if $args{'ALIAS'} =~ /ObjectCustomFieldValues_\d+/ ;
+
     # build full list of generic conditions
     my @conditions;
     foreach ( grep @$_, values %{ $args{'SearchBuilder'}->{'restrictions'} } ) {

End Searchbuilder patch no. 2

Start Search.html=== copy share/html/RTFM/Article/Search.hmtl to local/html/RTFM/Article/Search.html edit Search.html (find the first and second occurrence of $value = "%$1%"; and change it to $value = "$1"; leave the third and fourth occurrence alone, they are responsible for searching small string values and don't have a huge impact on performance. ===End Search.html

Start OracleTest part

/* Direct_datastore is for text in 1 column further no attributes */
 
 begin
   ctx_ddl.drop_preference('"CNT_DST"');
 end;
 
 begin
   ctx_ddl.create_preference('"CNT_DST"','DIRECT_DATASTORE');
 end;
 /
 
 /* NULL_FILTER because we only want text indexing or auto_filter because*/
 /* we add an extra fmt column to Attachments which holds a flag whether */
 /* that row is indexed or not */
 
 begin
   ctx_ddl.drop_preference('"CNT_FIL"');
 end;
 /
 begin
   ctx_ddl.create_preference('"CNT_FIL"','AUTO_FILTER');
 end;
 /
 
 begin
   ctx_ddl.drop_section_group('"CNT_SGP"');
 end;
 /
 begin
   ctx_ddl.create_section_group('"CNT_SGP"','NULL_SECTION_GROUP');
 end;
 /
 
 /* BASIC_LEXER is sufficient because we only index English/Dutch  */
 /* Its wise to set some attributes, look at Oracle Docs for which */
 begin
   ctx_ddl.drop_preference('"CNT_LEX"');
 end;
 /
 begin
   ctx_ddl.create_preference('"CNT_LEX"','BASIC_LEXER');
   ctx_ddl.set_attribute('CNT_LEX', 'composite', 'DUTCH');
   ctx_ddl.set_attribute('CNT_LEX', 'index_themes', 'NO');
   ctx_ddl.set_attribute('CNT_LEX', 'index_stems', 'DUTCH');
   ctx_ddl.set_attribute('CNT_LEX', 'index_text', 'YES');
   ctx_ddl.set_attribute('CNT_LEX', 'printjoins', '_-.');
 end;
 /
 
 /* BASIC_WORDLIST to set properties of stop words            */
 /* in my case I have added Dutch and English, edit to needs  */
 begin
   ctx_ddl.drop_preference('"CNT_WDL"');
 end;
 /
 begin
   ctx_ddl.create_preference('"CNT_WDL"','BASIC_WORDLIST');
   ctx_ddl.set_attribute('"CNT_WDL"','STEMMER','DUTCH');
   ctx_ddl.set_attribute('"CNT_WDL"','FUZZY_MATCH','DUTCH');
 end;
 /
 
 begin
   ctx_ddl.drop_stoplist('"CNT_SPL"');
 end;
 /
 begin
   ctx_ddl.create_stoplist('"CNT_SPL"','BASIC_STOPLIST');
   ctx_ddl.add_stopword('"CNT_SPL"','Corp');
   ctx_ddl.add_stopword('"CNT_SPL"','Mr');
   ctx_ddl.add_stopword('"CNT_SPL"','Mrs');
   ctx_ddl.add_stopword('"CNT_SPL"','Ms');
   ctx_ddl.add_stopword('"CNT_SPL"','Mz');
   ctx_ddl.add_stopword('"CNT_SPL"','a');
   ctx_ddl.add_stopword('"CNT_SPL"','about');
   ctx_ddl.add_stopword('"CNT_SPL"','after');
   ctx_ddl.add_stopword('"CNT_SPL"','all');
   ctx_ddl.add_stopword('"CNT_SPL"','also');
   ctx_ddl.add_stopword('"CNT_SPL"','an');
   ctx_ddl.add_stopword('"CNT_SPL"','and');
   ctx_ddl.add_stopword('"CNT_SPL"','any');
   ctx_ddl.add_stopword('"CNT_SPL"','are');
   ctx_ddl.add_stopword('"CNT_SPL"','as');
   ctx_ddl.add_stopword('"CNT_SPL"','at');
   ctx_ddl.add_stopword('"CNT_SPL"','be');
   ctx_ddl.add_stopword('"CNT_SPL"','because');
   ctx_ddl.add_stopword('"CNT_SPL"','been');
   ctx_ddl.add_stopword('"CNT_SPL"','but');
   ctx_ddl.add_stopword('"CNT_SPL"','by');
   ctx_ddl.add_stopword('"CNT_SPL"','can');
   ctx_ddl.add_stopword('"CNT_SPL"','co');
   ctx_ddl.add_stopword('"CNT_SPL"','could');
   ctx_ddl.add_stopword('"CNT_SPL"','for');
   ctx_ddl.add_stopword('"CNT_SPL"','from');
   ctx_ddl.add_stopword('"CNT_SPL"','had');
   ctx_ddl.add_stopword('"CNT_SPL"','has');
   ctx_ddl.add_stopword('"CNT_SPL"','have');
   ctx_ddl.add_stopword('"CNT_SPL"','he');
   ctx_ddl.add_stopword('"CNT_SPL"','her');
   ctx_ddl.add_stopword('"CNT_SPL"','his');
   ctx_ddl.add_stopword('"CNT_SPL"','if');
   ctx_ddl.add_stopword('"CNT_SPL"','in');
   ctx_ddl.add_stopword('"CNT_SPL"','inc');
   ctx_ddl.add_stopword('"CNT_SPL"','into');
   ctx_ddl.add_stopword('"CNT_SPL"','is');
   ctx_ddl.add_stopword('"CNT_SPL"','it');
   ctx_ddl.add_stopword('"CNT_SPL"','its');
   ctx_ddl.add_stopword('"CNT_SPL"','last');
   ctx_ddl.add_stopword('"CNT_SPL"','more');
   ctx_ddl.add_stopword('"CNT_SPL"','most');
   ctx_ddl.add_stopword('"CNT_SPL"','no');
   ctx_ddl.add_stopword('"CNT_SPL"','not');
   ctx_ddl.add_stopword('"CNT_SPL"','of');
   ctx_ddl.add_stopword('"CNT_SPL"','on');
   ctx_ddl.add_stopword('"CNT_SPL"','one');
   ctx_ddl.add_stopword('"CNT_SPL"','only');
   ctx_ddl.add_stopword('"CNT_SPL"','or');
   ctx_ddl.add_stopword('"CNT_SPL"','other');
   ctx_ddl.add_stopword('"CNT_SPL"','out');
   ctx_ddl.add_stopword('"CNT_SPL"','over');
   ctx_ddl.add_stopword('"CNT_SPL"','s');
   ctx_ddl.add_stopword('"CNT_SPL"','says');
   ctx_ddl.add_stopword('"CNT_SPL"','she');
   ctx_ddl.add_stopword('"CNT_SPL"','so');
   ctx_ddl.add_stopword('"CNT_SPL"','some');
   ctx_ddl.add_stopword('"CNT_SPL"','such');
   ctx_ddl.add_stopword('"CNT_SPL"','than');
   ctx_ddl.add_stopword('"CNT_SPL"','that');
   ctx_ddl.add_stopword('"CNT_SPL"','the');
   ctx_ddl.add_stopword('"CNT_SPL"','their');
   ctx_ddl.add_stopword('"CNT_SPL"','there');
   ctx_ddl.add_stopword('"CNT_SPL"','they');
   ctx_ddl.add_stopword('"CNT_SPL"','this');
   ctx_ddl.add_stopword('"CNT_SPL"','to');
   ctx_ddl.add_stopword('"CNT_SPL"','up');
   ctx_ddl.add_stopword('"CNT_SPL"','was');
   ctx_ddl.add_stopword('"CNT_SPL"','we');
   ctx_ddl.add_stopword('"CNT_SPL"','were');
   ctx_ddl.add_stopword('"CNT_SPL"','when');
   ctx_ddl.add_stopword('"CNT_SPL"','which');
   ctx_ddl.add_stopword('"CNT_SPL"','who');
   ctx_ddl.add_stopword('"CNT_SPL"','will');
   ctx_ddl.add_stopword('"CNT_SPL"','with');
   ctx_ddl.add_stopword('"CNT_SPL"','would');
   ctx_ddl.add_stopword('"CNT_SPL"','aan');
   ctx_ddl.add_stopword('"CNT_SPL"','aangaande');
   ctx_ddl.add_stopword('"CNT_SPL"','aangezien');
   ctx_ddl.add_stopword('"CNT_SPL"','achter');
   ctx_ddl.add_stopword('"CNT_SPL"','achterna');
   ctx_ddl.add_stopword('"CNT_SPL"','afgelopen');
   ctx_ddl.add_stopword('"CNT_SPL"','al');
   ctx_ddl.add_stopword('"CNT_SPL"','aldaar');
   ctx_ddl.add_stopword('"CNT_SPL"','aldus');
   ctx_ddl.add_stopword('"CNT_SPL"','alhoewel');
   ctx_ddl.add_stopword('"CNT_SPL"','alias');
   ctx_ddl.add_stopword('"CNT_SPL"','alle');
   ctx_ddl.add_stopword('"CNT_SPL"','allebei');
   ctx_ddl.add_stopword('"CNT_SPL"','alleen');
   ctx_ddl.add_stopword('"CNT_SPL"','alsnog');
   ctx_ddl.add_stopword('"CNT_SPL"','altijd');
   ctx_ddl.add_stopword('"CNT_SPL"','altoos');
   ctx_ddl.add_stopword('"CNT_SPL"','ander');
   ctx_ddl.add_stopword('"CNT_SPL"','andere');
   ctx_ddl.add_stopword('"CNT_SPL"','anders');
   ctx_ddl.add_stopword('"CNT_SPL"','anderszins');
   ctx_ddl.add_stopword('"CNT_SPL"','behalve');
   ctx_ddl.add_stopword('"CNT_SPL"','behoudens');
   ctx_ddl.add_stopword('"CNT_SPL"','beide');
   ctx_ddl.add_stopword('"CNT_SPL"','beiden');
   ctx_ddl.add_stopword('"CNT_SPL"','ben');
   ctx_ddl.add_stopword('"CNT_SPL"','beneden');
   ctx_ddl.add_stopword('"CNT_SPL"','bent');
   ctx_ddl.add_stopword('"CNT_SPL"','bepaald');
   ctx_ddl.add_stopword('"CNT_SPL"','betreffende');
   ctx_ddl.add_stopword('"CNT_SPL"','bij');
   ctx_ddl.add_stopword('"CNT_SPL"','binnen');
   ctx_ddl.add_stopword('"CNT_SPL"','binnenin');
   ctx_ddl.add_stopword('"CNT_SPL"','boven');
   ctx_ddl.add_stopword('"CNT_SPL"','bovenal');
   ctx_ddl.add_stopword('"CNT_SPL"','bovendien');
   ctx_ddl.add_stopword('"CNT_SPL"','bovengenoemd');
   ctx_ddl.add_stopword('"CNT_SPL"','bovenstaand');
   ctx_ddl.add_stopword('"CNT_SPL"','bovenvermeld');
   ctx_ddl.add_stopword('"CNT_SPL"','buiten');
   ctx_ddl.add_stopword('"CNT_SPL"','daar');
   ctx_ddl.add_stopword('"CNT_SPL"','daarheen');
   ctx_ddl.add_stopword('"CNT_SPL"','daarin');
   ctx_ddl.add_stopword('"CNT_SPL"','daarna');
   ctx_ddl.add_stopword('"CNT_SPL"','daarnet');
   ctx_ddl.add_stopword('"CNT_SPL"','daarom');
   ctx_ddl.add_stopword('"CNT_SPL"','daarop');
   ctx_ddl.add_stopword('"CNT_SPL"','daarvanlangs');
   ctx_ddl.add_stopword('"CNT_SPL"','dan');
   ctx_ddl.add_stopword('"CNT_SPL"','dat');
   ctx_ddl.add_stopword('"CNT_SPL"','de');
   ctx_ddl.add_stopword('"CNT_SPL"','die');
   ctx_ddl.add_stopword('"CNT_SPL"','dikwijls');
   ctx_ddl.add_stopword('"CNT_SPL"','dit');
   ctx_ddl.add_stopword('"CNT_SPL"','door');
   ctx_ddl.add_stopword('"CNT_SPL"','doorgaand');
   ctx_ddl.add_stopword('"CNT_SPL"','dus');
   ctx_ddl.add_stopword('"CNT_SPL"','echter');
   ctx_ddl.add_stopword('"CNT_SPL"','eer');
   ctx_ddl.add_stopword('"CNT_SPL"','eerdat');
   ctx_ddl.add_stopword('"CNT_SPL"','eerder');
   ctx_ddl.add_stopword('"CNT_SPL"','eerlang');
   ctx_ddl.add_stopword('"CNT_SPL"','eerst');
   ctx_ddl.add_stopword('"CNT_SPL"','elk');
   ctx_ddl.add_stopword('"CNT_SPL"','elke');
   ctx_ddl.add_stopword('"CNT_SPL"','en');
   ctx_ddl.add_stopword('"CNT_SPL"','enig');
   ctx_ddl.add_stopword('"CNT_SPL"','enigszins');
   ctx_ddl.add_stopword('"CNT_SPL"','enkel');
   ctx_ddl.add_stopword('"CNT_SPL"','er');
   ctx_ddl.add_stopword('"CNT_SPL"','erdoor');
   ctx_ddl.add_stopword('"CNT_SPL"','even');
   ctx_ddl.add_stopword('"CNT_SPL"','eveneens');
   ctx_ddl.add_stopword('"CNT_SPL"','evenwel');
   ctx_ddl.add_stopword('"CNT_SPL"','gauw');
   ctx_ddl.add_stopword('"CNT_SPL"','gedurende');
   ctx_ddl.add_stopword('"CNT_SPL"','geen');
   ctx_ddl.add_stopword('"CNT_SPL"','gehad');
   ctx_ddl.add_stopword('"CNT_SPL"','gekund');
   ctx_ddl.add_stopword('"CNT_SPL"','geleden');
   ctx_ddl.add_stopword('"CNT_SPL"','gelijk');
   ctx_ddl.add_stopword('"CNT_SPL"','gemoeten');
   ctx_ddl.add_stopword('"CNT_SPL"','gemogen');
   ctx_ddl.add_stopword('"CNT_SPL"','geweest');
   ctx_ddl.add_stopword('"CNT_SPL"','gewoon');
   ctx_ddl.add_stopword('"CNT_SPL"','gewoonweg');
   ctx_ddl.add_stopword('"CNT_SPL"','haar');
   ctx_ddl.add_stopword('"CNT_SPL"','hadden');
   ctx_ddl.add_stopword('"CNT_SPL"','hare');
   ctx_ddl.add_stopword('"CNT_SPL"','heb');
   ctx_ddl.add_stopword('"CNT_SPL"','hebben');
   ctx_ddl.add_stopword('"CNT_SPL"','hebt');
   ctx_ddl.add_stopword('"CNT_SPL"','heeft');
   ctx_ddl.add_stopword('"CNT_SPL"','hem');
   ctx_ddl.add_stopword('"CNT_SPL"','hen');
   ctx_ddl.add_stopword('"CNT_SPL"','het');
   ctx_ddl.add_stopword('"CNT_SPL"','hierbeneden');
   ctx_ddl.add_stopword('"CNT_SPL"','hierboven');
   ctx_ddl.add_stopword('"CNT_SPL"','hij');
   ctx_ddl.add_stopword('"CNT_SPL"','hoe');
   ctx_ddl.add_stopword('"CNT_SPL"','hoewel');
   ctx_ddl.add_stopword('"CNT_SPL"','hun');
   ctx_ddl.add_stopword('"CNT_SPL"','hunne');
   ctx_ddl.add_stopword('"CNT_SPL"','ik');
   ctx_ddl.add_stopword('"CNT_SPL"','ikzelf');
 /*  ctx_ddl.add_stopword('"CNT_SPL"','in');*/
   ctx_ddl.add_stopword('"CNT_SPL"','inmiddels');
   ctx_ddl.add_stopword('"CNT_SPL"','inzake');
 /*  ctx_ddl.add_stopword('"CNT_SPL"','is');*/
   ctx_ddl.add_stopword('"CNT_SPL"','jezelf');
   ctx_ddl.add_stopword('"CNT_SPL"','jij');
   ctx_ddl.add_stopword('"CNT_SPL"','jijzelf');
   ctx_ddl.add_stopword('"CNT_SPL"','jou');
   ctx_ddl.add_stopword('"CNT_SPL"','jouw');
   ctx_ddl.add_stopword('"CNT_SPL"','jouwe');
   ctx_ddl.add_stopword('"CNT_SPL"','juist');
   ctx_ddl.add_stopword('"CNT_SPL"','jullie');
   ctx_ddl.add_stopword('"CNT_SPL"','kan');
   ctx_ddl.add_stopword('"CNT_SPL"','klaar');
   ctx_ddl.add_stopword('"CNT_SPL"','kon');
   ctx_ddl.add_stopword('"CNT_SPL"','konden');
   ctx_ddl.add_stopword('"CNT_SPL"','krachtens');
   ctx_ddl.add_stopword('"CNT_SPL"','kunnen');
   ctx_ddl.add_stopword('"CNT_SPL"','kunt');
   ctx_ddl.add_stopword('"CNT_SPL"','later');
   ctx_ddl.add_stopword('"CNT_SPL"','liever');
   ctx_ddl.add_stopword('"CNT_SPL"','maar');
   ctx_ddl.add_stopword('"CNT_SPL"','mag');
   ctx_ddl.add_stopword('"CNT_SPL"','meer');
   ctx_ddl.add_stopword('"CNT_SPL"','met');
   ctx_ddl.add_stopword('"CNT_SPL"','mezelf');
   ctx_ddl.add_stopword('"CNT_SPL"','mij');
   ctx_ddl.add_stopword('"CNT_SPL"','mijn');
   ctx_ddl.add_stopword('"CNT_SPL"','mijnent');
   ctx_ddl.add_stopword('"CNT_SPL"','mijner');
   ctx_ddl.add_stopword('"CNT_SPL"','mijzelf');
   ctx_ddl.add_stopword('"CNT_SPL"','misschien');
   ctx_ddl.add_stopword('"CNT_SPL"','mocht');
   ctx_ddl.add_stopword('"CNT_SPL"','mochten');
   ctx_ddl.add_stopword('"CNT_SPL"','moest');
   ctx_ddl.add_stopword('"CNT_SPL"','moesten');
   ctx_ddl.add_stopword('"CNT_SPL"','moet');
   ctx_ddl.add_stopword('"CNT_SPL"','moeten');
   ctx_ddl.add_stopword('"CNT_SPL"','mogen');
   ctx_ddl.add_stopword('"CNT_SPL"','na');
   ctx_ddl.add_stopword('"CNT_SPL"','naar');
   ctx_ddl.add_stopword('"CNT_SPL"','nadat');
   ctx_ddl.add_stopword('"CNT_SPL"','net');
   ctx_ddl.add_stopword('"CNT_SPL"','niet');
   ctx_ddl.add_stopword('"CNT_SPL"','noch');
   ctx_ddl.add_stopword('"CNT_SPL"','nog');
   ctx_ddl.add_stopword('"CNT_SPL"','nogal');
   ctx_ddl.add_stopword('"CNT_SPL"','nu');
 /*  ctx_ddl.add_stopword('"CNT_SPL"','of');*/
   ctx_ddl.add_stopword('"CNT_SPL"','ofschoon');
   ctx_ddl.add_stopword('"CNT_SPL"','om');
   ctx_ddl.add_stopword('"CNT_SPL"','omdat');
   ctx_ddl.add_stopword('"CNT_SPL"','omhoog');
   ctx_ddl.add_stopword('"CNT_SPL"','omlaag');
   ctx_ddl.add_stopword('"CNT_SPL"','omstreeks');
   ctx_ddl.add_stopword('"CNT_SPL"','omtrent');
   ctx_ddl.add_stopword('"CNT_SPL"','omver');
   ctx_ddl.add_stopword('"CNT_SPL"','onder');
   ctx_ddl.add_stopword('"CNT_SPL"','ondertussen');
   ctx_ddl.add_stopword('"CNT_SPL"','ongeveer');
   ctx_ddl.add_stopword('"CNT_SPL"','ons');
   ctx_ddl.add_stopword('"CNT_SPL"','onszelf');
   ctx_ddl.add_stopword('"CNT_SPL"','onze');
   ctx_ddl.add_stopword('"CNT_SPL"','ook');
   ctx_ddl.add_stopword('"CNT_SPL"','op');
   ctx_ddl.add_stopword('"CNT_SPL"','opnieuw');
   ctx_ddl.add_stopword('"CNT_SPL"','opzij');
 /*  ctx_ddl.add_stopword('"CNT_SPL"','over');*/
   ctx_ddl.add_stopword('"CNT_SPL"','overeind');
   ctx_ddl.add_stopword('"CNT_SPL"','overigens');
   ctx_ddl.add_stopword('"CNT_SPL"','pas');
   ctx_ddl.add_stopword('"CNT_SPL"','precies');
   ctx_ddl.add_stopword('"CNT_SPL"','reeds');
   ctx_ddl.add_stopword('"CNT_SPL"','rond');
   ctx_ddl.add_stopword('"CNT_SPL"','rondom');
   ctx_ddl.add_stopword('"CNT_SPL"','sedert');
   ctx_ddl.add_stopword('"CNT_SPL"','sinds');
   ctx_ddl.add_stopword('"CNT_SPL"','sindsdien');
   ctx_ddl.add_stopword('"CNT_SPL"','slechts');
   ctx_ddl.add_stopword('"CNT_SPL"','sommige');
   ctx_ddl.add_stopword('"CNT_SPL"','spoedig');
   ctx_ddl.add_stopword('"CNT_SPL"','steeds');
   ctx_ddl.add_stopword('"CNT_SPL"','tamelijk');
   ctx_ddl.add_stopword('"CNT_SPL"','tenzij');
   ctx_ddl.add_stopword('"CNT_SPL"','terwijl');
   ctx_ddl.add_stopword('"CNT_SPL"','thans');
   ctx_ddl.add_stopword('"CNT_SPL"','tijdens');
   ctx_ddl.add_stopword('"CNT_SPL"','toch');
   ctx_ddl.add_stopword('"CNT_SPL"','toen');
   ctx_ddl.add_stopword('"CNT_SPL"','toenmaals');
   ctx_ddl.add_stopword('"CNT_SPL"','toenmalig');
   ctx_ddl.add_stopword('"CNT_SPL"','tot');
   ctx_ddl.add_stopword('"CNT_SPL"','totdat');
   ctx_ddl.add_stopword('"CNT_SPL"','tussen');
   ctx_ddl.add_stopword('"CNT_SPL"','uit');
   ctx_ddl.add_stopword('"CNT_SPL"','uitgezonderd');
   ctx_ddl.add_stopword('"CNT_SPL"','vaak');
   ctx_ddl.add_stopword('"CNT_SPL"','van');
   ctx_ddl.add_stopword('"CNT_SPL"','vandaan');
   ctx_ddl.add_stopword('"CNT_SPL"','vanuit');
   ctx_ddl.add_stopword('"CNT_SPL"','vanwege');
   ctx_ddl.add_stopword('"CNT_SPL"','veeleer');
   ctx_ddl.add_stopword('"CNT_SPL"','verder');
   ctx_ddl.add_stopword('"CNT_SPL"','vervolgens');
   ctx_ddl.add_stopword('"CNT_SPL"','vol');
   ctx_ddl.add_stopword('"CNT_SPL"','volgens');
   ctx_ddl.add_stopword('"CNT_SPL"','voor');
   ctx_ddl.add_stopword('"CNT_SPL"','vooraf');
   ctx_ddl.add_stopword('"CNT_SPL"','vooral');
   ctx_ddl.add_stopword('"CNT_SPL"','vooralsnog');
   ctx_ddl.add_stopword('"CNT_SPL"','voorbij');
   ctx_ddl.add_stopword('"CNT_SPL"','voordat');
   ctx_ddl.add_stopword('"CNT_SPL"','voordezen');
   ctx_ddl.add_stopword('"CNT_SPL"','voordien');
   ctx_ddl.add_stopword('"CNT_SPL"','voorheen');
   ctx_ddl.add_stopword('"CNT_SPL"','voorop');
   ctx_ddl.add_stopword('"CNT_SPL"','vooruit');
   ctx_ddl.add_stopword('"CNT_SPL"','vrij');
   ctx_ddl.add_stopword('"CNT_SPL"','vroeg');
   ctx_ddl.add_stopword('"CNT_SPL"','waar');
   ctx_ddl.add_stopword('"CNT_SPL"','waarom');
   ctx_ddl.add_stopword('"CNT_SPL"','wanneer');
   ctx_ddl.add_stopword('"CNT_SPL"','want');
   ctx_ddl.add_stopword('"CNT_SPL"','waren');
 /*  ctx_ddl.add_stopword('"CNT_SPL"','was');*/
   ctx_ddl.add_stopword('"CNT_SPL"','wat');
   ctx_ddl.add_stopword('"CNT_SPL"','weer');
   ctx_ddl.add_stopword('"CNT_SPL"','weg');
   ctx_ddl.add_stopword('"CNT_SPL"','wegens');
   ctx_ddl.add_stopword('"CNT_SPL"','wel');
   ctx_ddl.add_stopword('"CNT_SPL"','weldra');
   ctx_ddl.add_stopword('"CNT_SPL"','welk');
   ctx_ddl.add_stopword('"CNT_SPL"','welke');
   ctx_ddl.add_stopword('"CNT_SPL"','wie');
   ctx_ddl.add_stopword('"CNT_SPL"','wiens');
   ctx_ddl.add_stopword('"CNT_SPL"','wier');
   ctx_ddl.add_stopword('"CNT_SPL"','wij');
   ctx_ddl.add_stopword('"CNT_SPL"','wijzelf');
   ctx_ddl.add_stopword('"CNT_SPL"','zal');
   ctx_ddl.add_stopword('"CNT_SPL"','ze');
   ctx_ddl.add_stopword('"CNT_SPL"','zelfs');
   ctx_ddl.add_stopword('"CNT_SPL"','zichzelf');
   ctx_ddl.add_stopword('"CNT_SPL"','zij');
   ctx_ddl.add_stopword('"CNT_SPL"','zijn');
   ctx_ddl.add_stopword('"CNT_SPL"','zijne');
   ctx_ddl.add_stopword('"CNT_SPL"','zo');
   ctx_ddl.add_stopword('"CNT_SPL"','zodra');
   ctx_ddl.add_stopword('"CNT_SPL"','zonder');
   ctx_ddl.add_stopword('"CNT_SPL"','zou');
   ctx_ddl.add_stopword('"CNT_SPL"','zouden');
   ctx_ddl.add_stopword('"CNT_SPL"','zowat');
   ctx_ddl.add_stopword('"CNT_SPL"','zulke');
   ctx_ddl.add_stopword('"CNT_SPL"','zullen');
   ctx_ddl.add_stopword('"CNT_SPL"','zult');
 end;
 /
 
 begin
   ctx_ddl.drop_preference('"CNT_STO"');
 end;
 /
 begin
   ctx_ddl.create_preference('"CNT_STO"','BASIC_STORAGE');
   ctx_ddl.set_attribute('"CNT_STO"','R_TABLE_CLAUSE','lob (data) store as (cache
 )');
   ctx_ddl.set_attribute('"CNT_STO"','I_INDEX_CLAUSE','compress 2');
 end;
 /
 
 /* Debug/progress check, will log in $ORACLE_HOME/ctx/log */
 begin
   ctx_output.start_log('CNT_LOG');
 end;
 /
 
 /* If this is a production database, fill in the added column */
 /* with the correct info to rebuild the index later           */
 update attachments
 set fmt='ignore'
 where
 contentencoding is NULL
 
 update attachments
 set fmt='ignore'
 where
 contentencoding = 'base64'
 
 update attachments
 set fmt='text'
 where
 contentencoding = 'none'
 
 update attachments
 set fmt='text'
 where
 contentencoding = 'quoted-printable'
 
 
 /* Extent the Attachments table with an extra column */
 /* This needs te be done only once                   */
 ALTER TABLE RT_USER.ATTACHMENTS
  ADD (context_fmt  VARCHAR2(10));
 
 drop index cnt;
 
 create index "RT_USER"."CNT"
   on "RT_USER"."ATTACHMENTS"
       ("CONTENT")
   indextype is ctxsys.context
   parameters('
     format column    context_fmt
     datastore       "CNT_DST"
     filter          "CNT_FIL"
     section group   "CNT_SGP"
     lexer           "CNT_LEX"
     wordlist        "CNT_WDL"
     stoplist        "CNT_SPL"
     storage         "CNT_STO"
   ')
 /
 
 begin
   ctx_output.end_log;
 end;
 /
 
 
 /* Add a trigger so that new rows are properly tagged */
 CREATE OR REPLACE TRIGGER RT_USER.BI_Att
 BEFORE INSERT
 ON RT_USER.ATTACHMENTS
 REFERENCING NEW AS New OLD AS Old
 FOR EACH ROW
 DECLARE
 /******************************************************************************
    NAME:
    PURPOSE:
 
    REVISIONS:
    Ver        Date        Author           Description
    ---------  ----------  ---------------  ------------------------------------
    1.0        24-5-2007   Joop v/d Wege   1. Created this trigger.
 
    NOTES:
    Trigger takes care of filling the context_fmt column with the right
    value so that only text is really indexed and not base64 rows
 
 ******************************************************************************/
 BEGIN
 
     IF :new.CONTENTENCODING = 'base64' OR :new.CONTENTENCODING is NULL THEN
        :new.context_fmt := 'ignore';
     ELSE
         :new.context_fmt := 'text';
     END IF;
 
 END ;
 /
 
 
 /* There needs to be a job that on a regular basis refreshes the index */
 BEGIN
   SYS.DBMS_JOB.REMOVE(21);
 COMMIT;
 END;
 /
 
 DECLARE
   X NUMBER;
 BEGIN
   SYS.DBMS_JOB.SUBMIT
   ( job       => X
    ,what      => 'DECLARE
    stmt VARCHAR2(200);
 BEGIN
    stmt := ''ALTER INDEX cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
    EXECUTE IMMEDIATE stmt;
 END;
 '
    ,next_date => to_date('20-07-2007 06:00:00','dd/mm/yyyy hh24:mi:ss')
    ,interval  => 'TRUNC(SYSDATE+1)+6/24'
    ,no_parse  => FALSE
   );
   SYS.DBMS_OUTPUT.PUT_LINE('Job Number is: ' || to_char(x));
 COMMIT;
 END;
 /
 
 /* test if things work                                                      */
 /* first run is slow, second should be fast <500ms)                         */
 select * from attachments where contains(content,'accounts')>0;
 
 /****************************************************************************/
 /* Repeat for ObjectCustomFieldValues                                       */
 /****************************************************************************/
 
 alter table objectcustomfieldvalues add (context_fmt varchar2(10));
 
 update objectcustomfieldvalues
 set context_fmt='ignore'
 where
 contentencoding is NULL;
 
 update objectcustomfieldvalues
 set context_fmt='ignore'
 where
 contentencoding = 'base64';
 
 update objectcustomfieldvalues
 set context_fmt='text'
 where
 contentencoding = 'none';
 
 update objectcustomfieldvalues
 set context_fmt='text'
 where
 contentencoding = 'quoted-printable';
 
 
 CREATE OR REPLACE TRIGGER RT_USER.BI_OCFV
 BEFORE INSERT
 ON RT_USER.objectcustomfieldvalues
 REFERENCING NEW AS New OLD AS Old
 FOR EACH ROW
 DECLARE
 /******************************************************************************
    NAME:
    PURPOSE:
 
    REVISIONS:
    Ver        Date        Author           Description
    ---------  ----------  ---------------  ------------------------------------
    1.0        24-5-2007   Joop v/d Wege   1. Created this trigger.
 
    NOTES:
    Trigger takes care of filling the context_fmt column with the right
    value so that only text is really indexed and not base64 rows
 
 ******************************************************************************/
 BEGIN
 
     IF :new.CONTENTENCODING = 'base64' OR :new.CONTENTENCODING is NULL THEN
        :new.context_fmt := 'ignore';
     ELSE
         :new.context_fmt := 'text';
     END IF;
 
 END ;
 /
 
 
 drop index ocfv_cnt;
 
 create index ocfv_cnt
   on "RT_USER"."OBJECTCUSTOMFIELDVALUES"
       ("LARGECONTENT")
   indextype is ctxsys.context
   parameters('
     format column    context_fmt
     datastore       "CNT_DST"
     filter          "CNT_FIL"
     section group   "CNT_SGP"
     lexer           "CNT_LEX"
     wordlist        "CNT_WDL"
     stoplist        "CNT_SPL"
     storage         "CNT_STO"
   ')
 /
 
 
 select * from objectcustomfieldvalues where contains(largecontent,'accounts')>0
 
 /* depending on whether both RT and RTFM are used use this job instead of the */
 /* previous one                                                               */
 BEGIN
   SYS.DBMS_JOB.REMOVE(21);
 COMMIT;
 END;
 /
 
 DECLARE
   X NUMBER;
 BEGIN
   SYS.DBMS_JOB.SUBMIT
   ( job       => X
    ,what      => 'DECLARE
    stmt VARCHAR2(200);
 BEGIN
    stmt := ''ALTER INDEX cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
    EXECUTE IMMEDIATE stmt;
    stmt := ''ALTER INDEX ocfv_cnt rebuild PARAMETERS('' || ''''''sync memory 40M'''''' || '')'';
    EXECUTE IMMEDIATE stmt;
 
 END;
 '
    ,next_date => to_date('25-07-2007 06:00:00','dd/mm/yyyy hh24:mi:ss')
    ,interval  => 'TRUNC(SYSDATE+1)+6/24'
    ,no_parse  => FALSE
   );
   SYS.DBMS_OUTPUT.PUT_LINE('Job Number is: ' || to_char(x));
 COMMIT;
 END;
 /
 

End OracleTest part