Wednesday, September 3, 2008

Oracle Text - Word Searches

Define Onoff = Off
rem +-----------------------------------------------------------------+
rem | Script ID: doi.sql
rem |
rem | Purpose: Oracle Text Search on words from the Declaration
rem | of Independence. Added some dog words to test
rem | 'About'. No luck with 'About' or 'stem'.
rem |
rem | Developer: Lynn Tobias
rem | Script Date: 9/2/2008
rem | Oracle Ver: 10g
rem |
rem | Table(s) Used: DOI_Context and DOI_CTXCAT created in this query.
rem |
rem | Revisions: Dvl Date Ver Comment/Change
rem | --- -------- --- --------------------------------+
rem |
rem +-----------------------------------------------------------------+

Clear Screen
Set Echo &Onoff Feedback &Onoff Linesize 100 Term Off

Column Words Format A64 Word_Wrapped
Column Pid Format 99 Heading PId

--+--------------------------------------------------------------------------+
--| Drop the table before recreating it and inserting a few recs
--+--------------------------------------------------------------------------+

Drop Table Doi_Context Cascade Constraints Purge;
Drop Table Doi_Ctxcat Cascade Constraints Purge;

Create Table Doi_Context
( Pid Number(2)
, Words Varchar2(4000)
, Constraint Doi_Pk Primary Key (Pid)
);
Insert Into DOI_Context Values(1,'Governments Governed people LAWS abolishing GOVERNORS poodle');
Insert Into DOI_Context Values(2,'representation labrador justice people basset hound ');
Insert Into DOI_Context Values(3,'laws representative peace corgi representatives people');
Insert Into DOI_Context Values(4,'laws abolishing government german shepherd justice peace');
Commit;

--+--------------------------------------------------------------------------+
--| Create a 2nd similar table to test ctxcat index and Catsearch.
--| "You must create a primary key for table."
--+--------------------------------------------------------------------------+

Create Table Doi_Ctxcat
As Select *
From Doi_Context;

Alter Table Doi_Ctxcat Add Primary Key (Pid);

--+--------------------------------------------------------------------------+
--| This is my attempt to get the stem and 'about' to work. No luck yet.
--| The lexer parameter is copied from the internet.
--+--------------------------------------------------------------------------+

Begin
Ctx_Ddl.Create_Preference( 'MyLex', 'Basic_Lexer' );
Ctx_Ddl.Set_Attribute ( 'MyLex', 'Printjoins' , '_-' );
Ctx_Ddl.Set_Attribute ( 'MyLex', 'Index_Themes', 'yes');
Ctx_Ddl.Set_Attribute ( 'MyLex', 'Index_Text' , 'yes');
End;
/
Create Index Doi_Context_Index
On Doi_Context(Words)
Indextype Is Ctxsys.Context
Parameters ( 'Lexer MyLex' );

Create Index Doi_Ctxcat_Index
On Doi_Ctxcat(Words)
Indextype Is Ctxsys.Ctxcat;

--+--------------------------------------------------------------------------+
--| When the text is changed, the index is out of sync unless you execute
--| the sync_index statement -- or you include the sync parameter in the
--| create index.
--+--------------------------------------------------------------------------+

--Parameters('sync (On Commit)');
--execute CTX_DDL.Sync_index('doi_context_index');

--+--------------------------------------------------------------------------+
--| Show the indexes created
--+--------------------------------------------------------------------------+
Column Owner Format A6
Column Constraint_Name Format A17
Column Deferrable Format A10
Column Generated Format A9
Column Index_Name Format A17

Select *
From User_Constraints
Where Constraint_Name Like '%DOI%'
Or Table_Name Like '%DOI%';

--+--------------------------------------------------------------------------+
--| These gave identical results. Display all lines with 'people'
--+--------------------------------------------------------------------------+
Set Term On
Column Words Heading 'Contains(Words, people)'

Select *
From Doi_Context
Where Contains(Words, 'people') >0;

--Select * From Doi_Ctxcat Where Catsearch(Words, 'people', NULL) >0;

--+--------------------------------------------------------------------------+
--| Print any lines with both 'laws' and 'people' in it.
--+--------------------------------------------------------------------------+
Column Words Heading 'Contains(Words, laws AND people)'

Select Pid, Words
From Doi_Context
Where Contains(Words,'laws AND people')>0;

--Select Pid, Words From Doi_Ctxcat Where Catsearch(Words,'laws people', NULL)>0;

--+--------------------------------------------------------------------------+
--| Any line with one word or the other
--+--------------------------------------------------------------------------+
Column Words Heading 'Contains(Words,laws Or People)'

Select Pid, Words
From Doi_Context
Where Contains(Words,'laws Or People')>0;

--select Pid, words From DOI_Ctxcat where catsearch(words,'laws |people', NULL)>0;

--+--------------------------------------------------------------------------+
--| Any line with one word near the other
--+--------------------------------------------------------------------------+
Column Words Heading 'Contains(Words,laws Near People)'

Select Pid, Words, Score(10)
From Doi_Context
Where Contains(Words,'laws Near People',10)>0;

--+--------------------------------------------------------------------------+
--| Any line with first word but not second
--+--------------------------------------------------------------------------+
Column Words Heading 'Contains(Words,laws - People) Laws but not people'

Select Pid, Words
From Doi_Context
Where Contains(Words,'laws - People')>0;

--Select Pid, Words From Doi_Ctxcat Where Catsearch(Words,'laws - People', Null)>0;

--+--------------------------------------------------------------------------+
--| One of three words
--+--------------------------------------------------------------------------+
Column Words Heading 'Contains(Words,Government Or People Or Abolishing'

Select Pid, Words, Score(10)
From Doi_Context
Where Contains(Words,'Government Or People Or Abolishing',10)>0;

Column Words Heading 'Words Weighted: government*3 Or People*1 Or Abolishing*2'

Select Pid, Words,Score(10)
From Doi_Context
Where Contains(Words,'government*3 Or People*1 Or Abolishing*2',10)>3;

--+--------------------------------------------------------------------------+
--| % is wildcard. $ is stem search (not working)
--+--------------------------------------------------------------------------+
Column Words Heading 'Wildcards: (Words,represent%'

Select Pid, Words, Score(10)
From Doi_Context
Where Contains(Words,'represent%',10)>0;

--Select Pid, Words, Score(10)
--From Doi_Context
--Where Contains(Words,'$Represent',10)>0;

--+--------------------------------------------------------------------------+
--| Fuzzy Match: Justis -> Justice
--+--------------------------------------------------------------------------+
Column Words Heading 'Fuzzy Match: (Words,?Justis'

Select Pid, Words, Score(10)
From Doi_Context
Where Contains(Words,'?Justis',10)>0;

--+--------------------------------------------------------------------------+
--| Perform a Soundex Search
--+--------------------------------------------------------------------------+
Column Words Heading 'Soundex: !Piece'

Select Pid,Words, Score(10)
From Doi_Context
Where Contains(Words,'!Piece',10)>0;

--+--------------------------------------------------------------------------+
--| Topic Search - not working yet
--+--------------------------------------------------------------------------+
--Column Words Heading 'Topic Seach: About(Dog)

--Select Pid, Words, Score(10)
-- From Doi_Context
-- Where Contains(Words,'about(Dog)',10)>0;

No comments:

Post a Comment