The Author Online Book Forums are Moving

The Author Online Book Forums will soon redirect to Manning's liveBook and liveVideo. All book forum content will migrate to liveBook's discussion forum and all video forum content will migrate to liveVideo. Log in to liveBook or liveVideo with your Manning credentials to join the discussion!

Thank you for your engagement in the AoF over the years! We look forward to offering you a more enhanced forum experience.

paulocsc (7) [Avatar] Offline
#1
I did a test to understand the use of '*' and '?'.
If I use StandardAnalyzer I get the expected results, but if I use BrazilianAnalyzer I get wrong results.
Please, where is my mistake? Junit is at the end.
Thanks.
Paulo Cesar

cities in my Lucene data base=> {"Brasília","Brasilândia","Braslândia", "São Paulo", "São Roque", "Salvador"};

>>> Using StandardAnalyzer <<<
Lucene query: 'cityname:bras*'
Found 3 document(s) that matched query 'bras*':
Brasília
Brasilândia
Braslândia

Lucene query: 'cityname:bras?lia'
Found 1 document(s) that matched query 'bras?lia':
Brasília

Lucene query: 'cityname:bras*dia'
Found 2 document(s) that matched query 'bras*dia':
Brasilândia
Braslândia

>>> Using BrazilianAnalyzer <<<<

Lucene query: 'cityname:bras*'
Found 3 document(s) that matched query 'bras*':
Brasília
Brasilândia
Braslândia

Lucene query: 'cityname:bras?lia'
Found 0 document(s) that matched query 'bras?lia': <=== ????

Lucene query: 'cityname:bras*dia'
Found 0 document(s) that matched query 'bras*dia': <=== ????



>>>>>>>> JUnit <<<<<<<<<<<<

package tcu.util.bancodadostextual;

import java.io.File;
import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class EntendendoAnalyser extends TestCase {

public void testMakeDataBase() throws Exception {

createDB();
insertCities();
searchCities();
terminate();

}

private void searchCities() throws CorruptIndexException, IOException, ParseException {

search("bras*");
search("bras?lia");
search("bras*dia");

}

private void search(String expression) throws CorruptIndexException, IOException, ParseException {

IndexSearcher is = new IndexSearcher(indexWriter.getReader());
QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_CITY_NAME, analyzer);
Query query = parser.parse(expression);
System.out.println("
Lucene query: '" + query.toString() + "'");

TopDocs hits = is.search(query, 10);
System.out.println("Found " + hits.totalHits +
" document(s) that matched query '" + expression + "':");

for(ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = is.doc(scoreDoc.doc);
System.out.println(doc.get(FIELD_CITY_NAME));
}
is.close();

}


private void insertCities() throws CorruptIndexException, IOException, Exception {

for (int i =0; i < cities.length; i++)
indexWriter.addDocument(getDocument(cities[i]));

}

private Document getDocument(String city) throws Exception {

Document doc = new Document();
doc.add(new Field("id", ++seq+"",Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field(FIELD_CITY_NAME, city, Field.Store.YES, Field.Index.ANALYZED));

return doc;
}

private void terminate() throws CorruptIndexException, IOException {

if (indexWriter != null) {
indexWriter.deleteAll();
indexWriter.close();
}

}


private void createDB() throws IOException {

File dirIdx = new File(nameDB);

if (!dirIdx.exists())
dirIdx.mkdirs();

directory = FSDirectory.open(dirIdx);
indexWriter = new IndexWriter(directory, analyzer, autoCommit, IndexWriter.MaxFieldLength.UNLIMITED);

}

/*
* Variables
*/
private final String FIELD_CITY_NAME = "cityname";
private String[] cities = {"Brasília","Brasilândia","Braslândia", "São Paulo", "São Roque", "Salvador"};
private int seq =0;
private String nameDB = "/tmp/lucene/test";
private Directory directory;
private boolean autoCommit = true;
private IndexWriter indexWriter;
// private Analyzer analyzer = new BrazilianAnalyzer(org.apache.lucene.util.Version.LUCENE_30);
private Analyzer analyzer = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_30);

}
mike.mccandless (221) [Avatar] Offline
#2
Re: Is it a bug in search
Hi, could you post this to java-user@lucene.apache.org instead?