From 217961570f08f0d149313f355a36f9e858856b8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Berenguel?= Date: Thu, 3 Oct 2024 00:46:42 -0300 Subject: [PATCH] #2286: use ufed:jumptargets in Referenced By / Referencing tables --- .../iped/app/ui/ReferencedByTableModel.java | 45 +++++++--- .../iped/app/ui/ReferencingTableModel.java | 90 ++++++++++++------- .../engine/lucene/analysis/AppAnalyzer.java | 6 +- 3 files changed, 98 insertions(+), 43 deletions(-) diff --git a/iped-app/src/main/java/iped/app/ui/ReferencedByTableModel.java b/iped-app/src/main/java/iped/app/ui/ReferencedByTableModel.java index 5fc0fbf940..9fbe37e692 100644 --- a/iped-app/src/main/java/iped/app/ui/ReferencedByTableModel.java +++ b/iped-app/src/main/java/iped/app/ui/ReferencedByTableModel.java @@ -19,12 +19,20 @@ package iped.app.ui; import java.util.Arrays; +import java.util.List; import java.util.stream.Collectors; import javax.swing.ListSelectionModel; import javax.swing.SwingUtilities; +import org.apache.commons.lang3.StringUtils; import org.apache.lucene.document.Document; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; import iped.data.IItem; import iped.engine.search.IPEDSearcher; @@ -70,20 +78,36 @@ public void valueChanged(ListSelectionModel lsm) { @Override protected void internalListItems(Document doc) { + results = new LuceneSearchResult(0); + fireTableDataChanged(); + + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + + // hashes String md5 = doc.get(HashTask.HASH.MD5.toString()); String sha1 = doc.get(HashTask.HASH.SHA1.toString()); String sha256 = doc.get(HashTask.HASH.SHA256.toString()); String edonkey = doc.get(HashTask.HASH.EDONKEY.toString()); - String hashes = Arrays.asList(md5, sha1, sha256, edonkey).stream().filter(a -> a != 
null).collect(Collectors.joining(" ")); + List hashes = Arrays.asList(md5, sha1, sha256, edonkey).stream().filter(StringUtils::isNotBlank) + .map(h -> new BytesRef(h)).collect(Collectors.toList()); + if (!hashes.isEmpty()) { + queryBuilder.add(new TermInSetQuery(ExtraProperties.LINKED_ITEMS, hashes), Occur.SHOULD); + queryBuilder.add(new TermInSetQuery(ExtraProperties.SHARED_HASHES, hashes), Occur.SHOULD); + } - if (hashes.isEmpty()) { - results = new LuceneSearchResult(0); - } else { - String textQuery = ExtraProperties.LINKED_ITEMS + ":(" + hashes + ") "; - textQuery += ExtraProperties.SHARED_HASHES + ":(" + hashes + ")"; + // ufed:id + String ufedId = doc.get(ExtraProperties.UFED_ID); + if (StringUtils.isNotBlank(ufedId)) { + queryBuilder.add(new TermQuery(new Term(ExtraProperties.UFED_JUMP_TARGETS, ufedId)), Occur.SHOULD); + queryBuilder.add(new TermQuery(new Term(ExtraProperties.UFED_FILE_ID, ufedId)), Occur.SHOULD); + } + BooleanQuery query = queryBuilder.build(); + + if (!query.clauses().isEmpty()) { try { - IPEDSearcher task = new IPEDSearcher(App.get().appCase, textQuery, BasicProps.NAME); + IPEDSearcher task = new IPEDSearcher(App.get().appCase, query, BasicProps.NAME); + task.setRewritequery(false); results = MultiSearchResult.get(task.multiSearch(), App.get().appCase); final int length = results.getLength(); @@ -92,17 +116,16 @@ protected void internalListItems(Document doc) { SwingUtilities.invokeLater(new Runnable() { @Override public void run() { - App.get().referencedByDock.setTitleText(Messages.getString("ReferencedByTab.Title") + " " + length); + App.get().referencedByDock + .setTitleText(Messages.getString("ReferencedByTab.Title") + " " + length); } }); } - } catch (Exception e) { results = new LuceneSearchResult(0); e.printStackTrace(); } + fireTableDataChanged(); } - - fireTableDataChanged(); } } diff --git a/iped-app/src/main/java/iped/app/ui/ReferencingTableModel.java b/iped-app/src/main/java/iped/app/ui/ReferencingTableModel.java index 
716159d0d9..7c35100a8e 100644 --- a/iped-app/src/main/java/iped/app/ui/ReferencingTableModel.java +++ b/iped-app/src/main/java/iped/app/ui/ReferencingTableModel.java @@ -18,20 +18,29 @@ */ package iped.app.ui; +import java.util.Arrays; +import java.util.Set; +import java.util.stream.Collectors; + import javax.swing.ListSelectionModel; import javax.swing.SwingUtilities; +import org.apache.commons.lang3.StringUtils; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; import iped.engine.search.IPEDSearcher; import iped.engine.search.LuceneSearchResult; import iped.engine.search.MultiSearchResult; +import iped.engine.search.QueryBuilder; import iped.engine.task.HashTask; +import iped.exception.ParseException; +import iped.exception.QueryNodeException; import iped.parsers.ares.AresParser; import iped.parsers.emule.KnownMetParser; import iped.parsers.shareaza.ShareazaLibraryDatParser; @@ -57,40 +66,60 @@ protected void internalListItems(Document doc) { results = new LuceneSearchResult(0); fireTableDataChanged(); - StringBuilder textQuery = null; - Query query = null; + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + // linkedItems queries String[] linkedItems = doc.getValues(ExtraProperties.LINKED_ITEMS); - if (linkedItems != null && linkedItems.length > 0) { - textQuery = new StringBuilder(); + if (linkedItems.length > 0) { + QueryBuilder b = new QueryBuilder(App.get().appCase); for (String q : linkedItems) { - textQuery.append("(").append(q).append(") "); - } - } else { - linkedItems = doc.getValues(ExtraProperties.SHARED_HASHES); - if (linkedItems != null && linkedItems.length > 0) { - String term; - String mediaType = doc.get(BasicProps.CONTENTTYPE); - 
if (KnownMetParser.EMULE_MIME_TYPE.equals(mediaType)) { - term = HashTask.HASH.EDONKEY.toString(); - } else if (AresParser.ARES_MIME_TYPE.equals(mediaType)) { - term = HashTask.HASH.SHA1.toString(); - } else if (ShareazaLibraryDatParser.LIBRARY_DAT_MIME_TYPE.equals(mediaType)) { - term = HashTask.HASH.MD5.toString(); - } else { - term = BasicProps.HASH; + try { + queryBuilder.add(b.getQuery(q), Occur.SHOULD); + } catch (ParseException | QueryNodeException e) { + e.printStackTrace(); } - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - for (String hash : linkedItems) { - builder.add(new TermQuery(new Term(term, hash)), Occur.SHOULD); - } - query = builder.build(); } } - if (textQuery != null || query != null) { + // sharedHashes + String[] sharedHashes = doc.getValues(ExtraProperties.SHARED_HASHES); + if (sharedHashes.length > 0) { + String field; + String mediaType = doc.get(BasicProps.CONTENTTYPE); + if (KnownMetParser.EMULE_MIME_TYPE.equals(mediaType)) { + field = HashTask.HASH.EDONKEY.toString(); + } else if (AresParser.ARES_MIME_TYPE.equals(mediaType)) { + field = HashTask.HASH.SHA1.toString(); + } else if (ShareazaLibraryDatParser.LIBRARY_DAT_MIME_TYPE.equals(mediaType)) { + field = HashTask.HASH.MD5.toString(); + } else { + field = BasicProps.HASH; + } + + Set hashes = Arrays.asList(sharedHashes).stream().filter(StringUtils::isNotBlank) + .map(h -> new BytesRef(h)).collect(Collectors.toSet()); + queryBuilder.add(new TermInSetQuery(field, hashes), Occur.SHOULD); + } + + // ufed:jumptargets + String[] ufedJumpTargets = doc.getValues(ExtraProperties.UFED_JUMP_TARGETS); + if (ufedJumpTargets.length > 0) { + Set targets = Arrays.asList(ufedJumpTargets).stream().filter(StringUtils::isNotBlank) + .map(h -> new BytesRef(h)).collect(Collectors.toSet()); + queryBuilder.add(new TermInSetQuery(ExtraProperties.UFED_ID, targets), Occur.SHOULD); + } + + // ufed:file_id (needed? 
already contained in linkedItems) + String ufedFileId = doc.get(ExtraProperties.UFED_FILE_ID); + if (ufedFileId != null) { + queryBuilder.add(new TermQuery(new Term(ExtraProperties.UFED_ID, ufedFileId)), Occur.SHOULD); + } + + BooleanQuery query = queryBuilder.build(); + + if (!query.clauses().isEmpty()) { try { - IPEDSearcher task = query != null ? new IPEDSearcher(App.get().appCase, query, BasicProps.NAME) : new IPEDSearcher(App.get().appCase, textQuery.toString(), BasicProps.NAME); + IPEDSearcher task = new IPEDSearcher(App.get().appCase, query, BasicProps.NAME); task.setRewritequery(false); results = MultiSearchResult.get(task.multiSearch(), App.get().appCase); @@ -100,17 +129,16 @@ protected void internalListItems(Document doc) { SwingUtilities.invokeLater(new Runnable() { @Override public void run() { - App.get().referencesDock.setTitleText(Messages.getString("ReferencesTab.Title") + " " + length); + App.get().referencesDock + .setTitleText(Messages.getString("ReferencesTab.Title") + " " + length); } }); } - } catch (Exception e) { results = new LuceneSearchResult(0); e.printStackTrace(); } + fireTableDataChanged(); } - - fireTableDataChanged(); } } diff --git a/iped-engine/src/main/java/iped/engine/lucene/analysis/AppAnalyzer.java b/iped-engine/src/main/java/iped/engine/lucene/analysis/AppAnalyzer.java index 7fcc999baf..3055a31d41 100644 --- a/iped-engine/src/main/java/iped/engine/lucene/analysis/AppAnalyzer.java +++ b/iped-engine/src/main/java/iped/engine/lucene/analysis/AppAnalyzer.java @@ -32,6 +32,7 @@ import iped.engine.task.PhotoDNATask; import iped.engine.task.index.IndexItem; import iped.localization.LocalizedProperties; +import iped.properties.ExtraProperties; /* * Define analizadores, tokenizadores implicitamente, de indexação específicos para cada propriedade, @@ -46,7 +47,10 @@ public static Analyzer get() { analyzerPerField.put(IndexItem.ID, new KeywordAnalyzer()); analyzerPerField.put(IndexItem.PARENTID, new KeywordAnalyzer()); 
analyzerPerField.put(IndexItem.EVIDENCE_UUID, new KeywordAnalyzer()); - analyzerPerField.put(UfedXmlReader.UFED_ID, new KeywordAnalyzer()); + analyzerPerField.put(ExtraProperties.UFED_ID, new KeywordAnalyzer()); + analyzerPerField.put(ExtraProperties.UFED_JUMP_TARGETS, new KeywordAnalyzer()); + analyzerPerField.put(ExtraProperties.UFED_FILE_ID, new KeywordAnalyzer()); + analyzerPerField.put(ExtraProperties.UFED_COORDINATE_ID, new KeywordAnalyzer()); analyzerPerField.put(IndexItem.CREATED, new KeywordAnalyzer()); analyzerPerField.put(IndexItem.MODIFIED, new KeywordAnalyzer());