Package com.nuix.superutilities.regex
Class RegexScanner
- java.lang.Object
-
- com.nuix.superutilities.regex.RegexScanner
-
public class RegexScanner extends java.lang.Object
Class for scanning a series of items with a series of regular expressions.
-
-
Constructor Summary
Constructors Constructor Description RegexScanner()
-
Method Summary
All Methods Static Methods Instance Methods Concrete Methods Modifier and Type Method Description void
abortScan()
When running a scan by providing a Consumer callback, this will signal that further scanning should be aborted.void
addPattern(java.lang.String title, java.lang.String expression)
Adds a regular expression to be part of the scan with a given title.protected void
fireProgressUpdated(int value)
Fires progress update if there is a callback listeningprotected void
fireScanError(RegexScanError error)
Fires error event if there is a callback listening.boolean
getCaptureContextualText()
boolean
getCaseSensitive()
int
getContextSize()
static java.lang.String
getContextualSubString(java.lang.CharSequence textSequence, int matchStart, int matchEnd, int contextSize)
java.util.List<java.lang.String>
getCustomMetadataToScan()
boolean
getDotall()
boolean
getMatchNamedEntityValues()
boolean
getMultiline()
java.util.Set<java.lang.String>
getNamedEntityTypes()
java.util.List<PatternInfo>
getPatterns()
java.util.List<java.lang.String>
getPropertiesToScan()
boolean
getScanContent()
boolean
getScanCustomMetadata()
boolean
getScanProperties()
static java.util.Map<java.lang.String,java.lang.String>
getStringCustomMetadata(nuix.Item item, java.util.Set<java.lang.String> specificFields)
Convenience method for converting the custom metadata fields of an item into a Map<String,String> so that regular expressions may be run against them.
static java.util.Map<java.lang.String,java.lang.String>
getStringProperties(nuix.Item item, java.util.Set<java.lang.String> specificProperties)
Convenience method for converting the metadata properties of an item into a Map<String,String> so that regular expressions may be run against them.
protected ItemRegexMatchCollection
scanItem(nuix.Item item)
Scans a single itemjava.util.List<ItemRegexMatchCollection>
scanItems(java.util.Collection<nuix.Item> items)
Scans a series of items serially (no concurrency)void
scanItems(java.util.Collection<nuix.Item> items, java.util.function.Consumer<ItemRegexMatchCollection> callback)
Scans a series of items, providing each item's matches to callback as they are obtained.void
scanItemsParallel(java.util.Collection<nuix.Item> items, java.util.function.Consumer<ItemRegexMatchCollection> callback)
Scans a series of items, providing each item's matches to callback as they are obtained.void
scanItemsParallel(java.util.Collection<nuix.Item> items, java.util.function.Consumer<ItemRegexMatchCollection> callback, int concurrency)
Scans a series of items, providing each item's matches to callback as they are obtained.void
setCaptureContextualText(boolean captureContextualText)
void
setCaseSensitive(boolean caseSensitive)
void
setContextSize(int contextSize)
void
setCustomMetadataToScan(java.util.List<java.lang.String> fieldsToScan)
void
setDotall(boolean dotall)
void
setMatchNamedEntityValues(boolean matchNamedEntityValues)
static void
setMaxToStringLength(int maxLength)
Configures the character count threshold in which the CharSequence TextObject of an item, obtained from the API, is first converted to a String object before being scanned for regular expression matches.void
setMultiline(boolean multiline)
void
setNamedEntityTypes(java.util.Collection<java.lang.String> namedEntityTypes)
void
setPatterns(java.util.List<PatternInfo> patterns)
void
setPropertiesToScan(java.util.List<java.lang.String> propertiesToScan)
void
setScanContent(boolean scanContent)
void
setScanCustomMetadata(boolean scanCustomMetadata)
void
setScanProperties(boolean scanProperties)
void
whenErrorOccurs(java.util.function.Consumer<RegexScanError> errorCallback)
Allows you to provide a callback which will be invoked when an error occurs during scanning.void
whenProgressUpdated(java.util.function.Consumer<java.lang.Integer> callback)
Allows you to provide a callback which will be invoked when progress updates occur.
-
-
-
Method Detail
-
setMaxToStringLength
public static void setMaxToStringLength(int maxLength)
Configures the character count threshold in which the CharSequence TextObject of an item, obtained from the API, is first converted to a String object before being scanned for regular expression matches. Scanning the value as a CharSequence may use less memory but perform slower, whereas scanning the value as a String may perform faster but use more memory.
- Parameters:
maxLength
- Maximum text length that should be converted to a String before scanning
-
whenProgressUpdated
public void whenProgressUpdated(java.util.function.Consumer<java.lang.Integer> callback)
Allows you to provide a callback which will be invoked when progress updates occur.- Parameters:
callback
- Callback to receive progress updates
-
fireProgressUpdated
protected void fireProgressUpdated(int value)
Fires progress update if there is a callback listening- Parameters:
value
- The progress value
-
whenErrorOccurs
public void whenErrorOccurs(java.util.function.Consumer<RegexScanError> errorCallback)
Allows you to provide a callback which will be invoked when an error occurs during scanning.- Parameters:
errorCallback
- The callback to be invoked when errors occur
-
fireScanError
protected void fireScanError(RegexScanError error)
Fires error event if there is a callback listening.- Parameters:
error
- The error which occurred
-
addPattern
public void addPattern(java.lang.String title, java.lang.String expression)
Adds a regular expression to be part of the scan with a given title. Creates a new instance of PatternInfo using the values provided.
- Parameters:
title
- The associated title
expression
- The Java regular expression string to add
-
scanItems
public java.util.List<ItemRegexMatchCollection> scanItems(java.util.Collection<nuix.Item> items)
Scans a series of items serially (no concurrency)- Parameters:
items
- The items to scan- Returns:
- List of matches
-
scanItems
public void scanItems(java.util.Collection<nuix.Item> items, java.util.function.Consumer<ItemRegexMatchCollection> callback)
Scans a series of items, providing each item's matches to callback as they are obtained. Items are scanned in serial (no concurrency).
- Parameters:
items
- The items to scan
callback
- Callback which will receive each item's matches as they are obtained.
-
scanItemsParallel
public void scanItemsParallel(java.util.Collection<nuix.Item> items, java.util.function.Consumer<ItemRegexMatchCollection> callback)
Scans a series of items, providing each item's matches to callback as they are obtained. Items are scanned in parallel using a Java parallel stream.
- Parameters:
items
- The items to scan
callback
- Callback which will receive each item's matches as they are obtained.
-
scanItemsParallel
public void scanItemsParallel(java.util.Collection<nuix.Item> items, java.util.function.Consumer<ItemRegexMatchCollection> callback, int concurrency) throws java.lang.Exception
Scans a series of items, providing each item's matches to callback as they are obtained. Items are scanned in parallel using a Java parallel stream. This differs from the method scanItemsParallel(Collection, Consumer) in that this method invokes the parallel stream within a thread pool to allow for controlling how many threads are used.
- Parameters:
items
- The items to scan
callback
- Callback which will receive each item's matches as they are obtained.
concurrency
- Number of threads to create in worker pool that parallel stream is invoked in
- Throws:
java.lang.Exception
- if there is an error
-
scanItem
protected ItemRegexMatchCollection scanItem(nuix.Item item)
Scans a single item- Parameters:
item
- The item to be scanned- Returns:
- The matches for that item
-
getStringProperties
public static java.util.Map<java.lang.String,java.lang.String> getStringProperties(nuix.Item item, java.util.Set<java.lang.String> specificProperties)
Convenience method for converting the metadata properties of an item into a Map<String,String> so that regular expressions may be run against them.
- Parameters:
item
- The item from which metadata properties will be pulled
specificProperties
- Set of specific properties to be pulled. If null is provided, all properties will be pulled.
- Returns:
- Map of "stringified" metadata properties for the specified item
-
getStringCustomMetadata
public static java.util.Map<java.lang.String,java.lang.String> getStringCustomMetadata(nuix.Item item, java.util.Set<java.lang.String> specificFields)
Convenience method for converting the custom metadata fields of an item into a Map<String,String> so that regular expressions may be run against them.
- Parameters:
item
- The item from which custom metadata fields will be pulled
specificFields
- Set of specific custom metadata fields to be pulled. If null is provided, all fields will be pulled.
- Returns:
- Map of "stringified" custom metadata fields for the specified item
-
getContextualSubString
public static java.lang.String getContextualSubString(java.lang.CharSequence textSequence, int matchStart, int matchEnd, int contextSize)
-
getScanProperties
public boolean getScanProperties()
-
setScanProperties
public void setScanProperties(boolean scanProperties)
-
getScanCustomMetadata
public boolean getScanCustomMetadata()
-
setScanCustomMetadata
public void setScanCustomMetadata(boolean scanCustomMetadata)
-
getScanContent
public boolean getScanContent()
-
setScanContent
public void setScanContent(boolean scanContent)
-
getCaseSensitive
public boolean getCaseSensitive()
-
setCaseSensitive
public void setCaseSensitive(boolean caseSensitive)
-
getMultiline
public boolean getMultiline()
-
setMultiline
public void setMultiline(boolean multiline)
-
getDotall
public boolean getDotall()
-
setDotall
public void setDotall(boolean dotall)
-
getCaptureContextualText
public boolean getCaptureContextualText()
-
setCaptureContextualText
public void setCaptureContextualText(boolean captureContextualText)
-
getContextSize
public int getContextSize()
-
setContextSize
public void setContextSize(int contextSize)
-
getPatterns
public java.util.List<PatternInfo> getPatterns()
-
setPatterns
public void setPatterns(java.util.List<PatternInfo> patterns)
-
getPropertiesToScan
public java.util.List<java.lang.String> getPropertiesToScan()
-
setPropertiesToScan
public void setPropertiesToScan(java.util.List<java.lang.String> propertiesToScan)
-
getCustomMetadataToScan
public java.util.List<java.lang.String> getCustomMetadataToScan()
-
setCustomMetadataToScan
public void setCustomMetadataToScan(java.util.List<java.lang.String> fieldsToScan)
-
getMatchNamedEntityValues
public boolean getMatchNamedEntityValues()
-
setMatchNamedEntityValues
public void setMatchNamedEntityValues(boolean matchNamedEntityValues)
-
getNamedEntityTypes
public java.util.Set<java.lang.String> getNamedEntityTypes()
-
setNamedEntityTypes
public void setNamedEntityTypes(java.util.Collection<java.lang.String> namedEntityTypes)
-
abortScan
public void abortScan()
When running a scan by providing a Consumer callback, this will signal that further scanning should be aborted.
-
-