LogTokenizing

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def leadingPunctuation: Regex

A regular expression describing punctuation to strip from the beginning of tokens; matches will be stripped by replacing them with their first match group.
A regular expression describing punctuation to strip from the beginning of tokens; matches will be stripped by replacing them with their first match group. Override this definition to customize tokenizer behavior. Defaults to
```
"(\\s)[^\\sA-Za-z0-9-_/]+|()^[^\\sA-Za-z0-9-_/]+"
```
if not overridden.
"(\\s)[^\\sA-Za-z0-9-_/]+|()^[^\\sA-Za-z0-9-_/]+"
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def rejectedIntratokenPunctuation: Regex

A regular expression describing punctuation to strip from within tokens; matches will be stripped by replacing them with the empty string.
A regular expression describing punctuation to strip from within tokens; matches will be stripped by replacing them with the empty string. Override this definition to customize tokenizer behavior. Defaults to
```
"[^A-Za-z0-9-_./:@]"
```
if not overridden.
"[^A-Za-z0-9-_./:@]"
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
def tokens(msg: String, post: (String) ⇒ String = identity[String], pred: (String) ⇒ Boolean = str => true): Seq[String]

Splits a log message into a sequence of tokens, by
Splits a log message into a sequence of tokens, by
- collapsing runs of whitespace into single spaces,
- stripping rejected intertoken punctuation,
- stripping rejected intratoken punctuation,
- splitting on whitespace,
- rejecting candidate tokens not containing at least one letter, and
- applying optional user-supplied transformation and filtering functions.
returns
a sequence of tokens

See also
Using word2vec on log messages
def trailingPunctuation: Regex

A regular expression describing punctuation to strip from the end of tokens; matches will be stripped by replacing them with their first match group.
A regular expression describing punctuation to strip from the end of tokens; matches will be stripped by replacing them with their first match group. Override this definition to customize tokenizer behavior. Defaults to
```
"[^\\sA-Za-z0-9-_/]+(\\s)|()[^\\sA-Za-z0-9-_/]+$"
```
if not overridden.
"[^\\sA-Za-z0-9-_/]+(\\s)|()[^\\sA-Za-z0-9-_/]+$"
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Doc: package text

trait LogTokenizing extends AnyRef

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

def leadingPunctuation: Regex

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def rejectedIntratokenPunctuation: Regex

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

def tokens(msg: String, post: (String) ⇒ String = identity[String], pred: (String) ⇒ Boolean = str => true): Seq[String]

def trailingPunctuation: Regex

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from AnyRef

Inherited from Any

Ungrouped