//SRSMTC: SRS-based MTC Algorithm |
//the n of the word-level n-grams |
Int n; |
//the simple random sampling rate |
Float r; |
//the number of categories |
Int m; |
//the token level memory |
TLM tlm; |
//extract tokens based on overlapping word-level n-grams model |
String[] Tokenizer(Document d) |
//sample tokens based on the simple random sampling rate |
String[] SimpleRandomSampler(String[] T) |
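//compute conditional probability $P(c_i \mid token)$ for each category c_i |
Float[] BayesianPredictor(String token) |
(1) Float[] P := new Float[m]; //one entry per category; m is the number of categories |
(2) Loop: For Each Int i ∈ [1..m] Do: |
(2.1) P[i] := tlm.TF(c_i, token)/tlm.DF(c_i); //tlm is the token level memory |
(3) Float sum := Sum(P); //add the floats to a sum |
(4) Loop: For Each Int i ∈ [1..m] Do: |
(4.1) P[i] := P[i]/sum; |
(5) Output P. |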
//SRSMTC.T: Training Procedure of SRSMTC |
SRSMTC.T(Document[] D; Category[] C) |
(1) Int k := D.size; //get the number of training documents |
(2) Loop: For Each Int i ∈ [1..k] Do: |
(2.1) Document d := D[i]; |
(2.2) Category c := C[i]; |
(2.3) String[] T := Tokenizer(d); |
(2.4) String[] T' := SimpleRandomSampler(T); |
(2.5) tlm.DF(c) := tlm.DF(c) + 1; //tlm is the token level memory |
(2.6) Loop: For Each String t ∈ T' Do: |
(2.6.1) If tlm.contain(t) Then: tlm.TF(c, t) := tlm.TF(c, t) + 1; |
(2.6.2) Else: |
(2.6.2.1) tlm.TF(c, t) := 1; |
(2.6.2.2) tlm.TF(c_other, t) := 0; // c_other means all other categories |
(2.6.2.3) tlm.put(t). |
//SRSMTC.P: Predicting Procedure of SRSMTC |
Category[] SRSMTC.P(Document[] D) |
(1) Int k := D.size; //get the number of testing documents |
(2) Category[] C := new Category[k]; |
(3) Loop: For Each Int i ∈ [1..k] Do: |
(3.1) Document d := D[i]; |
(3.2) String[] T := Tokenizer(d); |
(3.3) Float[] ep := new Float[m]; //one entry per category; m is the number of categories |
(3.4) Loop: For Each String t ∈ T Do: |
(3.4.1) Float[] P := BayesianPredictor(t); |
(3.4.2) Loop: For Each Int j ∈ [1..m] Do: |
(3.4.2.1) ep[j] := ep[j] + P[j]; |
(3.5) Float sum := Sum(ep); //add the floats to a sum |
(3.6) Loop: For Each Int j ∈ [1..m] Do: |
(3.6.1) ep[j] := ep[j]/sum; |
(3.7) Int index := Math.max(ep).getIndex; |
(3.8) C[i] := c_index; //c_index is the category at position index, i.e. the one with the largest ep value |
(4) Output C.