/* $Id: organize.h,v 1.4 2002/08/06 14:56:01 arto Exp $
 * 
 * Copyright Arto Teräs <arto.teras@hip.fi> 2002
 */

#ifndef _ORGANIZE_H
#define _ORGANIZE_H

/* Name string identifying this component.
 * NOTE(review): presumably used as a prefix/tag in log or diagnostic
 * output -- confirm against the callers. */
#define PNAME "dbmail/organize"

#include <bow/libbow.h>

/* Sorting methods; one of these values is stored in
 * organize_parameters.sorting_method. */
enum SORTING_METHODS
{
  SORTING_SOM        = 0,
  SORTING_BRUTEFORCE = 1
};

/* Weighting methods; currently only one is defined. */
enum WEIGHTING_METHODS
{
  WEIGHTING_IDF = 0
};

/* Not all of these may be implemented yet; see the code. */
/*
 * Parameter set for organize.  Every member is a plain int.
 */
typedef struct
{
  /* Word pruning limits, expressed as a document count or as a percentage.
   * NOTE(review): exact thresholds/"disabled" values are defined by the
   * code that reads these fields -- confirm there. */
  int prune_words_in_more_than_n_docs;
  int prune_words_in_more_than_n_percent;
  int prune_words_in_less_than_n_docs;
  int prune_words_in_less_than_n_percent;

  /* Number of words to preserve; 0 if infogain is not used. */
  int prune_by_infogain;

  /* One of the values of enum SORTING_METHODS. */
  int sorting_method;

  /* See bow_smoothing in libbow.h. */
  int smoothing_method;

  /* NOTE(review): presumably one of enum WEIGHTING_METHODS -- confirm. */
  int weighting_method;

  /* Maximum number of words to consider; 0 = unlimited. */
  int max_word_vector_length;

  /* Bigger value = print more stuff to stdout or log. */
  int verbosity;
} organize_parameters;


#endif
