CaboCha
cabocha.h
Go to the documentation of this file.
1 /* CaboCha -- Yet Another Japanese Dependency Parser
2  $Id: cabocha.h 50 2009-05-03 08:25:36Z taku-ku $;
3  Copyright(C) 2001-2008 Taku Kudo <taku@chasen.org>
4 */
5 #ifndef CABOCHA_CABOCHA_H_
6 #define CABOCHA_CABOCHA_H_
7 
8 #ifdef __cplusplus
9 extern "C" {
10 #endif
11 
12 #include <stddef.h>
13 
/* Symbol-visibility macros.
 * On _WIN32 with DLL_EXPORT defined, both macros expand to __declspec(dllexport).
 * On _WIN32 with DLL_IMPORT defined (and DLL_EXPORT not), only CABOCHA_DLL_EXTERN
 * is set, to __declspec(dllimport); CABOCHA_DLL_CLASS_EXTERN is left to the
 * fallback below. */
14 #ifdef _WIN32
15 # ifdef DLL_EXPORT
16 # define CABOCHA_DLL_EXTERN __declspec(dllexport)
17 # define CABOCHA_DLL_CLASS_EXTERN __declspec(dllexport)
18 # else
19 # ifdef DLL_IMPORT
20 # define CABOCHA_DLL_EXTERN __declspec(dllimport)
21 # endif
22 # endif
23 #endif
24 
/* Fallback when nothing above defined it: a plain `extern` declaration. */
25 #ifndef CABOCHA_DLL_EXTERN
26 # define CABOCHA_DLL_EXTERN extern
27 #endif
28 
/* Fallback when nothing above defined it: expands to nothing. */
29 #ifndef CABOCHA_DLL_CLASS_EXTERN
30 # define CABOCHA_DLL_CLASS_EXTERN
31 #endif
32 
33  enum {
38  };
39 
40  enum {
44  };
45 
46  enum {
53  };
54 
55  enum {
61  };
62 
63  enum {
69  };
70 
71  enum {
75  };
76 
/* Forward declarations only: neither struct cabocha_t nor struct mecab_node_t
 * is defined in the lines shown, so both are used through pointers. */
77  typedef struct cabocha_t cabocha_t;
79  struct mecab_node_t;
80 
/* Per-chunk record; returned by pointer from cabocha_tree_chunk() and
 * cabocha_tree_add_chunk().
 * NOTE(review): the field meanings below are inferred from names only -- confirm
 * against the implementation before relying on them. */
81  struct cabocha_chunk_t {
82  int link; /* presumably the index of the chunk this one depends on -- TODO confirm */
83  size_t head_pos; /* presumably the position of the head token -- TODO confirm what it is relative to */
84  size_t func_pos; /* presumably the position of the function-word token -- TODO confirm */
85  size_t token_size; /* presumably the number of tokens in this chunk -- TODO confirm */
86  size_t token_pos; /* presumably the index of the chunk's first token -- TODO confirm */
87  float score; /* meaning and range are not visible in this header */
88  const char **feature_list; /* array of C strings; presumably feature_list_size entries -- TODO confirm */
89  const char *additional_info; /* C string; content is not specified in this header */
90  unsigned short int feature_list_size; /* presumably the element count of feature_list -- TODO confirm */
91  };
92 
/* Per-token record; returned by pointer from cabocha_tree_token() and
 * cabocha_tree_add_token().
 * NOTE(review): listing line 101 is not shown here; the cross-reference index at
 * the end of this page attributes a `struct cabocha_chunk_t * chunk` member to
 * cabocha.h:101, so this struct has one more field than the lines below display.
 * The field meanings below are inferred from names only -- confirm before relying on them. */
93  struct cabocha_token_t {
94  const char *surface; /* presumably the token text as it appears in the input -- TODO confirm */
95  const char *normalized_surface; /* presumably a normalized form of surface -- TODO confirm */
96  const char *feature; /* presumably the feature string as a single C string -- TODO confirm */
97  const char **feature_list; /* array of C strings; presumably feature_list_size entries -- TODO confirm */
98  unsigned short int feature_list_size; /* presumably the element count of feature_list -- TODO confirm */
99  const char *ne; /* presumably a named-entity tag -- TODO confirm */
100  const char *additional_info; /* C string; content is not specified in this header */
102  };
103 
/* Typedef names for struct types that are not defined in the lines shown; the C
 * API below passes them by pointer.
 * NOTE(review): cabocha_t is already typedef'd at listing line 77. Repeating an
 * identical typedef is accepted by C++ and by C11, but not by earlier C
 * standards -- consider keeping only one of the two. */
104  typedef struct cabocha_t cabocha_t;
105  typedef struct cabocha_tree_t cabocha_tree_t;
108  typedef struct mecab_node_t mecab_node_t;
109 
110 #ifndef SWIG
/* Takes a command-line style argc/argv pair and returns an int.
 * NOTE(review): presumably the entry point of the cabocha command-line tool,
 * returning its exit status -- confirm against the implementation. */
111  CABOCHA_DLL_EXTERN int cabocha_do(int argc, char **argv);
112 
113  /* parser */
/* Two ways to obtain a cabocha_t handle: from an argc/argv pair, or from a single
 * argument string. NOTE(review): failure reporting (e.g. a NULL return) is not
 * visible in this header -- confirm. */
114  CABOCHA_DLL_EXTERN cabocha_t *cabocha_new(int argc, char **argv);
115  CABOCHA_DLL_EXTERN cabocha_t *cabocha_new2(const char *arg);
/* Returns a C string for the given handle; presumably its latest error message -- TODO confirm. */
116  CABOCHA_DLL_EXTERN const char *cabocha_strerror(cabocha_t* cabocha);
/* Takes a caller-supplied, non-const tree and returns a const tree pointer.
 * NOTE(review): whether the returned pointer is `tree` itself is not visible here. */
117  CABOCHA_DLL_EXTERN const cabocha_tree_t *cabocha_parse_tree(cabocha_t *cabocha,
118  cabocha_tree_t *tree);
/* Takes an input C string and returns a C string result.
 * NOTE(review): ownership/lifetime of the returned string is not stated in this header. */
119  CABOCHA_DLL_EXTERN const char *cabocha_sparse_tostr(cabocha_t* cabocha,
120  const char* str);
/* Variant of cabocha_sparse_tostr() that additionally takes a size_t length for str.
 * NOTE(review): presumably the byte length of str, so that str need not be
 * NUL-terminated -- confirm against the implementation. Ownership/lifetime of the
 * returned string is not stated in this header. */
121  CABOCHA_DLL_EXTERN const char *cabocha_sparse_tostr2(cabocha_t* cabocha,
122  const char* str, size_t length);
/* Variant that also takes a caller-supplied output buffer and its size.
 * NOTE(review): presumably the result is written into output_str and the return
 * value points at it (or is NULL when the buffer is too small) -- confirm. */
123  CABOCHA_DLL_EXTERN const char *cabocha_sparse_tostr3(cabocha_t* cabocha, const char* str, size_t length,
124  char *output_str, size_t output_length);
/* Releases a cabocha_t handle; presumably one obtained from cabocha_new()/cabocha_new2() -- TODO confirm. */
125  CABOCHA_DLL_EXTERN void cabocha_destroy(cabocha_t* cabocha);
/* Take an input string (optionally with an explicit length) and return a const tree pointer.
 * NOTE(review): the returned tree is const and no matching destroy call is
 * declared for it, so it is presumably owned by the handle -- confirm. */
126  CABOCHA_DLL_EXTERN const cabocha_tree_t *cabocha_sparse_totree(cabocha_t* cabocha, const char* str);
127  CABOCHA_DLL_EXTERN const cabocha_tree_t *cabocha_sparse_totree2(cabocha_t* cabocha, const char* str, size_t length);
/* NOTE(review): repeats the cabocha_parse_tree declaration from listing lines
 * 117-118 with the same signature; harmless, but redundant. */
128  CABOCHA_DLL_EXTERN const cabocha_tree_t *cabocha_parse_tree(cabocha_t* cabocha, cabocha_tree_t *tree);
129 
130  /* tree */
/* Returns a pointer to a cabocha_tree_t and takes no arguments. The explicit
 * (void) makes this a real prototype in C as well as C++, so calls that pass
 * arguments are rejected at compile time.
 * NOTE(review): presumably the result is released with cabocha_tree_destroy() -- confirm. */
131  CABOCHA_DLL_EXTERN cabocha_tree_t *cabocha_tree_new(void);
/* Lifetime and reset operations on a tree handle.
 * NOTE(review): cabocha_tree_empty() returns int; presumably non-zero means
 * "empty" -- confirm. What clear_chunk() keeps versus clear() is not visible here. */
132  CABOCHA_DLL_EXTERN void cabocha_tree_destroy(cabocha_tree_t* tree);
133  CABOCHA_DLL_EXTERN int cabocha_tree_empty(cabocha_tree_t* tree);
134  CABOCHA_DLL_EXTERN void cabocha_tree_clear(cabocha_tree_t* tree);
135  CABOCHA_DLL_EXTERN void cabocha_tree_clear_chunk(cabocha_tree_t* tree);
/* Size queries, all returning size_t.
 * NOTE(review): how cabocha_tree_size() differs from the chunk/token counts is
 * not visible in this header -- confirm. */
136  CABOCHA_DLL_EXTERN size_t cabocha_tree_size(cabocha_tree_t* tree);
137  CABOCHA_DLL_EXTERN size_t cabocha_tree_chunk_size(cabocha_tree_t* tree);
138  CABOCHA_DLL_EXTERN size_t cabocha_tree_token_size(cabocha_tree_t* tree);
/* Sentence accessors: a C string getter, a size_t size getter, and a setter
 * that takes the text together with an explicit length. */
139  CABOCHA_DLL_EXTERN const char *cabocha_tree_sentence(cabocha_tree_t* tree);
140  CABOCHA_DLL_EXTERN size_t cabocha_tree_sentence_size(cabocha_tree_t* tree);
141  CABOCHA_DLL_EXTERN void cabocha_tree_set_sentence(cabocha_tree_t* tree,
142  const char *sentence,
143  size_t length);
/* Populate a tree from a text buffer (with an int input_layer selector) or from
 * a mecab_node_t. NOTE(review): both return int; the success/failure convention
 * is not visible here, and input_layer presumably takes one of the enum
 * constants declared earlier in this header -- confirm. */
144  CABOCHA_DLL_EXTERN int cabocha_tree_read(cabocha_tree_t* tree,
145  const char *input,
146  size_t length,
147  int input_layer);
148  CABOCHA_DLL_EXTERN int cabocha_tree_read_from_mecab_node(cabocha_tree_t* tree,
149  const mecab_node_t *node);
150 
/* Read-only access to the i-th token / chunk.
 * NOTE(review): behaviour for an out-of-range i is not visible in this header. */
151  CABOCHA_DLL_EXTERN const cabocha_token_t *cabocha_tree_token(cabocha_tree_t* tree, size_t i);
152  CABOCHA_DLL_EXTERN const cabocha_chunk_t *cabocha_tree_chunk(cabocha_tree_t* tree, size_t i);
153 
/* Return a mutable token / chunk pointer; presumably a newly appended element -- TODO confirm. */
154  CABOCHA_DLL_EXTERN cabocha_token_t *cabocha_tree_add_token(cabocha_tree_t* tree);
155  CABOCHA_DLL_EXTERN cabocha_chunk_t *cabocha_tree_add_chunk(cabocha_tree_t* tree);
156 
/* Return char buffers associated with a tree handle.
 * NOTE(review): presumably the memory is owned by the tree and must not be passed
 * to free() -- confirm against the implementation. */
157  CABOCHA_DLL_EXTERN char *cabocha_tree_strdup(cabocha_tree_t* tree, const char *str);
158  CABOCHA_DLL_EXTERN char *cabocha_tree_alloc(cabocha_tree_t* tree, size_t size);
159 
/* Render the tree as a C string according to an int format selector; the second
 * form also takes a caller-supplied buffer and its size.
 * NOTE(review): format presumably takes one of the enum constants declared
 * earlier in this header -- confirm. */
160  CABOCHA_DLL_EXTERN const char *cabocha_tree_tostr(cabocha_tree_t* tree, int format);
161  CABOCHA_DLL_EXTERN const char *cabocha_tree_tostr2(cabocha_tree_t* tree, int format,
162  char *str, size_t length);
163 
/* Setter/getter pairs for three int-valued tree properties: charset, posset and
 * output layer. */
164  CABOCHA_DLL_EXTERN void cabocha_tree_set_charset(cabocha_tree_t* tree,
165  int charset);
166  CABOCHA_DLL_EXTERN int cabocha_tree_charset(cabocha_tree_t* tree);
167  CABOCHA_DLL_EXTERN void cabocha_tree_set_posset(cabocha_tree_t* tree,
168  int posset);
169  CABOCHA_DLL_EXTERN int cabocha_tree_posset(cabocha_tree_t* tree);
170  CABOCHA_DLL_EXTERN void cabocha_tree_set_output_layer(cabocha_tree_t* tree,
171  int output_layer);
172  CABOCHA_DLL_EXTERN int cabocha_tree_output_layer(cabocha_tree_t* tree);
173 
/* Three more entry points with the same argc/argv-in, int-out shape as cabocha_do().
 * NOTE(review): presumably the model trainer, the evaluation tool and the model
 * indexer respectively -- inferred from the names only; confirm. */
174  CABOCHA_DLL_EXTERN int cabocha_learn(int argc, char **argv);
175  CABOCHA_DLL_EXTERN int cabocha_system_eval(int argc, char **argv);
176  CABOCHA_DLL_EXTERN int cabocha_model_index(int argc, char **argv);
177 #endif
178 
179 #ifdef __cplusplus
180 }
181 #endif
182 
183 /* for C++ */
184 #ifdef __cplusplus
185 
186 namespace CaboCha {
187 
188 class Tree;
189 typedef struct cabocha_chunk_t Chunk;
190 typedef struct cabocha_token_t Token;
191 
197 };
198 
203 };
204 
212 };
213 
220 };
221 
228 };
229 
234 };
235 
236 class TreeAllocator;
237 
239  public:
240  void set_sentence(const char *sentence);
241  const char *sentence() const;
242  size_t sentence_size() const;
243 
244 #ifndef SWIG
245  void set_sentence(const char *sentence, size_t length);
246 #endif
247 
248  const Chunk *chunk(size_t i) const;
249  const Token *token(size_t i) const;
250 
251 #ifndef SWIG
252  Chunk *mutable_chunk(size_t i);
253  Token *mutable_token(size_t i);
254 
255  Token *add_token();
256  Chunk *add_chunk();
257 
258  char *strdup(const char *str);
259  char *alloc(size_t size);
260  char **alloc_char_array(size_t size);
261 
262  TreeAllocator *allocator() const;
263 #endif
264 
265  bool read(const char *input,
266  InputLayerType input_layer);
267 
268 #ifndef SWIG
269  bool read(const char *input, size_t length,
270  InputLayerType input_layer);
271  bool read(const mecab_node_t *node);
272 #endif
273 
274  bool empty() const;
275  void clear();
276  void clear_chunk();
277 
278  size_t chunk_size() const;
279  size_t token_size() const;
280  size_t size() const;
281 
282  const char *toString(FormatType output_format);
283 
284 #ifndef SWIG
285  const char *toString(FormatType output_format,
286  char *output, size_t length) const;
287 #endif
288 
289  CharsetType charset() const { return charset_; }
290  void set_charset(CharsetType charset) { charset_ = charset; }
291  PossetType posset() const { return posset_; }
292  void set_posset(PossetType posset) { posset_ = posset; }
293  OutputLayerType output_layer() const { return output_layer_; }
294  void set_output_layer(OutputLayerType output_layer) { output_layer_ = output_layer; }
295 
296  const char *what();
297 
298  explicit Tree();
299  virtual ~Tree();
300 
301  private:
302  TreeAllocator *tree_allocator_;
303  CharsetType charset_;
304  PossetType posset_;
305  OutputLayerType output_layer_;
306 };
307 
309  public:
310  virtual const Tree *parse(const char *input) = 0;
311  virtual const char *parseToString(const char *input) = 0;
312  virtual const Tree *parse(Tree *tree) const = 0;
313 
314 #ifndef SWIG
315  virtual const Tree *parse(const char *input, size_t length) = 0;
316  virtual const char *parseToString(const char *input, size_t length) = 0;
317  virtual const char *parseToString(const char *input, size_t length,
318  char *output, size_t output_length) = 0;
319 #endif
320 
321  virtual const char *what() = 0;
322  static const char *version();
323 
324  virtual ~Parser() {}
325 
326 #ifndef SWIG
327  static Parser *create(int argc, char **argv);
328  static Parser *create(const char *arg);
329 #endif
330 };
331 
// Free-function factories returning a Parser pointer; the overloads take the same
// arguments as the static Parser::create() overloads declared in the class.
// NOTE(review): failure reporting (e.g. a null return) is not visible here -- confirm.
332 CABOCHA_DLL_EXTERN Parser *createParser(int argc, char **argv);
333 CABOCHA_DLL_EXTERN Parser *createParser(const char *arg);
// Both return a C string and take no arguments.
// NOTE(review): how the two differ is not visible in this header -- confirm.
334 CABOCHA_DLL_EXTERN const char *getParserError();
335 CABOCHA_DLL_EXTERN const char *getLastError();
336 
337 // API for training
// The three training functions below share one signature: a training file, a
// model file, a previous model file, a charset, a posset, a cost and a freq,
// returning bool.
// NOTE(review): presumably `true` means success, model_file is the output path
// and prev_model_file is optional -- none of this is stated here; confirm.
338 CABOCHA_DLL_EXTERN bool runDependencyTraining(
339  const char *train_file,
340  const char *model_file,
341  const char *prev_model_file,
342  CharsetType charset,
343  PossetType posset,
344  double cost,
345  int freq);
346 
347 CABOCHA_DLL_EXTERN bool runChunkingTraining(
348  const char *train_file,
349  const char *model_file,
350  const char *prev_model_file,
351  CharsetType charset,
352  PossetType posset,
353  double cost,
354  int freq);
355 
356 CABOCHA_DLL_EXTERN bool runNETraining(
357  const char *train_file,
358  const char *model_file,
359  const char *prev_model_file,
360  CharsetType charset,
361  PossetType posset,
362  double cost,
363  int freq);
364 }
365 #endif
366 #endif
CABOCHA_DLL_EXTERN const cabocha_chunk_t * cabocha_tree_chunk(cabocha_tree_t *tree, size_t i)
Definition: cabocha.h:207
Definition: cabocha.h:201
CABOCHA_DLL_EXTERN int cabocha_tree_empty(cabocha_tree_t *tree)
CABOCHA_DLL_EXTERN const char * getLastError()
Definition: cabocha.h:58
Definition: cabocha.h:238
CABOCHA_DLL_EXTERN size_t cabocha_tree_sentence_size(cabocha_tree_t *tree)
CABOCHA_DLL_EXTERN size_t cabocha_tree_token_size(cabocha_tree_t *tree)
Definition: cabocha.h:232
const char * ne
Definition: cabocha.h:99
int link
Definition: cabocha.h:82
CABOCHA_DLL_EXTERN void cabocha_tree_clear(cabocha_tree_t *tree)
CABOCHA_DLL_EXTERN size_t cabocha_tree_chunk_size(cabocha_tree_t *tree)
Definition: cabocha.h:52
const char ** feature_list
Definition: cabocha.h:88
Definition: cabocha.h:41
CABOCHA_DLL_EXTERN const char * cabocha_sparse_tostr2(cabocha_t *cabocha, const char *str, size_t length)
Definition: cabocha.h:51
const char * feature
Definition: cabocha.h:96
CABOCHA_DLL_EXTERN int cabocha_model_index(int argc, char **argv)
Definition: cabocha.h:217
Definition: cabocha.h:72
Definition: cabocha.h:202
CharsetType charset() const
Definition: cabocha.h:289
CABOCHA_DLL_EXTERN Parser * createParser(int argc, char **argv)
CABOCHA_DLL_EXTERN void cabocha_tree_set_output_layer(cabocha_tree_t *tree, int output_layer)
CABOCHA_DLL_EXTERN int cabocha_tree_posset(cabocha_tree_t *tree)
float score
Definition: cabocha.h:87
CABOCHA_DLL_EXTERN void cabocha_destroy(cabocha_t *cabocha)
CABOCHA_DLL_EXTERN const char * cabocha_sparse_tostr(cabocha_t *cabocha, const char *str)
Definition: cabocha.h:74
Definition: cabocha.h:196
Definition: cabocha.h:195
unsigned short int feature_list_size
Definition: cabocha.h:98
CABOCHA_DLL_EXTERN const char * cabocha_strerror(cabocha_t *cabocha)
Definition: cabocha.h:48
CABOCHA_DLL_EXTERN bool runChunkingTraining(const char *train_file, const char *model_file, const char *prev_model_file, CharsetType charset, PossetType posset, double cost, int freq)
Definition: cabocha.h:194
ParserType
Definition: cabocha.h:230
Definition: cabocha.h:208
Definition: cabocha.h:225
Definition: cabocha.h:308
OutputLayerType
Definition: cabocha.h:222
virtual ~Parser()
Definition: cabocha.h:324
CABOCHA_DLL_EXTERN int cabocha_do(int argc, char **argv)
CABOCHA_DLL_EXTERN void cabocha_tree_clear_chunk(cabocha_tree_t *tree)
Definition: cabocha.h:215
CABOCHA_DLL_EXTERN cabocha_t * cabocha_new2(const char *arg)
Definition: cabocha.h:49
InputLayerType
Definition: cabocha.h:214
CABOCHA_DLL_EXTERN int cabocha_tree_charset(cabocha_tree_t *tree)
Definition: cabocha.h:66
CABOCHA_DLL_EXTERN const char * cabocha_tree_sentence(cabocha_tree_t *tree)
size_t token_size
Definition: cabocha.h:85
CABOCHA_DLL_EXTERN const cabocha_tree_t * cabocha_sparse_totree(cabocha_t *cabocha, const char *str)
struct cabocha_chunk_t * chunk
Definition: cabocha.h:101
Definition: cabocha.h:59
CABOCHA_DLL_EXTERN cabocha_t * cabocha_new(int argc, char **argv)
Definition: cabocha.h:200
CABOCHA_DLL_EXTERN int cabocha_tree_read(cabocha_tree_t *tree, const char *input, size_t length, int input_layer)
void set_posset(PossetType posset)
Definition: cabocha.h:292
const char * surface
Definition: cabocha.h:94
struct cabocha_tree_t cabocha_tree_t
Definition: cabocha.h:78
CABOCHA_DLL_EXTERN const char * cabocha_tree_tostr2(cabocha_tree_t *tree, int format, char *str, size_t length)
Definition: cabocha.h:64
Definition: cabocha.h:47
CABOCHA_DLL_EXTERN void cabocha_tree_set_posset(cabocha_tree_t *tree, int posset)
Definition: cabocha.h:35
Definition: cabocha.h:218
const char * additional_info
Definition: cabocha.h:89
CABOCHA_DLL_EXTERN const char * getParserError()
Definition: cabocha.h:60
Definition: cabocha.h:233
struct mecab_node_t mecab_node_t
Definition: cabocha.h:108
void set_output_layer(OutputLayerType output_layer)
Definition: cabocha.h:294
Definition: cabocha.h:210
Definition: cabocha.h:219
CABOCHA_DLL_EXTERN const char * cabocha_sparse_tostr3(cabocha_t *cabocha, const char *str, size_t length, char *output_str, size_t output_length)
Definition: cabocha.h:37
CABOCHA_DLL_EXTERN void cabocha_tree_destroy(cabocha_tree_t *tree)
Definition: cabocha.h:34
Definition: cabocha.h:216
Definition: cabocha.h:223
PossetType
Definition: cabocha.h:199
Definition: cabocha.h:43
Definition: cabocha.h:67
CABOCHA_DLL_EXTERN int cabocha_tree_read_from_mecab_node(cabocha_tree_t *tree, const mecab_node_t *node)
const char * additional_info
Definition: cabocha.h:100
CABOCHA_DLL_EXTERN const char * cabocha_tree_tostr(cabocha_tree_t *tree, int format)
Definition: cabocha.h:65
const char ** feature_list
Definition: cabocha.h:97
size_t token_pos
Definition: cabocha.h:86
Definition: cabocha.h:68
Definition: cabocha.h:57
Definition: cabocha.h:231
size_t head_pos
Definition: cabocha.h:83
CABOCHA_DLL_EXTERN char * cabocha_tree_strdup(cabocha_tree_t *tree, const char *str)
CABOCHA_DLL_EXTERN const cabocha_tree_t * cabocha_sparse_totree2(cabocha_t *cabocha, const char *str, size_t length)
PossetType posset() const
Definition: cabocha.h:291
Definition: cabocha.h:206
Definition: cabocha.h:209
CABOCHA_DLL_EXTERN cabocha_chunk_t * cabocha_tree_add_chunk(cabocha_tree_t *tree)
Definition: cabocha.h:227
unsigned short int feature_list_size
Definition: cabocha.h:90
CABOCHA_DLL_EXTERN int cabocha_learn(int argc, char **argv)
Definition: cabocha.h:93
Definition: cabocha.h:193
OutputLayerType output_layer() const
Definition: cabocha.h:293
Definition: cabocha.h:56
const char * normalized_surface
Definition: cabocha.h:95
CABOCHA_DLL_EXTERN cabocha_tree_t * cabocha_tree_new()
CharsetType
Definition: cabocha.h:192
CABOCHA_DLL_EXTERN int cabocha_system_eval(int argc, char **argv)
void set_charset(CharsetType charset)
Definition: cabocha.h:290
Definition: cabocha.h:36
CABOCHA_DLL_EXTERN bool runNETraining(const char *train_file, const char *model_file, const char *prev_model_file, CharsetType charset, PossetType posset, double cost, int freq)
CABOCHA_DLL_EXTERN const cabocha_token_t * cabocha_tree_token(cabocha_tree_t *tree, size_t i)
CABOCHA_DLL_EXTERN cabocha_token_t * cabocha_tree_add_token(cabocha_tree_t *tree)
Definition: cabocha.h:81
Definition: cabocha.h:226
CABOCHA_DLL_EXTERN void cabocha_tree_set_charset(cabocha_tree_t *tree, int charset)
Definition: cabocha.h:211
CABOCHA_DLL_EXTERN void cabocha_tree_set_sentence(cabocha_tree_t *tree, const char *sentence, size_t length)
CABOCHA_DLL_EXTERN bool runDependencyTraining(const char *train_file, const char *model_file, const char *prev_model_file, CharsetType charset, PossetType posset, double cost, int freq)
Definition: cabocha.h:42
#define CABOCHA_DLL_CLASS_EXTERN
Definition: cabocha.h:30
struct cabocha_t cabocha_t
Definition: cabocha.h:77
size_t func_pos
Definition: cabocha.h:84
CABOCHA_DLL_EXTERN char * cabocha_tree_alloc(cabocha_tree_t *tree, size_t size)
CABOCHA_DLL_EXTERN const cabocha_tree_t * cabocha_parse_tree(cabocha_t *cabocha, cabocha_tree_t *tree)
Definition: cabocha.h:73
FormatType
Definition: cabocha.h:205
Definition: cabocha.h:224
Definition: cabocha.h:50
CABOCHA_DLL_EXTERN int cabocha_tree_output_layer(cabocha_tree_t *tree)
CABOCHA_DLL_EXTERN size_t cabocha_tree_size(cabocha_tree_t *tree)