SHOGUN  6.1.3
DynProg.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Gunnar Raetsch
8  * Written (W) 1999-2009 Soeren Sonnenburg
9  * Written (W) 2008-2009 Jonas Behr
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
13 #ifndef __CDYNPROG_H__
14 #define __CDYNPROG_H__
15 
17 #include <shogun/lib/common.h>
18 #include <shogun/base/SGObject.h>
19 #include <shogun/io/SGIO.h>
20 #include <shogun/lib/config.h>
23 #include <shogun/structure/Plif.h>
31 #include <shogun/lib/Time.h>
32 
33 
34 namespace shogun
35 {
36  template <class T> class CSparseFeatures;
37  class CIntronList;
38  class CPlifMatrix;
39  class CSegmentLoss;
40 
41  template <class T> class CDynamicArray;
42 
43 //#define DYNPROG_TIMING
44 
/* T_STATES is the integer type this header uses for state counts and state
 * offsets (see get_N, set_p, set_q, set_a): 16 bit unsigned when
 * USE_BIGSTATES is defined, 8 bit unsigned otherwise.
 * P_STATES is a plain pointer to T_STATES. */
45 #ifdef USE_BIGSTATES
46 typedef uint16_t T_STATES ;
47 #else
48 typedef uint8_t T_STATES ;
49 #endif
50 typedef T_STATES* P_STATES ;
51 
52 #ifndef DOXYGEN_SHOULD_SKIP_THIS
53 
/* Plain aggregate of two int32_t scalars and three raw pointers, hidden from
 * Doxygen via DOXYGEN_SHOULD_SKIP_THIS. No constructor or default member
 * initializers are declared here.
 * NOTE(review): the field meanings (presumably a look-back window, a sequence
 * length and per-segment bookkeeping arrays) and the ownership of the pointed-to
 * memory are not visible in this header -- confirm against the implementation. */
54 struct segment_loss_struct
55 {
57  int32_t maxlookback;
59  int32_t seqlen;
61  int32_t *segments_changed;
63  float64_t *num_segment_id;
65  int32_t *length_segment_id ;
66 };
67 #endif
68 
74 class CDynProg : public CSGObject
75 {
76 public:
81  CDynProg(int32_t p_num_svms=8);
82  virtual ~CDynProg();
83 
84  // model related functions
90  void set_num_states(int32_t N);
91 
93  int32_t get_num_states();
94 
96  int32_t get_num_svms();
97 
103  void init_content_svm_value_array(const int32_t p_num_svms);
104 
112  void init_tiling_data(int32_t* probe_pos, float64_t* intensities, const int32_t num_probes);
113 
120  void precompute_tiling_plifs(CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs);
121 
126  void resize_lin_feat(int32_t num_new_feat);
131  void set_p_vector(SGVector<float64_t> p);
132 
137  void set_q_vector(SGVector<float64_t> q);
138 
143  void set_a(SGMatrix<float64_t> a);
144 
149  void set_a_id(SGMatrix<int32_t> a);
150 
155  void set_a_trans_matrix(SGMatrix<float64_t> a_trans);
156 
161  void init_mod_words_array(SGMatrix<int32_t> p_mod_words_array);
162 
168  bool check_svm_arrays();
169 
174  void set_observation_matrix(SGNDArray<float64_t> seq);
175 
182  int32_t get_num_positions();
183 
193  void set_content_type_array(SGMatrix<float64_t> seg_path);
194 
199  void set_pos(SGVector<int32_t> pos);
200 
206  void set_orf_info(SGMatrix<int32_t> orf_info);
207 
212  void set_gene_string(SGVector<char> genestr);
213 
214 
219  void set_dict_weights(SGMatrix<float64_t> dictionary_weights);
220 
225  void best_path_set_segment_loss(SGMatrix<float64_t> segment_loss);
226 
233  void best_path_set_segment_ids_mask(int32_t* segment_ids, float64_t* segment_mask, int32_t m);
234 
236  void set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2);
237 
242  void set_plif_matrices(CPlifMatrix* pm);
243 
244  // best_path result retrieval functions
249  SGVector<float64_t> get_scores();
250 
255  SGMatrix<int32_t> get_states();
256 
261  SGMatrix<int32_t> get_positions();
262 
263 
272  void compute_nbest_paths(int32_t max_num_signals,
273  bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences);
274 
276 
288  void best_path_trans_deriv(
289  int32_t* my_state_seq, int32_t *my_pos_seq,
290  int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals);
291 
292  // additional best_path_trans_deriv functions
297  void set_my_state_seq(int32_t* my_state_seq);
298 
303  void set_my_pos_seq(int32_t* my_pos_seq);
304 
312  void get_path_scores(float64_t** my_scores, int32_t* seq_len);
313 
321  void get_path_losses(float64_t** my_losses, int32_t* seq_len);
322 
323 
/**
 * Access function for the number of states N.
 *
 * NOTE(review): m_N is declared as int32_t in this class, while the return
 * type is T_STATES (uint8_t, or uint16_t with USE_BIGSTATES), so the value is
 * implicitly narrowed on return.
 *
 * @return m_N converted to T_STATES
 */
325  inline T_STATES get_N() const
326  {
327  return m_N ;
328  }
329 
/**
 * Writes one entry of the end-state distribution q.
 *
 * This function itself performs no range check on offset.
 *
 * @param offset index into m_end_state_distribution_q
 * @param value value stored at that index
 */
334  inline void set_q(T_STATES offset, float64_t value)
335  {
336  m_end_state_distribution_q[offset]=value;
337  }
338 
/**
 * Writes one entry of the initial state distribution p.
 *
 * This function itself performs no range check on offset.
 *
 * @param offset index into m_initial_state_distribution_p
 * @param value value stored at that index
 */
343  inline void set_p(T_STATES offset, float64_t value)
344  {
345  m_initial_state_distribution_p[offset]=value;
346  }
347 
/**
 * Writes one entry of the transition matrix a.
 *
 * The entry is addressed through m_transition_matrix_a.element(line_, column);
 * this function itself performs no range check on either index.
 *
 * @param line_ first index passed to element()
 * @param column second index passed to element()
 * @param value value stored in that entry
 */
354  inline void set_a(T_STATES line_, T_STATES column, float64_t value)
355  {
356  m_transition_matrix_a.element(line_,column)=value; // look also best_path!
357  }
358 
/**
 * Reads one entry of the end-state distribution q.
 *
 * @param offset index into m_end_state_distribution_q
 * @return the value stored at that index
 */
364  inline float64_t get_q(T_STATES offset) const
365  {
366  return m_end_state_distribution_q[offset];
367  }
368 
/**
 * Reads one entry of m_end_state_distribution_q_deriv.
 *
 * NOTE(review): presumably the derivative associated with the end-state
 * distribution q -- how it is filled is not visible in this header.
 *
 * @param offset index into m_end_state_distribution_q_deriv
 * @return the value stored at that index
 */
374  inline float64_t get_q_deriv(T_STATES offset) const
375  {
376  return m_end_state_distribution_q_deriv[offset];
377  }
378 
/**
 * Reads one entry of the initial state distribution p.
 *
 * @param offset index into m_initial_state_distribution_p
 * @return the value stored at that index
 */
384  inline float64_t get_p(T_STATES offset) const
385  {
386  return m_initial_state_distribution_p[offset];
387  }
388 
/**
 * Reads one entry of m_initial_state_distribution_p_deriv.
 *
 * NOTE(review): presumably the derivative associated with the initial state
 * distribution p -- how it is filled is not visible in this header.
 *
 * @param offset index into m_initial_state_distribution_p_deriv
 * @return the value stored at that index
 */
394  inline float64_t get_p_deriv(T_STATES offset) const
395  {
396  return m_initial_state_distribution_p_deriv[offset];
397  }
398 
402  void precompute_content_values();
403 
/**
 * Gives access to the m_lin_feat array and its dimensions.
 *
 * dim1 and dim2 are handed by reference to m_lin_feat.get_array_size(), then
 * the result of m_lin_feat.get_array() is returned.
 * NOTE(review): presumably the returned pointer refers to the array's internal
 * storage rather than a copy -- confirm against CDynamicArray::get_array().
 *
 * @param dim1 output argument passed to get_array_size() (first dimension)
 * @param dim2 output argument passed to get_array_size() (second dimension)
 * @return pointer obtained from m_lin_feat.get_array()
 */
410  inline float64_t* get_lin_feat(int32_t & dim1, int32_t & dim2)
411  {
412  m_lin_feat.get_array_size(dim1, dim2);
413  return m_lin_feat.get_array();
414  }
/**
 * Replaces the contents of m_lin_feat with the given two-dimensional array.
 *
 * Forwards to m_lin_feat.set_array() with p_num_svms and p_seq_len as the two
 * dimensions and both trailing flags set to true.
 * NOTE(review): the meaning of the two boolean flags is not visible in this
 * header -- check CDynamicArray::set_array() before relying on who owns
 * p_lin_feat after the call.
 *
 * @param p_lin_feat array handed to set_array()
 * @param p_num_svms first dimension handed to set_array()
 * @param p_seq_len second dimension handed to set_array()
 */
423  inline void set_lin_feat(float64_t* p_lin_feat, int32_t p_num_svms, int32_t p_seq_len)
424  {
425  m_lin_feat.set_array(p_lin_feat, p_num_svms, p_seq_len, true, true);
426  }
431  void create_word_string();
432 
435  void precompute_stop_codons();
436 
/**
 * Reads one entry of the transition matrix a.
 *
 * @param line_ first index passed to m_transition_matrix_a.element()
 * @param column second index passed to m_transition_matrix_a.element()
 * @return the value stored in that entry
 */
443  inline float64_t get_a(T_STATES line_, T_STATES column) const
444  {
445  return m_transition_matrix_a.element(line_, column); // look also best_path()!
446  }
447 
/**
 * Reads one entry of m_transition_matrix_a_deriv.
 *
 * NOTE(review): presumably the derivative associated with the transition
 * matrix a -- how it is filled is not visible in this header.
 *
 * @param line_ first index passed to m_transition_matrix_a_deriv.element()
 * @param column second index passed to m_transition_matrix_a_deriv.element()
 * @return the value stored in that entry
 */
454  inline float64_t get_a_deriv(T_STATES line_, T_STATES column) const
455  {
456  return m_transition_matrix_a_deriv.element(line_, column); // look also best_path()!
457  }
459 
464  void set_intron_list(CIntronList* intron_list, int32_t num_plifs);
465 
/**
 * Access function for the segment loss object.
 *
 * The signature on listing line 467 was missing from this listing, which left
 * an orphaned function body; it is restored here.
 *
 * @return the pointer stored in m_seg_loss_obj (the pointer itself, not a copy
 *         of the object)
 */
467  CSegmentLoss* get_segment_loss_object()
468  {
469  return m_seg_loss_obj;
470  }
471 
/**
 * Configures the handling of long transitions.
 *
 * Stores use_long_transitions in m_long_transitions and threshold in
 * m_long_transition_threshold. max_len is accepted but not stored: the
 * assignment to m_long_transition_max is commented out and only a debug
 * message is emitted.
 *
 * @param use_long_transitions value stored in m_long_transitions
 * @param threshold value stored in m_long_transition_threshold
 * @param max_len currently ignored
 */
478  void long_transition_settings(bool use_long_transitions, int32_t threshold, int32_t max_len)
479  {
480  m_long_transitions = use_long_transitions;
481  m_long_transition_threshold = threshold;
/* No trailing semicolon: presumably SG_DEBUG expands to a complete statement
 * -- confirm in SGIO.h before adding one. */
482  SG_DEBUG("ignoring max_len\n")
483  //m_long_transition_max = max_len;
484  }
485 
486 protected:
487 
488  /* helper functions */
489 
499  void lookup_content_svm_values(const int32_t from_state,
500  const int32_t to_state, const int32_t from_pos, const int32_t to_pos,
501  float64_t* svm_values, int32_t frame);
502 
/* Declarations only: lookup_tiling_plif_values, find_frame and
 * raw_intensities_interval_query have no definition in this header.
 * NOTE(review): all three are declared inline. An inline function has to be
 * defined in every translation unit that uses it, so calling these from any
 * source file other than the one that defines them (for example from a derived
 * class) may fail to link. Confirm whether the inline specifier is intended. */
510  inline void lookup_tiling_plif_values(const int32_t from_state,
511  const int32_t to_state, const int32_t len, float64_t* svm_values);
512 
517  inline int32_t find_frame(const int32_t from_state);
518 
527  inline int32_t raw_intensities_interval_query(
528  const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type);
529 
530 #ifndef DOXYGEN_SHOULD_SKIP_THIS
531 
/* Nested plain aggregate of two int32_t scalars and five raw (multi-level)
 * pointers, hidden from Doxygen via DOXYGEN_SHOULD_SKIP_THIS. No constructor
 * or default member initializers are declared here.
 * NOTE(review): array shapes, field meanings and ownership of the pointed-to
 * memory are not visible in this header -- confirm against the implementation. */
532  struct svm_values_struct
533  {
535  int32_t maxlookback;
537  int32_t seqlen;
538 
540  int32_t* start_pos;
542  float64_t ** svm_values_unnormalized;
544  float64_t * svm_values;
546  bool *** word_used;
548  int32_t **num_unique_words;
549  };
550 #endif // DOXYGEN_SHOULD_SKIP_THIS
551 
560  bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to);
561 
/** @return the object name, the string literal "DynProg" */
563  virtual const char* get_name() const { return "DynProg"; }
564 
565 private:
566 
567  T_STATES trans_list_len;
568  T_STATES **trans_list_forward;
569  T_STATES *trans_list_forward_cnt;
570  float64_t **trans_list_forward_val;
571  int32_t **trans_list_forward_id;
572  bool mem_initialized;
573 
574 #ifdef DYNPROG_TIMING
575  CTime MyTime;
576  CTime MyTime2;
577  CTime MyTime3;
578 
579  float64_t segment_init_time;
580  float64_t segment_pos_time;
581  float64_t segment_clean_time;
582  float64_t segment_extend_time;
583  float64_t orf_time;
584  float64_t content_time;
585  float64_t content_penalty_time;
586  float64_t content_svm_values_time ;
587  float64_t content_plifs_time ;
588  float64_t svm_init_time;
589  float64_t svm_pos_time;
590  float64_t inner_loop_time;
591  float64_t inner_loop_max_time ;
592  float64_t svm_clean_time;
593  float64_t long_transition_time ;
594 #endif
595 
596 
597 protected:
602  int32_t m_N;
604 
609 
613 
617 
619 
621  int32_t m_num_degrees;
623  int32_t m_num_svms;
624 
647 
649 // CDynamicArray<int32_t> m_svm_pos_start;
655  int32_t m_max_a_id;
656 
657  // input arguments
663  int32_t m_seq_len;
686  uint16_t*** m_wordstr;
703 
707 
708  // output arguments
715 
722 
727 
731 
734 
740 
744  int32_t* m_probe_pos;
750  int32_t m_num_raw_data;
751 
761  //int32_t m_long_transition_max ;
762 
766  static int32_t word_degree_default[4];
767 
771  static int32_t cum_num_words_default[5];
772 
775  static int32_t frame_plifs[3];
776 
779  static int32_t num_words_default[4];
780 
782  static int32_t mod_words_default[32];
783 
785  static bool sign_words_default[16];
786 
788  static int32_t string_words_default[16];
789 };
790 }
791 #endif
Class Time that implements a stopwatch based on either cpu time or wall clock time.
Definition: Time.h:42
CDynamicArray< float64_t > m_end_state_distribution_q_deriv
Definition: DynProg.h:616
bool m_svm_arrays_clean
Definition: DynProg.h:653
CDynamicArray< float64_t > m_segment_loss
Definition: DynProg.h:690
CPlifMatrix * m_plif_matrices
Definition: DynProg.h:721
float64_t get_a(T_STATES line_, T_STATES column) const
Definition: DynProg.h:443
CDynamicArray< int32_t > m_positions
Definition: DynProg.h:714
T_STATES * P_STATES
Definition: HMM.h:64
uint16_t *** m_wordstr
Definition: DynProg.h:686
CDynamicArray< float64_t > m_dict_weights
Definition: DynProg.h:688
void set_lin_feat(float64_t *p_lin_feat, int32_t p_num_svms, int32_t p_seq_len)
Definition: DynProg.h:423
CDynamicArray< int32_t > m_segment_ids
Definition: DynProg.h:692
T_STATES get_N() const
access function for number of states N
Definition: DynProg.h:325
float64_t get_a_deriv(T_STATES line_, T_STATES column) const
Definition: DynProg.h:454
bool m_long_transitions
Definition: DynProg.h:753
int32_t m_max_a_id
Definition: DynProg.h:655
CDynamicArray< int32_t > m_transition_matrix_a_id
transition matrix
Definition: DynProg.h:606
CDynamicArray< int32_t > m_word_degree
Definition: DynProg.h:626
int32_t * m_cum_num_words_array
Definition: DynProg.h:630
float64_t get_q(T_STATES offset) const
Definition: DynProg.h:364
int32_t * m_mod_words_array
Definition: DynProg.h:638
CDynamicArray< float64_t > m_lin_feat
Definition: DynProg.h:739
class SegmentLoss
Definition: SegmentLoss.h:24
float64_t get_p(T_STATES offset) const
Definition: DynProg.h:384
int32_t m_num_intron_plifs
Definition: DynProg.h:733
int32_t m_seq_len
Definition: DynProg.h:663
CDynamicArray< float64_t > m_initial_state_distribution_p_deriv
Definition: DynProg.h:612
int32_t m_num_degrees
Definition: DynProg.h:621
CSparseFeatures< float64_t > * m_seq_sparse1
Definition: DynProg.h:717
CSparseFeatures< float64_t > * m_seq_sparse2
Definition: DynProg.h:719
class Plif
Definition: Plif.h:40
int32_t * m_num_probes_cum
Definition: DynProg.h:746
int32_t * m_num_lin_feat_plifs_cum
Definition: DynProg.h:748
CDynamicArray< float64_t > m_initial_state_distribution_p
initial distribution of states
Definition: DynProg.h:611
CDynamicArray< int32_t > m_mod_words
Definition: DynProg.h:636
class IntronList
Definition: IntronList.h:22
CDynamicArray< float64_t > m_transition_matrix_a_deriv
Definition: DynProg.h:608
void set_p(T_STATES offset, float64_t value)
Definition: DynProg.h:343
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:124
CDynamicArray< int32_t > m_num_unique_words
Definition: DynProg.h:651
CDynamicArray< float64_t > m_transition_matrix_a
Definition: DynProg.h:607
CDynamicArray< bool > m_genestr_stop
Definition: DynProg.h:726
CDynamicArray< int32_t > m_orf_info
Definition: DynProg.h:665
double float64_t
Definition: common.h:60
CDynamicArray< float64_t > m_segment_sum_weights
Definition: DynProg.h:667
int32_t * m_string_words_array
Definition: DynProg.h:646
CDynamicArray< int32_t > m_my_pos_seq
Definition: DynProg.h:698
CDynamicArray< int32_t > m_states
Definition: DynProg.h:712
CDynamicArray< int32_t > m_cum_num_words
Definition: DynProg.h:628
CDynamicArray< int32_t > m_string_words
Definition: DynProg.h:644
CDynamicArray< int32_t > m_pos
Definition: DynProg.h:661
Dynamic array class for CSGObject pointers that creates an array that can be used like a list or an array.
int32_t m_long_transition_threshold
Definition: DynProg.h:756
float64_t get_q_deriv(T_STATES offset) const
Definition: DynProg.h:374
CDynamicArray< float64_t > m_end_state_distribution_q
distribution of end-states
Definition: DynProg.h:615
float64_t * get_lin_feat(int32_t &dim1, int32_t &dim2)
Definition: DynProg.h:410
void set_a(T_STATES line_, T_STATES column, float64_t value)
Definition: DynProg.h:354
float64_t * m_raw_intensities
Definition: DynProg.h:742
void set_q(T_STATES offset, float64_t value)
Definition: DynProg.h:334
CDynamicArray< float64_t > m_my_losses
Definition: DynProg.h:702
CDynamicArray< float64_t > m_segment_mask
Definition: DynProg.h:694
CDynamicArray< char > m_genestr
Definition: DynProg.h:671
CDynamicArray< float64_t > m_scores
Definition: DynProg.h:710
uint8_t T_STATES
Definition: HMM.h:62
float64_t get_p_deriv(T_STATES offset) const
Definition: DynProg.h:394
#define SG_DEBUG(...)
Definition: SGIO.h:106
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
CDynamicArray< float64_t > m_observation_matrix
Definition: DynProg.h:659
CDynamicArray< bool > m_sign_words
Definition: DynProg.h:640
CDynamicArray< int32_t > m_num_words
Definition: DynProg.h:632
CDynamicObjectArray m_plif_list
Definition: DynProg.h:669
CDynamicArray< float64_t > m_my_scores
Definition: DynProg.h:700
int32_t * m_probe_pos
Definition: DynProg.h:744
Dynamic Programming Class.
Definition: DynProg.h:74
int32_t * m_num_words_array
Definition: DynProg.h:634
CIntronList * m_intron_list
Definition: DynProg.h:730
int32_t m_num_raw_data
Definition: DynProg.h:750
virtual const char * get_name() const
Definition: DynProg.h:563
int32_t m_num_svms
Definition: DynProg.h:623
void long_transition_settings(bool use_long_transitions, int32_t threshold, int32_t max_len)
Definition: DynProg.h:478
CSegmentLoss * m_seg_loss_obj
Definition: DynProg.h:706
CSegmentLoss * get_segment_loss_object()
Definition: DynProg.h:467
CDynamicArray< int32_t > m_my_state_seq
Definition: DynProg.h:696
store plif arrays for all transitions in the model
Definition: PlifMatrix.h:31
bool * m_sign_words_array
Definition: DynProg.h:642

SHOGUN Machine Learning Toolbox - Documentation