Skip to content

Commit

Permalink
Release 1.4: summary
Browse files Browse the repository at this point in the history
  • Loading branch information
jenniferliddle committed Mar 13, 2017
2 parents 0f1bc9d + f750382 commit d6a8801
Show file tree
Hide file tree
Showing 476 changed files with 60,395 additions and 5,383 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ plugins/*.dSYM
plugins/*.P

/test/test-rbuf
/test/test-regidx

/TAGS
12 changes: 12 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,18 @@ env:
global:
- HTSDIR=./htslib

# For linux systems
addons:
apt:
packages:
- liblzma-dev
- libbz2-dev

# For MacOSX systems
before_install:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install xz; fi

before_script:
# Clone samtools/htslib (or another repository, as specified by a Travis CI
# repository $HTSREPO setting) and check out a corresponding branch with the
Expand Down
141 changes: 100 additions & 41 deletions HMM.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@
#include <htslib/hts.h>
#include "HMM.h"

typedef struct
{
int nstates; // number of hmm's states
int isite; // take snapshot at i-th position
uint32_t pos; // i-th site's position
double *vit_prob; // viterbi probabilities, NULL for uniform probs
double *fwd_prob; // transition probabilities
double *bwd_prob; // transition probabilities
}
snapshot_t;

struct _hmm_t
{
int nstates; // number of states
Expand All @@ -50,7 +61,8 @@ struct _hmm_t
set_tprob_f set_tprob; // Optional user function to set / modify transition probabilities
// at each site (one step of Viterbi algorithm)
void *set_tprob_data;
double *init_probs; // Initial state probabilities, NULL for uniform probs
snapshot_t init; // Initial state probabilities. Set isite=1 when site should be used
snapshot_t *snapshot;
};

uint8_t *hmm_get_viterbi_path(hmm_t *hmm) { return hmm->vpath; }
Expand Down Expand Up @@ -78,28 +90,79 @@ static inline void multiply_matrix(int n, double *a, double *b, double *dst, dou
memcpy(dst,out,sizeof(double)*n*n);
}

void hmm_init_states(hmm_t *hmm, double *probs)
{
hmm->init.isite = 0;
hmm->init.pos = 0;
if ( !hmm->init.vit_prob )
hmm->init.vit_prob = (double*) malloc(sizeof(double)*hmm->nstates);
if ( !hmm->init.fwd_prob )
hmm->init.fwd_prob = (double*) malloc(sizeof(double)*hmm->nstates);
if ( !hmm->init.bwd_prob )
hmm->init.bwd_prob = (double*) malloc(sizeof(double)*hmm->nstates);

int i;
if ( probs )
{
memcpy(hmm->init.vit_prob,probs,sizeof(double)*hmm->nstates);
double sum = 0;
for (i=0; i<hmm->nstates; i++) sum += hmm->init.vit_prob[i];
for (i=0; i<hmm->nstates; i++) hmm->init.vit_prob[i] /= sum;
}
else
for (i=0; i<hmm->nstates; i++) hmm->init.vit_prob[i] = 1./hmm->nstates;

memcpy(hmm->init.fwd_prob,hmm->init.vit_prob,sizeof(double)*hmm->nstates);
memcpy(hmm->init.bwd_prob,hmm->init.vit_prob,sizeof(double)*hmm->nstates);
}
hmm_t *hmm_init(int nstates, double *tprob, int ntprob)
{
hmm_t *hmm = (hmm_t*) calloc(1,sizeof(hmm_t));
hmm->nstates = nstates;
hmm->curr_tprob = (double*) malloc(sizeof(double)*nstates*nstates);
hmm->tmp = (double*) malloc(sizeof(double)*nstates*nstates);

hmm_set_tprob(hmm, tprob, ntprob);

hmm_init_states(hmm, NULL);
return hmm;
}

void hmm_init_states(hmm_t *hmm, double *probs)
void *hmm_snapshot(hmm_t *hmm, void *_snapshot, int isite)
{
if ( !probs )
snapshot_t *snapshot = (snapshot_t*) _snapshot;
if ( snapshot && snapshot->nstates!=hmm->nstates )
{
free(hmm->init_probs);
hmm->init_probs = NULL;
free(snapshot);
snapshot = NULL;
}

if ( !hmm->init_probs ) hmm->init_probs = (double*) malloc(sizeof(double)*hmm->nstates);
memcpy(hmm->init_probs,probs,sizeof(double)*hmm->nstates);
if ( !snapshot )
{
// Allocate the snapshot as a single memory block so that it can be
// free()-ed by the user. So make sure the arrays are aligned..
size_t str_size = sizeof(snapshot_t);
size_t dbl_size = sizeof(double);
size_t pad_size = (dbl_size - str_size % dbl_size) % dbl_size;
uint8_t *mem = (uint8_t*) malloc(str_size + pad_size + dbl_size*2*hmm->nstates);
snapshot = (snapshot_t*) mem;
snapshot->nstates = hmm->nstates;
snapshot->vit_prob = (double*) (mem + str_size + pad_size);
snapshot->fwd_prob = snapshot->vit_prob + hmm->nstates;
}
snapshot->isite = isite;
hmm->snapshot = snapshot;
return snapshot;
}
void hmm_restore(hmm_t *hmm, void *_snapshot)
{
snapshot_t *snapshot = (snapshot_t*) _snapshot;
if ( !snapshot )
{
hmm->init.isite = 0;
return;
}
hmm->init.isite = 1;
hmm->init.pos = snapshot->pos;
memcpy(hmm->init.vit_prob,snapshot->vit_prob,sizeof(double)*hmm->nstates);
memcpy(hmm->init.fwd_prob,snapshot->fwd_prob,sizeof(double)*hmm->nstates);
}

void hmm_set_tprob(hmm_t *hmm, double *tprob, int ntprob)
Expand Down Expand Up @@ -154,23 +217,18 @@ void hmm_run_viterbi(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
hmm->vprob_tmp = (double*) malloc(sizeof(double)*hmm->nstates);
}


// Init all states with equal likelihood
int i,j, nstates = hmm->nstates;
if ( hmm->init_probs )
for (i=0; i<nstates; i++) hmm->vprob[i] = hmm->init_probs[i];
else
for (i=0; i<nstates; i++) hmm->vprob[i] = 1./nstates;
memcpy(hmm->vprob, hmm->init.vit_prob, sizeof(*hmm->init.vit_prob)*nstates);
uint32_t prev_pos = hmm->init.isite ? hmm->init.pos : sites[0];

// Run Viterbi
uint32_t prev_pos = sites[0];
for (i=0; i<n; i++)
{
uint8_t *vpath = &hmm->vpath[i*nstates];
double *eprob = &eprobs[i*nstates];

int pos_diff = sites[i] == prev_pos ? 0 : sites[i] - prev_pos - 1;

_set_tprob(hmm, pos_diff);
if ( hmm->set_tprob ) hmm->set_tprob(hmm, prev_pos, sites[i], hmm->set_tprob_data, hmm->curr_tprob);
prev_pos = sites[i];
Expand All @@ -191,6 +249,12 @@ void hmm_run_viterbi(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
}
for (j=0; j<nstates; j++) hmm->vprob_tmp[j] /= vnorm;
double *tmp = hmm->vprob; hmm->vprob = hmm->vprob_tmp; hmm->vprob_tmp = tmp;

if ( hmm->snapshot && i==hmm->snapshot->isite )
{
hmm->snapshot->pos = sites[i];
memcpy(hmm->snapshot->vit_prob, hmm->vprob, sizeof(*hmm->vprob)*nstates);
}
}

// Find the most likely state
Expand Down Expand Up @@ -224,19 +288,12 @@ void hmm_run_fwd_bwd(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)

// Init all states with equal likelihood
int i,j,k, nstates = hmm->nstates;
if ( hmm->init_probs )
{
for (i=0; i<nstates; i++) hmm->fwd[i] = hmm->init_probs[i];
for (i=0; i<nstates; i++) hmm->bwd[i] = hmm->init_probs[i];
}
else
{
for (i=0; i<nstates; i++) hmm->fwd[i] = 1./hmm->nstates;
for (i=0; i<nstates; i++) hmm->bwd[i] = 1./hmm->nstates;
}
memcpy(hmm->fwd, hmm->init.fwd_prob, sizeof(*hmm->init.fwd_prob)*nstates);
memcpy(hmm->bwd, hmm->init.bwd_prob, sizeof(*hmm->init.bwd_prob)*nstates);

uint32_t prev_pos = hmm->init.isite ? hmm->init.pos : sites[0];

// Run fwd
uint32_t prev_pos = sites[0];
for (i=0; i<n; i++)
{
double *fwd_prev = &hmm->fwd[i*nstates];
Expand All @@ -261,6 +318,13 @@ void hmm_run_fwd_bwd(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
for (j=0; j<nstates; j++) fwd[j] /= norm;
}

if ( hmm->snapshot )
{
i = hmm->snapshot->isite;
hmm->snapshot->pos = sites[i];
memcpy(hmm->snapshot->fwd_prob, hmm->fwd + (i+1)*nstates, sizeof(*hmm->fwd)*nstates);
}

// Run bwd
double *bwd = hmm->bwd, *bwd_tmp = hmm->bwd_tmp;
prev_pos = sites[n-1];
Expand Down Expand Up @@ -296,7 +360,7 @@ void hmm_run_fwd_bwd(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
}
}

void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
double *hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
{
// Init arrays when run for the first time
if ( hmm->nfwd < n )
Expand All @@ -312,24 +376,16 @@ void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)

// Init all states with equal likelihood
int i,j,k, nstates = hmm->nstates;
if ( hmm->init_probs )
{
for (i=0; i<nstates; i++) hmm->fwd[i] = hmm->init_probs[i];
for (i=0; i<nstates; i++) hmm->bwd[i] = hmm->init_probs[i];
}
else
{
for (i=0; i<nstates; i++) hmm->fwd[i] = 1./hmm->nstates;
for (i=0; i<nstates; i++) hmm->bwd[i] = 1./hmm->nstates;
}
memcpy(hmm->fwd, hmm->init.fwd_prob, sizeof(*hmm->init.fwd_prob)*nstates);
memcpy(hmm->bwd, hmm->init.bwd_prob, sizeof(*hmm->init.bwd_prob)*nstates);
uint32_t prev_pos = hmm->init.isite ? hmm->init.pos : sites[0];

// New transition matrix: temporary values
double *tmp_xi = (double*) calloc(nstates*nstates,sizeof(double));
double *tmp_gamma = (double*) calloc(nstates,sizeof(double));
double *fwd_bwd = (double*) malloc(sizeof(double)*nstates);

// Run fwd
uint32_t prev_pos = sites[0];
for (i=0; i<n; i++)
{
double *fwd_prev = &hmm->fwd[i*nstates];
Expand Down Expand Up @@ -416,11 +472,14 @@ void hmm_run_baum_welch(hmm_t *hmm, int n, double *eprobs, uint32_t *sites)
free(tmp_gamma);
free(tmp_xi);
free(fwd_bwd);
return hmm->curr_tprob;
}

void hmm_destroy(hmm_t *hmm)
{
free(hmm->init_probs);
free(hmm->init.vit_prob);
free(hmm->init.fwd_prob);
free(hmm->init.bwd_prob);
free(hmm->vprob);
free(hmm->vprob_tmp);
free(hmm->vpath);
Expand Down
26 changes: 22 additions & 4 deletions HMM.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ typedef void (*set_tprob_f) (hmm_t *hmm, uint32_t prev_pos, uint32_t pos, void *
hmm_t *hmm_init(int nstates, double *tprob, int ntprob);
void hmm_set_tprob(hmm_t *hmm, double *tprob, int ntprob);

#define HMM_VIT 1
#define HMM_FWD 2
#define HMM_BWD 4

/**
* hmm_init_states() - initial state probabilities
* @probs: initial state probabilities or NULL to reset to default
Expand All @@ -52,6 +56,20 @@ void hmm_set_tprob(hmm_t *hmm, double *tprob, int ntprob);
*/
void hmm_init_states(hmm_t *hmm, double *probs);

/**
* hmm_snapshot() - take the model's snapshot, intended for sliding HMM
* @snapshot: NULL or snapshot returned by previous hmm_snapshot() call, must be free()-ed by the caller
* @isite: take the snapshot at i-th step
*/
void *hmm_snapshot(hmm_t *hmm, void *snapshot, int isite);

/**
* hmm_restore() - restore model's snapshot, intended for sliding HMM
* @snapshot: snapshot returned by hmm_snapshot() call or NULL to reset
* @isite: take the snapshot at i-th step
*/
void hmm_restore(hmm_t *hmm, void *snapshot);

/**
* hmm_get_tprob() - return the array of transition matrices, precalculated
* to ntprob positions. The first matrix is the initial tprob matrix
Expand Down Expand Up @@ -103,11 +121,11 @@ double *hmm_get_fwd_bwd_prob(hmm_t *hmm);
* @eprob: emission probabilities for each site and state (nsites x nstates)
* @sites: list of positions
*
* Same as hmm_run_fwd_bwd, in addition curr_tprob contains the new
* transition probabilities. In this verison, emission probabilities
* are not updated.
* Same as hmm_run_fwd_bwd, in addition a pointer to a matrix with the new
* transition probabilities is returned. In this verison, emission
* probabilities are not updated.
*/
void hmm_run_baum_welch(hmm_t *hmm, int nsites, double *eprob, uint32_t *sites);
double *hmm_run_baum_welch(hmm_t *hmm, int nsites, double *eprob, uint32_t *sites);

void hmm_destroy(hmm_t *hmm);

Expand Down
24 changes: 20 additions & 4 deletions INSTALL
Original file line number Diff line number Diff line change
@@ -1,11 +1,27 @@
System Requirements
===================

BCFtools and HTSlib depend on the zlib library <http://zlib.net>. Building
them requires zlib development files to be installed on the build machine;
you may need to ensure a package such as zlib1g-dev (on Debian or Ubuntu Linux)
or zlib-devel (on RPM/yum-based distributions) is installed.
BCFtools and HTSlib depend on the zlib library <http://zlib.net>, the bzip2
library <http://bzip.org/> and liblzma <http://tukaani.org/xz/>. Building
them requires development files to be installed on the build machine;
note that some Linux distributions package these separately from the library
itself (see below).

The bzip2 and liblzma dependencies can be removed if full CRAM support
is not needed - see HTSlib's INSTALL file for details.

Packages for dpkg-based Linux distributions (Debian / Ubuntu) are:

zlib1g-dev
libbz2-dev
liblzma-dev

Packages for rpm or yum-based Linux distributions (RedHat / Fedora / CentOS)
are:

zlib-devel
bzip2-devel
xz-devel

Compilation
===========
Expand Down
Loading

0 comments on commit d6a8801

Please sign in to comment.