update from correct upstream

TwFlem · Jan 19, 2024 · 2707a4d · 2707a4d
2 parents 77ca52d + 3248776
commit 2707a4d
Show file tree

Hide file tree

Showing 26 changed files with 2,843 additions and 99 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,2 @@
+# .gitattributes
+*justfile linguist-vendored=true
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -17,3 +17,4 @@ jobs:
         uses: golangci/[email protected]
         with:
           version: latest
+          args: -c .golangci.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
-- Basic BWT for sub-sequence count and offset for sequence alignment. Only supports exact matches for now.
+- Moved `BWT`, `align`, and `mash` packages to new `search` sub-directory.
 
 
 ## [0.30.0] - 2023-12-18

diff --git a/fold/fold_test.go b/fold/fold_test.go
@@ -1,6 +1,7 @@
 package fold
 
 import (
+	"fmt"
 	"math"
 	"strings"
 	"testing"
@@ -201,3 +202,13 @@ func TestFold(t *testing.T) {
 		assert.InDelta(t, struc.energy, -4.2, 0.2)
 	})
 }
+func TestZuker_ErrorCreatingFoldingContext(t *testing.T) {
+	seq := "ATGGATTTAGATAGATADFQ#(RSDOFIA)"
+	temp := 4000.0
+
+	expectedErr := fmt.Errorf("error creating folding context: the sequence ATGGATTTAGATAGATADFQ#(RSDOFIA) is not RNA or DNA")
+
+	_, err := Zuker(seq, temp)
+	require.Error(t, err)
+	assert.Equal(t, expectedErr.Error(), err.Error())
+}
diff --git a/fold/seqfold_test.go b/fold/seqfold_test.go
@@ -0,0 +1,43 @@
+package fold
+
+import (
+	"fmt"
+	"math"
+	"testing"
+)
+
+func TestResult_MinimumFreeEnergy_LengthZero(t *testing.T) {
+	result := Result{} // Create a Result instance with empty structs
+
+	expectedEnergy := math.Inf(1)
+	actualEnergy := result.MinimumFreeEnergy()
+
+	if actualEnergy != expectedEnergy {
+		t.Errorf("expected energy to be %f, but got %f", expectedEnergy, actualEnergy)
+	}
+}
+
+func TestResult_DotBracket_LengthZero(t *testing.T) {
+	result := Result{} // Create a Result instance with empty structs
+
+	expectedDotBracket := ""
+	actualDotBracket := result.DotBracket()
+
+	if actualDotBracket != expectedDotBracket {
+		t.Errorf("expected dot bracket to be %s, but got %s", expectedDotBracket, actualDotBracket)
+	}
+}
+
+// TestNewFoldingContext_InvalidSequence checks that newFoldingContext returns
+// an error for the sequence "XYZ" and that the error carries the expected
+// "is not RNA or DNA" message.
+func TestNewFoldingContext_InvalidSequence(t *testing.T) {
+	seq := "XYZ"
+	temp := 37.0
+
+	_, err := newFoldingContext(seq, temp)
+	if err == nil {
+		// Stop here: continuing would call Error() on a nil error and panic
+		// instead of reporting a clean test failure.
+		t.Fatalf("expected error, but got nil")
+	}
+	expectedError := fmt.Errorf("the sequence %s is not RNA or DNA", seq)
+	if err.Error() != expectedError.Error() {
+		t.Errorf("expected error message to be %q, but got %q", expectedError.Error(), err.Error())
+	}
+}
diff --git a/align/align.go → search/align/align.go b/align/align.go → search/align/align.go
@@ -66,7 +66,7 @@ Tim
 package align
 
 import (
-	"github.com/bebop/poly/align/matrix"
+	"github.com/bebop/poly/search/align/matrix"
 )
 
 // Scoring is a struct that holds the scoring matrix for match, mismatch, and gap penalties.

diff --git a/align/align_test.go → search/align/align_test.go b/align/align_test.go → search/align/align_test.go
@@ -3,9 +3,9 @@ package align_test
 import (
 	"testing"
 
-	"github.com/bebop/poly/align"
-	"github.com/bebop/poly/align/matrix"
 	"github.com/bebop/poly/alphabet"
+	"github.com/bebop/poly/search/align"
+	"github.com/bebop/poly/search/align/matrix"
 )
 
 func TestNeedlemanWunsch(t *testing.T) {

diff --git a/align/example_test.go → search/align/example_test.go b/align/example_test.go → search/align/example_test.go
@@ -4,9 +4,9 @@ package align_test
 import (
 	"fmt"
 
-	"github.com/bebop/poly/align"
-	"github.com/bebop/poly/align/matrix"
 	"github.com/bebop/poly/alphabet"
+	"github.com/bebop/poly/search/align"
+	"github.com/bebop/poly/search/align/matrix"
 )
 
 func ExampleNeedlemanWunsch() {

diff --git a/align/matrix/matrices.go → search/align/matrix/matrices.go b/align/matrix/matrices.go → search/align/matrix/matrices.go
diff --git a/align/matrix/matrix.go → search/align/matrix/matrix.go b/align/matrix/matrix.go → search/align/matrix/matrix.go
diff --git a/align/matrix/matrix_test.go → search/align/matrix/matrix_test.go b/align/matrix/matrix_test.go → search/align/matrix/matrix_test.go
@@ -3,8 +3,8 @@ package matrix_test
 import (
 	"testing"
 
-	"github.com/bebop/poly/align/matrix"
 	"github.com/bebop/poly/alphabet"
+	"github.com/bebop/poly/search/align/matrix"
 	"github.com/stretchr/testify/assert"
 )
 

diff --git a/search/bwt/bitvector.go b/search/bwt/bitvector.go
@@ -0,0 +1,77 @@
+package bwt
+
+import (
+	"fmt"
+	"math"
+)
+
+const wordSize = 64
+
+// bitvector is a sequence of 1's and 0's. You can also think
+// of this as an array of bits. This allows us to encode
+// data in a memory efficient manner.
+type bitvector struct {
+	bits         []uint64
+	numberOfBits int
+}
+
+// newBitVector will return an initialized bitvector with
+// the specified number of zeroed bits.
+func newBitVector(initialNumberOfBits int) bitvector {
+	capacity := getNumOfBitSetsNeededForNumOfBits(initialNumberOfBits)
+	bits := make([]uint64, capacity)
+	return bitvector{
+		bits:         bits,
+		numberOfBits: initialNumberOfBits,
+	}
+}
+
+// getBitSet gets the whole word at some offset from the
+// bitvector. Useful if you'd prefer to work with the
+// word rather than with individual bits.
+func (b bitvector) getBitSet(bitSetPos int) uint64 {
+	return b.bits[bitSetPos]
+}
+
+// getBit returns the value of the bit at a given offset
+// True represents 1
+// False represents 0
+func (b bitvector) getBit(i int) bool {
+	b.checkBounds(i)
+
+	chunkStart := i / wordSize
+	offset := i % wordSize
+
+	return (b.bits[chunkStart] & (uint64(1) << (63 - offset))) != 0
+}
+
+// setBit sets the value of the bit at a given offset
+// True represents 1
+// False represents 0
+func (b bitvector) setBit(i int, val bool) {
+	b.checkBounds(i)
+
+	chunkStart := i / wordSize
+	offset := i % wordSize
+
+	if val {
+		b.bits[chunkStart] |= uint64(1) << (63 - offset)
+	} else {
+		b.bits[chunkStart] &= ^(uint64(1) << (63 - offset))
+	}
+}
+
+func (b bitvector) checkBounds(i int) {
+	if i >= b.len() || i < 0 {
+		msg := fmt.Sprintf("access of %d is out of bounds for bitvector with length %d", i, b.len())
+		panic(msg)
+	}
+}
+
+func (b bitvector) len() int {
+	return b.numberOfBits
+}
+
+func getNumOfBitSetsNeededForNumOfBits(n int) int {
+	return int(math.Ceil(float64(n) / wordSize))
+}
diff --git a/search/bwt/bitvector_test.go b/search/bwt/bitvector_test.go
@@ -0,0 +1,119 @@
+package bwt
+
+import (
+	"testing"
+)
+
+type GetBitTestCase struct {
+	position int
+	expected bool
+}
+
+func TestBitVector(t *testing.T) {
+	initialNumberOfBits := wordSize*10 + 1
+
+	bv := newBitVector(initialNumberOfBits)
+
+	if bv.len() != initialNumberOfBits {
+		t.Fatalf("expected len to be %d but got %d", initialNumberOfBits, bv.len())
+	}
+
+	for i := 0; i < initialNumberOfBits; i++ {
+		bv.setBit(i, true)
+	}
+
+	bv.setBit(3, false)
+	bv.setBit(11, false)
+	bv.setBit(13, false)
+	bv.setBit(23, false)
+	bv.setBit(24, false)
+	bv.setBit(25, false)
+	bv.setBit(42, false)
+	bv.setBit(63, false)
+	bv.setBit(64, false)
+	bv.setBit(255, false)
+	bv.setBit(256, false)
+
+	getBitTestCases := []GetBitTestCase{
+		{0, true},
+		{1, true},
+		{2, true},
+		{3, false},
+		{4, true},
+		{7, true},
+		{8, true},
+		{9, true},
+		{10, true},
+		{11, false},
+		{12, true},
+		{13, false},
+		{23, false},
+		{24, false},
+		{25, false},
+		{42, false},
+		{15, true},
+		{16, true},
+		{62, true},
+		{63, false},
+		{64, false},
+		// Test past the first word
+		{65, true},
+		{72, true},
+		{79, true},
+		{80, true},
+		{255, false},
+		{256, false},
+		{511, true},
+		{512, true},
+	}
+
+	for _, v := range getBitTestCases {
+		actual := bv.getBit(v.position)
+		if actual != v.expected {
+			t.Fatalf("expected %dth bit to be %t but got %t", v.position, v.expected, actual)
+		}
+	}
+}
+
+func TestBitVectorBoundPanic_GetBit_Lower(t *testing.T) {
+	defer func() { _ = recover() }()
+
+	initialNumberOfBits := wordSize*10 + 1
+	bv := newBitVector(initialNumberOfBits)
+	bv.getBit(-1)
+
+	t.Fatalf("expected get bit lower bound panic")
+}
+
+func TestBitVectorBoundPanic_GetBit_Upper(t *testing.T) {
+	defer func() { _ = recover() }()
+	initialNumberOfBits := wordSize*10 + 1
+	bv := newBitVector(initialNumberOfBits)
+	bv.getBit(initialNumberOfBits)
+
+	t.Fatalf("expected get bit upper bound panic")
+}
+
+func TestBitVectorBoundPanic_SetBit_Lower(t *testing.T) {
+	defer func() {
+		if r := recover(); r != nil {
+			return
+		}
+		t.Fatalf("expected set bit lower bound panic")
+	}()
+	initialNumberOfBits := wordSize*10 + 1
+	bv := newBitVector(initialNumberOfBits)
+	bv.setBit(-1, true)
+}
+
+func TestBitVectorBoundPanic_SetBit_Upper(t *testing.T) {
+	defer func() {
+		if r := recover(); r != nil {
+			return
+		}
+		t.Fatalf("expected set bit upper bound panic")
+	}()
+	initialNumberOfBits := wordSize*10 + 1
+	bv := newBitVector(initialNumberOfBits)
+	bv.setBit(initialNumberOfBits, true)
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# .gitattributes
		*justfile linguist-vendored=true