Skip to content

Commit

Permalink
Handle filter-only queries: short-circuit and return filtered results
Browse files Browse the repository at this point in the history
- For queries that contain only filters, short-circuit and return the
  filtered results; there is no need to run semantic search or re-ranking.
- Add a client test for a filter-only query, and URL-quote queries in client tests
  • Loading branch information
debanjum committed Sep 12, 2022
1 parent afc84de commit 1bfe9c4
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 5 deletions.
5 changes: 5 additions & 0 deletions src/search_type/text_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ def query(raw_query: str, model: TextSearchModel, rank_results=False):
if entries is None or len(entries) == 0:
return [], []

# If the query contained only filters, it will be empty by now, so short-circuit and return the filtered results.
if query.strip() == "":
hits = [{"corpus_id": id, "score": 1.0} for id, _ in enumerate(entries)]
return hits, entries

# Encode the query using the bi-encoder
start = time.time()
question_embedding = model.bi_encoder.encode([query], convert_to_tensor=True, device=state.device)
Expand Down
29 changes: 24 additions & 5 deletions tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
# Standard Modules
from io import BytesIO
from PIL import Image
from urllib.parse import quote


# External Packages
from fastapi.testclient import TestClient

# Internal Packages
from src.main import app
from src.utils.config import SearchType
from src.utils.state import model, config
from src.search_type import text_search, image_search
from src.utils.rawconfig import ContentConfig, SearchConfig
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
from src.search_filter.word_filter import WordFilter
from src.search_filter.file_filter import FileFilter


# Arrange
Expand All @@ -23,7 +25,7 @@
# ----------------------------------------------------------------------------------------------------
def test_search_with_invalid_content_type():
# Arrange
user_query = "How to call Khoj from Emacs?"
user_query = quote("How to call Khoj from Emacs?")

# Act
response = client.get(f"/search?q={user_query}&t=invalid_content_type")
Expand Down Expand Up @@ -117,7 +119,7 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig
def test_notes_search(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
user_query = "How to git install application?"
user_query = quote("How to git install application?")

# Act
response = client.get(f"/search?q={user_query}&n=1&t=org&r=true")
Expand All @@ -129,12 +131,29 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig
assert "git clone" in search_result


# ----------------------------------------------------------------------------------------------------
def test_notes_search_with_only_filters(content_config: ContentConfig, search_config: SearchConfig):
    """Check that an org notes search whose query holds only filter terms still returns a matching entry."""
    # Arrange
    active_filters = [WordFilter(), FileFilter()]
    model.orgmode_search = text_search.setup(
        org_to_jsonl,
        content_config.org,
        search_config.asymmetric,
        regenerate=False,
        filters=active_filters,
    )
    # URL-quote the query so the quotes, '+', ':' and '*' are percent-encoded in the request URL
    encoded_query = quote('+"Emacs" file:"*.org"')
    search_url = f"/search?q={encoded_query}&n=1&t=org"

    # Act
    response = client.get(search_url)

    # Assert
    assert response.status_code == 200
    # The first returned entry must contain the word "Emacs"
    top_entry = response.json()[0]["entry"]
    assert "Emacs" in top_entry


# ----------------------------------------------------------------------------------------------------
def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
filters = [WordFilter()]
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
user_query = 'How to git install application? +"Emacs"'
user_query = quote('How to git install application? +"Emacs"')

# Act
response = client.get(f"/search?q={user_query}&n=1&t=org")
Expand All @@ -151,7 +170,7 @@ def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_
# Arrange
filters = [WordFilter()]
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
user_query = 'How to git install application? -"clone"'
user_query = quote('How to git install application? -"clone"')

# Act
response = client.get(f"/search?q={user_query}&n=1&t=org")
Expand Down

0 comments on commit 1bfe9c4

Please sign in to comment.