Move to pdm

Signed-off-by: Olga Bulat <[email protected]>
obulat · Jul 14, 2024 · 688ddb9 · 688ddb9
1 parent be1870a
commit 688ddb9
Show file tree

Hide file tree

Showing 11 changed files with 155 additions and 101 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,42 @@
+# This workflow installs the project with PDM, runs the tests under coverage on a variety of Python versions,
+# and uploads the coverage report to Codecov.
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: build
+
+on: [push, pull_request]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Every version is quoted so YAML cannot read one as a float (3.10 would become 3.1).
+        python-version: ['3.9', '3.10', '3.11', '3.12']
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up PDM
+      uses: pdm-project/setup-pdm@v4
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Install dependencies
+      run: pdm install
+
+    # Use the PDM-managed interpreter: that is where `pdm install` put nltk.
+    - name: Install nltk punkt
+      run: pdm run python -m nltk.downloader punkt
+
+    # Plain pytest records no coverage data, so run it under coverage and write the XML report Codecov uploads.
+    - name: Test with pytest and coverage
+      run: |
+        pdm run coverage run -m pytest
+        pdm run coverage xml
+
+    # NOTE(review): assumes a CODECOV_TOKEN repository secret exists — confirm.
+    - name: Codecov
+      uses: codecov/codecov-action@v4
+      with:
+        token: ${{ secrets.CODECOV_TOKEN }}
+
diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml
@@ -13,19 +13,22 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
-    - name: Set up Python
-      uses: actions/setup-python@v2
+    # setup-pdm installs both Python and PDM; plain setup-python would leave `pdm` missing.
+    - name: Set up PDM
+      uses: pdm-project/setup-pdm@v4
       with:
         python-version: '3.x'
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
-        pip install setuptools wheel twine
+        pdm install
+        pdm build
+
     - name: Build and publish to real pypi
       env:
-        TWINE_USERNAME: __token__
-        TWINE_PASSWORD: ${{ secrets.pypi_password }}
+        # `pdm publish` reads the PDM_PUBLISH_* variables; it does not use the TWINE_* ones.
+        PDM_PUBLISH_USERNAME: __token__
+        PDM_PUBLISH_PASSWORD: ${{ secrets.pypi_password }}
       run: |
-        python setup.py sdist bdist_wheel
-        twine upload dist/*
+        # The distributions were already built by the previous step.
+        pdm publish --no-build
diff --git a/.gitignore b/.gitignore
@@ -102,3 +102,6 @@ ENV/
 .mypy_cache/
 /vvenv/
 /.idea/
+
+# pdm stuff
+.pdm-python
diff --git a/README.md b/README.md
@@ -0,0 +1,50 @@
+# Zeyrek: Morphological Analyzer and Lemmatizer
+
+![PyPI - Version](https://img.shields.io/pypi/v/zeyrek)
+
+Zeyrek is a partial port of the [Zemberek library](https://github.com/ahmetaa/zemberek-nlp) to Python for lemmatizing
+and analyzing Turkish language words. It is in alpha stage, and the API
+will probably change.
+
+
+* Free software: MIT license
+* Documentation: https://zeyrek.readthedocs.io.
+
+
+## Basic Usage
+
+To use Zeyrek, first create an instance of the `MorphAnalyzer` class:
+
+```python
+import zeyrek
+analyzer = zeyrek.MorphAnalyzer()
+```
+
+Then, you can call its `analyze` method on words or texts to get all possible analyses:
+
+```python
+print(analyzer.analyze('benim'))
+Parse(word='benim', lemma='ben', pos='Noun', morphemes=['Noun', 'A3sg', 'P1sg'], formatted='[ben:Noun] ben:Noun+A3sg+im:P1sg')
+Parse(word='benim', lemma='ben', pos='Pron', morphemes=['Pron', 'A1sg', 'Gen'], formatted='[ben:Pron,Pers] ben:Pron+A1sg+im:Gen')
+Parse(word='benim', lemma='ben', pos='Verb', morphemes=['Noun', 'A3sg', 'Zero', 'Verb', 'Pres', 'A1sg'], formatted='[ben:Noun] ben:Noun+A3sg|Zero→Verb+Pres+im:A1sg')
+Parse(word='benim', lemma='ben', pos='Verb', morphemes=['Pron', 'A1sg', 'Zero', 'Verb', 'Pres', 'A1sg'], formatted='[ben:Pron,Pers] ben:Pron+A1sg|Zero→Verb+Pres+im:A1sg')
+```
+If you only need the base form of words, or lemmas, you can call `lemmatize`. It returns a list
+of tuples, each containing the word itself and a list of its possible lemmas:
+
+```python
+print(analyzer.lemmatize('benim'))
+[('benim', ['ben'])]
+```
+
+
+## Credits
+
+This package is a Python port of part of the [Zemberek](https://github.com/ahmetaa/zemberek-nlp) package by [Ahmet A. Akın](https://github.com/ahmetaa).
+
+
+This package was created with
+[Cookiecutter](https://github.com/audreyr/cookiecutter) and the
+[audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage)
+project template.
+
diff --git a/README.rst b/README.rst
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,37 @@
+[project]
+name = "zeyrek"
+version = "0.1.4"
+description = "Python morphological analyzer and lemmatizer for Turkish"
+authors = [
+    {name = "Olga Bulat", email = "[email protected]"},
+]
+# Runtime requirements only: everything listed here is installed for every user of the package,
+# so test, documentation and release tooling is kept in the dev groups at the bottom of this file.
+# nltk stays at its exact pin because CI and the tests download its `punkt` resource by name.
+# NOTE(review): nltk is the only third-party runtime import visible from the tests;
+# confirm against src/zeyrek before releasing.
+dependencies = [
+    "nltk==3.8.1",
+]
+requires-python = ">=3.9"
+readme = "README.md"
+license = {text = "MIT"}
+
+[build-system]
+requires = ["pdm-backend"]
+build-backend = "pdm.backend"
+
+
+[tool.pdm]
+distribution = true
+
+# Development-only groups: `pdm install` installs them by default, but they are not part of the published metadata.
+[tool.pdm.dev-dependencies]
+test = [
+    "pytest==8.2.2",
+    "coverage==7.6.0",
+]
+doc = [
+    "Sphinx==5.3.0",
+    "sphinx-rtd-theme==2.0.0",
+]
diff --git a/src/zeyrek/__init__.py b/src/zeyrek/__init__.py
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
@@ -11,6 +11,9 @@
 from zeyrek.morphotactics import StemTransition, SearchPath, root_S, noun_S, MorphemeState, p2sg_S, loc_ST, a3pl_S, \
     SuffixTransition, morphemes, equ_ST, TurkishMorphotactics, adjectiveRoot_ST
 
+import nltk
+nltk.download('punkt')
+
 
 @pytest.fixture
 def dict_item():

diff --git a/tests/test_conditions.py b/tests/test_conditions.py
@@ -16,6 +16,9 @@
     adjectiveRoot_ST, verbRoot_S, become_S, vPast_S, past, verb, vCausTir_S, \
     nom_ST, vAgt_S, a3sg_S, pnon_S, morphemes, agt, a3sg, noun, pnon, nom, vPass_S, vAble_S
 
+import nltk
+nltk.download('punkt')
+
 lex = RootLexicon.from_lines(["adak", "elma", "beyaz [P:Adj]", "meyve"])