# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4

# Portfile format version and the PortGroups this port relies on.
PortSystem          1.0
PortGroup           github 1.0
PortGroup           python 1.0

# Upstream source: GitHub author "google", project "sentencepiece",
# release 0.1.93; the trailing "v" is the tag prefix (tag name v0.1.93).
github.setup        google sentencepiece 0.1.93 v
revision            0
# Port is published as py-sentencepiece; ${github.project} expands to the
# project name passed to github.setup above.
name                py-${github.project}

# -append extends the existing category list instead of replacing it.
categories-append   textproc
license             Apache-2
maintainers         nomaintainer
platforms           darwin

# One-line summary; ${github.project} expands to the upstream project name.
description         Python wrapper for ${github.project}

# Full description shown to users. Square brackets are backslash-escaped so
# Tcl does not treat the citations as command substitutions.
long_description    SentencePiece is an unsupervised text tokenizer \
                    and detokenizer mainly for Neural Network-based \
                    text generation systems where the vocabulary size \
                    is predetermined prior to the neural model \
                    training. SentencePiece implements subword units \
                    (e.g., byte-pair-encoding (BPE) \[Sennrich et al.\] \
                    and unigram language model \[Kudo.\]) with the \
                    extension of direct training from raw \
                    sentences. SentencePiece allows us to make a \
                    purely end-to-end system that does not depend on \
                    language-specific pre/postprocessing.

# Digests and size (in bytes) used to verify the downloaded distfile.
checksums           rmd160  76b1e1ae6e5b297ed07c749d7ff35f12e4852322 \
                    sha256  9a146e83dc6a403e431ae444dcc0c28203be81436566acc628edae42fc8901ec \
                    size    11684466

# Python versions (3.7, 3.8, 3.9) the python PortGroup should offer this
# module for.
python.versions     37 38 39

# Everything below applies only when a subport other than the top-level
# port itself is being processed.
if {$subport ne $name} {
    # The Python binding lives in the python/ subdirectory of the
    # extracted source tree and needs a C++11-capable compiler.
    worksrcdir      $distname/python
    compiler.cxx_standard 2011

    # Runtime/link dependency on the native library port.
    depends_lib-append port:sentencepiece

    # Build-only dependencies; setuptools is picked for the Python version
    # of the subport being built.
    depends_build-append port:pkgconfig port:py${python.version}-setuptools

    # Enable the test phase and point PYTHONPATH at build/lib inside the
    # work source directory.
    test.run        yes
    test.env        PYTHONPATH=$worksrcpath/build/lib

    # No livecheck for these subports.
    livecheck.type  none
}
