diff --git a/python-tiktoken.spec b/python-tiktoken.spec
new file mode 100644
--- /dev/null
+++ b/python-tiktoken.spec
@@ -0,0 +1,88 @@
+%define anolis_release 1
+%global debug_package %{nil}
+%global pypi_name tiktoken
+%global pypi_version 0.5.2
+
+Name: python-%{pypi_name}
+Version: %{pypi_version}
+Release: %{anolis_release}%{?dist}
+Summary: Fast BPE tokeniser for use with OpenAI's models
+
+# SPDX license identifier ("MIT License" is not a valid License tag value)
+License: MIT
+
+URL: https://github.com/openai/tiktoken
+Source0: %{pypi_source}
+
+# cargo vendor --no-delete > .cargo/config.toml
+# tar -czf vendor.tar.gz vendor .cargo/
+Source1: vendor.tar.gz
+
+BuildRequires: python3-devel
+BuildRequires: python3dist(regex) >= 2022.1.18
+BuildRequires: python3dist(requests) >= 2.26
+BuildRequires: python3dist(setuptools)
+BuildRequires: python3dist(setuptools-rust)
+# hypothesis and pytest are needed only by the test suite run in %%check
+BuildRequires: python3dist(hypothesis)
+BuildRequires: python3dist(pytest)
+
+%description
+Tiktoken is a fast Byte Pair Encoding (BPE) tokenizer designed for use with OpenAI’s
+language models. It provides an efficient way to convert text into tokens, which are
+the numerical representations that AI models operate on. Tiktoken is developed to be
+reversible and lossless, meaning it can convert tokens back into original text, and it
+works on any text, regardless of whether it was part of the tokenizer’s training data.
+
+The tokenizer is particularly optimized for performance and is reported to be up to 3-6
+times faster than comparable open-source tokenizers. It is designed to be used with
+OpenAI’s models, and it can be easily integrated into code examples found in the OpenAI
+Cookbook.
+
+%package -n python3-%{pypi_name}
+Summary: %{summary}
+%{?python_provide:%python_provide python3-%{pypi_name}}
+
+Recommends: python3dist(blobfile) >= 2
+Requires: python3dist(regex) >= 2022.1.18
+Requires: python3dist(requests) >= 2.26
+%description -n python3-%{pypi_name}
+Tiktoken is a fast Byte Pair Encoding (BPE) tokenizer designed for use with OpenAI’s
+language models. It provides an efficient way to convert text into tokens, which are
+the numerical representations that AI models operate on. Tiktoken is developed to be
+reversible and lossless, meaning it can convert tokens back into original text, and it
+works on any text, regardless of whether it was part of the tokenizer’s training data.
+
+The tokenizer is particularly optimized for performance and is reported to be up to 3-6
+times faster than comparable open-source tokenizers. It is designed to be used with
+OpenAI’s models, and it can be easily integrated into code examples found in the OpenAI
+Cookbook.
+
+
+%prep
+%autosetup -n %{pypi_name}-%{pypi_version}
+# Unpack pre-vendored Rust crates plus the .cargo/ config pointing at them
+tar -xzf %{SOURCE1}
+# Remove bundled egg-info
+rm -rf %{pypi_name}.egg-info
+
+%build
+%py3_build
+
+%install
+%py3_install
+
+%check
+# "setup.py test" is deprecated/removed in modern setuptools; %%pytest also
+# puts the buildroot on PYTHONPATH so the compiled Rust extension is importable
+%pytest
+
+%files -n python3-%{pypi_name}
+%license LICENSE
+%doc README.md
+%{python3_sitearch}/%{pypi_name}
+%{python3_sitearch}/tiktoken_ext
+%{python3_sitearch}/%{pypi_name}-%{pypi_version}-py%{python3_version}.egg-info
+
+%changelog
+* Wed Jan 10 2024 zhongling.h - 0.5.2-1
+- Initial package.
diff --git a/tiktoken-0.5.2.tar.gz b/tiktoken-0.5.2.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..e1e22781349021a040af6a21a2de9e588eee454d
Binary files /dev/null and b/tiktoken-0.5.2.tar.gz differ
diff --git a/vendor.tar.gz b/vendor.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..56da77a53c637337e6f9f0e8d611baceb0df6f0f
Binary files /dev/null and b/vendor.tar.gz differ