diff --git a/.circleci/config.yml b/.circleci/config.yml index aca57512..1e56e542 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -48,6 +48,11 @@ jobs: docker: - image: cimg/python:3.10 + py311: + <<: *shared + docker: + - image: cimg/python:3.11 + workflows: version: 2 @@ -57,3 +62,4 @@ workflows: - py38 - py39 - py310 + - py311 diff --git a/bin/WordEmbedAPI b/bin/WordEmbedAPI deleted file mode 100644 index b9f3af8b..00000000 --- a/bin/WordEmbedAPI +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python - -# argument parsing -import argparse - -argparser = argparse.ArgumentParser(description='Load word-embedding models into memory.') -argparser.add_argument('filepath', help='file path of the word-embedding model') -argparser.add_argument('--port', type=int, default=5000, help='port number') -argparser.add_argument('--embedtype', default='word2vec', help='type of word-embedding algorithm (default: "word2vec), allowing "word2vec", "fasttext", and "poincare"') -argparser.add_argument('--debug', default=False, action='store_true', help='Debug mode (Default: False)') -args = argparser.parse_args() - - -from flask import Flask, request, jsonify -import shorttext - -app = Flask(__name__) -if args.embedtype == 'word2vec': - w2v_model = shorttext.utils.load_word2vec_model(args.filepath, binary=True) -elif args.embedtype == 'fasttext': - w2v_model = shorttext.utils.load_fasttext_model(args.filepath) -elif args.embedtype == 'poincare': - w2v_model = shorttext.utils.load_poincare_model(args.filepath, binary=True) -else: - raise KeyError("Argument 'embedtype' {} unknown.".format(args.embedtype)) - - -@app.route('/closerthan',methods=['POST']) -def closer_than(): - data = request.get_json(force=True) - entity1 = data['entity1'] - entity2 = data['entity2'] - close_entities = w2v_model.closer_than(entity1, entity2) - return jsonify(close_entities) - - -@app.route('/distance',methods=['POST']) -def distance(): - data = request.get_json(force=True) - entity1 = 
data['entity1'] - entity2 = data['entity2'] - distance = w2v_model.distance(entity1, entity2) - return jsonify({'distance': distance}) - - -@app.route('/distances',methods=['POST']) -def distances(): - data = request.get_json(force=True) - entity1 = data['entity1'] - other_entities = tuple(data['other_entities']) - distances = w2v_model.distances(entity1, other_entities) - return jsonify({'distances': list([float(distance) for distance in distances])}) - - -@app.route('/get_vector',methods=['POST']) -def get_vector(): - data = request.get_json(force=True) - token = data['token'] - try: - vector = w2v_model.get_vector(token) - return jsonify({'vector': vector.tolist()}) - except KeyError: - return jsonify({}) - - -@app.route('/most_similar',methods=['POST']) -def most_similar(): - keyword_args = request.get_json(force=True) - returned_results = w2v_model.most_similar(**keyword_args) - return jsonify(returned_results) - - -@app.route('/most_similar_to_given',methods=['POST']) -def most_similar_to_given(): - data = request.get_json(force=True) - entity1 = data['entity1'] - entities_list = data['entities_list'] - entity = w2v_model.most_similar_to_given(entity1, entities_list) - return jsonify({'token': entity}) - - -@app.route('/rank',methods=['POST']) -def rank(): - data = request.get_json(force=True) - entity1 = data['entity1'] - entity2 = data['entity2'] - rank = w2v_model.rank(entity1, entity2) - return jsonify({'rank': rank}) - - -@app.route('/similarity',methods=['POST']) -def similarity(): - data = request.get_json(force=True) - entity1 = data['entity1'] - entity2 = data['entity2'] - similarity = w2v_model.similarity(entity1, entity2) - return jsonify({'similarity': float(similarity)}) - - -if __name__ == "__main__": - app.run(debug=args.debug, port=args.port) \ No newline at end of file diff --git a/docs/install.rst b/docs/install.rst index 4482978d..95aa502a 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -72,7 +72,6 @@ Required Packages - Pandas_ 
(Python Data Analysis Library, version >= 1.0.0) - snowballstemmer_ (Snowball Stemmer, version >= 2.0.0) - TensorFlow_ (TensorFlow, version >= 2.0.0) -- Flask_ (Flask, version >= 1.1.0) - Joblib_ (Joblib: lightweight Python pipelining, version >= 0.14) Home: :doc:`index` @@ -88,6 +87,5 @@ Home: :doc:`index` .. _gensim: https://radimrehurek.com/gensim/ .. _Pandas: http://pandas.pydata.org/ .. _snowballstemmer: https://github.com/snowballstem/snowball .. _TensorFlow: https://www.tensorflow.org/ -.. _Flask: https://flask.palletsprojects.com/ .. _Joblib: https://joblib.readthedocs.io/en/latest/ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ec076999..e4ea69bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ Cython>=0.29.0 numpy>=1.16.0 scipy>=1.6.0 joblib>=0.14 -scikit-learn>=0.23.0 +scikit-learn>=0.22.0 tensorflow>=2.5.0 keras>=2.4.0 gensim>=4.0.0 diff --git a/setup.py b/setup.py index 766ce5ea..39fd75f8 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ def test_requirements(): setup(name='shorttext', - version='1.5.8', + version='1.5.9a1', description="Short Text Mining", long_description=package_description(), long_description_content_type='text/markdown', @@ -86,8 +86,7 @@ def test_requirements(): setup_requires=setup_requirements(), install_requires=install_requirements(), scripts=['bin/ShortTextCategorizerConsole', - 'bin/ShortTextWordEmbedSimilarity', - 'bin/WordEmbedAPI'], + 'bin/ShortTextWordEmbedSimilarity'], test_suite="test", tests_requires=test_requirements(), zip_safe=False)