From a391badfe13148a0a742d22053f75655741eef22 Mon Sep 17 00:00:00 2001 From: YerbaPage <845039191@qq.com> Date: Sat, 11 Oct 2025 21:33:12 +0800 Subject: [PATCH] packaging --- MANIFEST.in | 7 ++ README.md | 103 +++--------------- demo.py | 10 +- experiments/README.md | 37 +++++++ .../long-code-completion}/code_compressor.py | 0 .../compare_empty_line_handling.py | 0 .../long-code-completion}/main.py | 0 .../long-code-completion}/run.sh | 0 .../long-code-completion}/utils.py | 0 .../module-summarization}/code_compressor.py | 0 .../module-summarization}/main.py | 0 .../module-summarization}/run.sh | 0 .../module-summarization}/utils.py | 0 {repo-qa => experiments/repo-qa}/__init__.py | 0 .../repo-qa}/code_compressor.py | 0 .../repo-qa}/code_segment_extractor.py | 0 .../repo-qa}/compute_score.py | 0 {repo-qa => experiments/repo-qa}/data.py | 0 {repo-qa => experiments/repo-qa}/main.py | 0 {repo-qa => experiments/repo-qa}/metric.py | 0 .../repo-qa}/provider/__init__.py | 0 .../repo-qa}/provider/anthropic.py | 0 .../repo-qa}/provider/base.py | 0 .../repo-qa}/provider/google.py | 0 .../repo-qa}/provider/hf.py | 0 .../repo-qa}/provider/openai.py | 0 .../repo-qa}/provider/request/__init__.py | 0 .../repo-qa}/provider/request/anthropic.py | 0 .../repo-qa}/provider/request/google.py | 0 .../repo-qa}/provider/request/openai.py | 0 .../repo-qa}/provider/vllm.py | 0 {repo-qa => experiments/repo-qa}/run.sh | 0 {repo-qa => experiments/repo-qa}/utility.py | 0 longcodezip.py => longcodezip/__init__.py | 99 ++++++++++++++++- pyproject.toml | 72 ++++++++++++ requirements.txt | 7 +- setup.py | 45 ++++++++ 37 files changed, 282 insertions(+), 98 deletions(-) create mode 100644 MANIFEST.in create mode 100644 experiments/README.md rename {long-code-completion => experiments/long-code-completion}/code_compressor.py (100%) rename {long-code-completion => experiments/long-code-completion}/compare_empty_line_handling.py (100%) rename {long-code-completion => 
experiments/long-code-completion}/main.py (100%) rename {long-code-completion => experiments/long-code-completion}/run.sh (100%) rename {long-code-completion => experiments/long-code-completion}/utils.py (100%) rename {module-summarization => experiments/module-summarization}/code_compressor.py (100%) rename {module-summarization => experiments/module-summarization}/main.py (100%) rename {module-summarization => experiments/module-summarization}/run.sh (100%) rename {module-summarization => experiments/module-summarization}/utils.py (100%) rename {repo-qa => experiments/repo-qa}/__init__.py (100%) rename {repo-qa => experiments/repo-qa}/code_compressor.py (100%) rename {repo-qa => experiments/repo-qa}/code_segment_extractor.py (100%) rename {repo-qa => experiments/repo-qa}/compute_score.py (100%) rename {repo-qa => experiments/repo-qa}/data.py (100%) rename {repo-qa => experiments/repo-qa}/main.py (100%) rename {repo-qa => experiments/repo-qa}/metric.py (100%) rename {repo-qa => experiments/repo-qa}/provider/__init__.py (100%) rename {repo-qa => experiments/repo-qa}/provider/anthropic.py (100%) rename {repo-qa => experiments/repo-qa}/provider/base.py (100%) rename {repo-qa => experiments/repo-qa}/provider/google.py (100%) rename {repo-qa => experiments/repo-qa}/provider/hf.py (100%) rename {repo-qa => experiments/repo-qa}/provider/openai.py (100%) rename {repo-qa => experiments/repo-qa}/provider/request/__init__.py (100%) rename {repo-qa => experiments/repo-qa}/provider/request/anthropic.py (100%) rename {repo-qa => experiments/repo-qa}/provider/request/google.py (100%) rename {repo-qa => experiments/repo-qa}/provider/request/openai.py (100%) rename {repo-qa => experiments/repo-qa}/provider/vllm.py (100%) rename {repo-qa => experiments/repo-qa}/run.sh (100%) rename {repo-qa => experiments/repo-qa}/utility.py (100%) rename longcodezip.py => longcodezip/__init__.py (95%) create mode 100644 pyproject.toml create mode 100644 setup.py diff --git a/MANIFEST.in 
b/MANIFEST.in new file mode 100644 index 0000000..2ca2016 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,7 @@ +include README.md +include LICENSE +include requirements.txt +recursive-include longcodezip *.py +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + diff --git a/README.md b/README.md index 186262c..f639ce2 100644 --- a/README.md +++ b/README.md @@ -21,37 +21,20 @@ LongCodeZip introduces a two-stage code compression framework specifically desig The method is plug-and-play and can be integrated with existing code LLMs to achieve significant compression ratios while maintaining or improving task performance. -## Repository Structure - -This repository contains implementations and experiments for three code-related tasks: - -``` -LongCodeZip/ -├── repo-qa/ # Code Retrieval Task -│ ├── main.py # Main evaluation script -│ ├── run.sh # Experiment runner -│ ├── code_compressor.py # Core compression implementation -│ ├── compute_score.py # Evaluation metrics -│ └── ... -├── long-code-completion/ # Code Completion Task -│ ├── main.py # Main evaluation script -│ ├── run.sh # Experiment runner -│ ├── code_compressor.py # Core compression implementation -│ ├── utils.py # Utility functions -│ └── ... -├── module-summarization/ # Code Summarization Task -│ ├── main.py # Main evaluation script -│ ├── run.sh # Experiment runner -│ ├── code_compressor.py # Core compression implementation -│ ├── utils.py # Utility functions -│ └── ... -└── README.md -``` - ## Installation +You can install directly from the GitHub repository: + ```bash -pip install -r requirements.txt +pip install git+https://github.com/YerbaPage/LongCodeZip.git +``` + +Or clone and install in development mode: + +```bash +git clone https://github.com/YerbaPage/LongCodeZip.git +cd LongCodeZip +pip install -e . 
``` ## Quick Demo @@ -62,36 +45,21 @@ We provide a simple demo (`demo.py`) to help you get started with LongCodeZip: python demo.py ``` -This demo showcases the core compression functionality by compressing a simple code snippet containing multiple functions (add, quick_sort, search_with_binary_search) based on a query about quick sort. The compressor will: -1. Rank functions by relevance to the query -2. Apply fine-grained compression to maximize information density -3. Generate a compressed prompt suitable for code LLMs - -**Example output:** -```python -# Original: ~150 tokens -# Compressed: ~64 tokens (target) -# Selected: quick_sort function (most relevant to query) -``` - -## Core API Usage - -LongCodeZip provides a simple and powerful API for compressing long code contexts. Here's how to use it: - -### Basic Example +## Basic Example ```python -from longcodezip import CodeCompressor +from longcodezip import LongCodeZip # Initialize the compressor -compressor = CodeCompressor(model_name="Qwen/Qwen2.5-Coder-7B-Instruct") +compressor = LongCodeZip(model_name="Qwen/Qwen2.5-Coder-7B-Instruct") # Compress code with a query result = compressor.compress_code_file( - code=your_code_string, - query="What does this function do?", - instruction="Answer the question based on the code.", + code=your_code_string, + query=your_query, + instruction=your_instruction, rate=0.5, # Keep 50% of tokens + rank_only=False, # Set to True to only rank and select contexts without fine-grained compression ) # Access compressed results @@ -99,41 +67,6 @@ compressed_code = result['compressed_code'] compressed_prompt = result['compressed_prompt'] # Full prompt with instruction compression_ratio = result['compression_ratio'] ``` -## Usage - -### Quick Start - -Each task directory contains a `run.sh` script for easy experimentation. 
Simply navigate to the desired task directory and run: - -```bash -cd -bash run.sh -``` - -### Code Retrieval (RepoQA) - -Navigate to the `repo-qa` directory and run experiments with different compression ratios: - -```bash -cd repo-qa -bash run.sh -``` - -The script will evaluate LongCodeZip on the RepoQA dataset with compression ratios, running experiments in parallel on multiple GPUs. - -**Key Parameters:** -- `--compression-ratio`: Controls the compression level -- `--model`: Specifies the base LLM model -- `--backend`: Backend for model inference (vllm) - -### Code Completion - -Navigate to the `long-code-completion` directory: - -```bash -cd long-code-completion -bash run.sh -``` ## References diff --git a/demo.py b/demo.py index 8371669..f4a335a 100644 --- a/demo.py +++ b/demo.py @@ -1,4 +1,4 @@ -from longcodezip import CodeCompressor +from longcodezip import LongCodeZip from loguru import logger if __name__ == "__main__": @@ -16,7 +16,7 @@ if __name__ == "__main__": # Initialize compressor logger.info("Initializing compressor...") model_name = "Qwen/Qwen2.5-Coder-7B-Instruct" - compressor = CodeCompressor(model_name=model_name) + compressor = LongCodeZip(model_name=model_name) # Test function-based code file compression with query logger.info("\nTesting function-based code file compression with query...") @@ -24,7 +24,7 @@ if __name__ == "__main__": original_tokens = len(compressor.tokenizer.encode(context)) target_token = 64 target_ratio = min(1.0, max(0.0, target_token / original_tokens)) - logger.info(f"CodeCompressor: Original tokens={original_tokens}, Target tokens={target_token}, Calculated ratio={target_ratio:.4f}") + logger.info(f"LongCodeZip: Original tokens={original_tokens}, Target tokens={target_token}, Calculated ratio={target_ratio:.4f}") logger.info("\nTesting compression with Coarse-grained compression only...") result_cond = compressor.compress_code_file( @@ -35,6 +35,7 @@ if __name__ == "__main__": rank_only=True # Coarse-grained 
compression ) logger.info(f"Compressed prompt: \n{result_cond['compressed_prompt']}") + logger.info(f"Compression ratio: {result_cond['compression_ratio']:.4f}") # Compression ratio: 0.3856 logger.info("\nTesting compression with Coarse-grained and Fine-grained compression...") result_cond = compressor.compress_code_file( @@ -44,4 +45,5 @@ if __name__ == "__main__": rate=target_ratio, rank_only=False # Corase-grained and Fine-grained compression ) - logger.info(f"Compressed prompt: \n{result_cond['compressed_prompt']}") \ No newline at end of file + logger.info(f"Compressed prompt: \n{result_cond['compressed_prompt']}") + logger.info(f"Compression ratio: {result_cond['compression_ratio']:.4f}") # Compression ratio: 0.1468 \ No newline at end of file diff --git a/experiments/README.md b/experiments/README.md new file mode 100644 index 0000000..26f0595 --- /dev/null +++ b/experiments/README.md @@ -0,0 +1,37 @@ +# Experiments + +This folder contains the old experiments for the three code-related tasks. Some of the code may be outdated after refactoring. + +### Quick Start + +Each task directory contains a `run.sh` script for easy experimentation. Simply navigate to the desired task directory and run: + +```bash +cd TASK_DIRECTORY # one of: repo-qa, long-code-completion, module-summarization +bash run.sh +``` + +### Code Retrieval (RepoQA) + +Navigate to the `repo-qa` directory and run experiments with different compression ratios: + +```bash +cd repo-qa +bash run.sh +``` + +The script will evaluate LongCodeZip on the RepoQA dataset with different compression ratios, running experiments in parallel on multiple GPUs. 
+ +**Key Parameters:** +- `--compression-ratio`: Controls the compression level +- `--model`: Specifies the base LLM model +- `--backend`: Backend for model inference (vllm) + +### Code Completion + +Navigate to the `long-code-completion` directory: + +```bash +cd long-code-completion +bash run.sh +``` diff --git a/long-code-completion/code_compressor.py b/experiments/long-code-completion/code_compressor.py similarity index 100% rename from long-code-completion/code_compressor.py rename to experiments/long-code-completion/code_compressor.py diff --git a/long-code-completion/compare_empty_line_handling.py b/experiments/long-code-completion/compare_empty_line_handling.py similarity index 100% rename from long-code-completion/compare_empty_line_handling.py rename to experiments/long-code-completion/compare_empty_line_handling.py diff --git a/long-code-completion/main.py b/experiments/long-code-completion/main.py similarity index 100% rename from long-code-completion/main.py rename to experiments/long-code-completion/main.py diff --git a/long-code-completion/run.sh b/experiments/long-code-completion/run.sh similarity index 100% rename from long-code-completion/run.sh rename to experiments/long-code-completion/run.sh diff --git a/long-code-completion/utils.py b/experiments/long-code-completion/utils.py similarity index 100% rename from long-code-completion/utils.py rename to experiments/long-code-completion/utils.py diff --git a/module-summarization/code_compressor.py b/experiments/module-summarization/code_compressor.py similarity index 100% rename from module-summarization/code_compressor.py rename to experiments/module-summarization/code_compressor.py diff --git a/module-summarization/main.py b/experiments/module-summarization/main.py similarity index 100% rename from module-summarization/main.py rename to experiments/module-summarization/main.py diff --git a/module-summarization/run.sh b/experiments/module-summarization/run.sh similarity index 100% rename from 
module-summarization/run.sh rename to experiments/module-summarization/run.sh diff --git a/module-summarization/utils.py b/experiments/module-summarization/utils.py similarity index 100% rename from module-summarization/utils.py rename to experiments/module-summarization/utils.py diff --git a/repo-qa/__init__.py b/experiments/repo-qa/__init__.py similarity index 100% rename from repo-qa/__init__.py rename to experiments/repo-qa/__init__.py diff --git a/repo-qa/code_compressor.py b/experiments/repo-qa/code_compressor.py similarity index 100% rename from repo-qa/code_compressor.py rename to experiments/repo-qa/code_compressor.py diff --git a/repo-qa/code_segment_extractor.py b/experiments/repo-qa/code_segment_extractor.py similarity index 100% rename from repo-qa/code_segment_extractor.py rename to experiments/repo-qa/code_segment_extractor.py diff --git a/repo-qa/compute_score.py b/experiments/repo-qa/compute_score.py similarity index 100% rename from repo-qa/compute_score.py rename to experiments/repo-qa/compute_score.py diff --git a/repo-qa/data.py b/experiments/repo-qa/data.py similarity index 100% rename from repo-qa/data.py rename to experiments/repo-qa/data.py diff --git a/repo-qa/main.py b/experiments/repo-qa/main.py similarity index 100% rename from repo-qa/main.py rename to experiments/repo-qa/main.py diff --git a/repo-qa/metric.py b/experiments/repo-qa/metric.py similarity index 100% rename from repo-qa/metric.py rename to experiments/repo-qa/metric.py diff --git a/repo-qa/provider/__init__.py b/experiments/repo-qa/provider/__init__.py similarity index 100% rename from repo-qa/provider/__init__.py rename to experiments/repo-qa/provider/__init__.py diff --git a/repo-qa/provider/anthropic.py b/experiments/repo-qa/provider/anthropic.py similarity index 100% rename from repo-qa/provider/anthropic.py rename to experiments/repo-qa/provider/anthropic.py diff --git a/repo-qa/provider/base.py b/experiments/repo-qa/provider/base.py similarity index 100% rename from 
repo-qa/provider/base.py rename to experiments/repo-qa/provider/base.py diff --git a/repo-qa/provider/google.py b/experiments/repo-qa/provider/google.py similarity index 100% rename from repo-qa/provider/google.py rename to experiments/repo-qa/provider/google.py diff --git a/repo-qa/provider/hf.py b/experiments/repo-qa/provider/hf.py similarity index 100% rename from repo-qa/provider/hf.py rename to experiments/repo-qa/provider/hf.py diff --git a/repo-qa/provider/openai.py b/experiments/repo-qa/provider/openai.py similarity index 100% rename from repo-qa/provider/openai.py rename to experiments/repo-qa/provider/openai.py diff --git a/repo-qa/provider/request/__init__.py b/experiments/repo-qa/provider/request/__init__.py similarity index 100% rename from repo-qa/provider/request/__init__.py rename to experiments/repo-qa/provider/request/__init__.py diff --git a/repo-qa/provider/request/anthropic.py b/experiments/repo-qa/provider/request/anthropic.py similarity index 100% rename from repo-qa/provider/request/anthropic.py rename to experiments/repo-qa/provider/request/anthropic.py diff --git a/repo-qa/provider/request/google.py b/experiments/repo-qa/provider/request/google.py similarity index 100% rename from repo-qa/provider/request/google.py rename to experiments/repo-qa/provider/request/google.py diff --git a/repo-qa/provider/request/openai.py b/experiments/repo-qa/provider/request/openai.py similarity index 100% rename from repo-qa/provider/request/openai.py rename to experiments/repo-qa/provider/request/openai.py diff --git a/repo-qa/provider/vllm.py b/experiments/repo-qa/provider/vllm.py similarity index 100% rename from repo-qa/provider/vllm.py rename to experiments/repo-qa/provider/vllm.py diff --git a/repo-qa/run.sh b/experiments/repo-qa/run.sh similarity index 100% rename from repo-qa/run.sh rename to experiments/repo-qa/run.sh diff --git a/repo-qa/utility.py b/experiments/repo-qa/utility.py similarity index 100% rename from repo-qa/utility.py rename to 
experiments/repo-qa/utility.py diff --git a/longcodezip.py b/longcodezip/__init__.py similarity index 95% rename from longcodezip.py rename to longcodezip/__init__.py index 04721ea..1103794 100644 --- a/longcodezip.py +++ b/longcodezip/__init__.py @@ -11,6 +11,10 @@ import copy import bisect import json from loguru import logger +import sys +# set the level to info only, no need to show the debug messages +logger.remove() +logger.add(sys.stderr, level="INFO") class EntropyChunking: def __init__(self, model_name="Qwen/Qwen2.5-Coder-0.5B-Instruct"): @@ -192,7 +196,7 @@ class EntropyChunking: return chunks, sentences, ppls, spike_indices -class CodeCompressor: +class LongCodeZip: def __init__( self, model_name: str = "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4", @@ -200,7 +204,7 @@ class CodeCompressor: model_config: dict = {}, ): """ - Initialize the CodeCompressor with a language model for compression. + Initialize the LongCodeZip with a language model for compression. Args: model_name: The name of the model to load from HuggingFace @@ -1803,4 +1807,93 @@ class CodeCompressor: selected.add(idx) current_weight += weight - return selected \ No newline at end of file + return selected + +if __name__ == "__main__": + + context = """ + def add(a, b): + return a + b + + def quick_sort(arr): + if len(arr) <= 1: + return arr + pivot = arr[len(arr) // 2] + left = [x for x in arr if x < pivot] + middle = [x for x in arr if x == pivot] + right = [x for x in arr if x > pivot] + return quick_sort(left) + middle + quick_sort(right) + + def search_with_binary_search(arr, target): + left, right = 0, len(arr) - 1 + while left <= right: + mid = (left + right) // 2 + if arr[mid] == target: + return mid + elif arr[mid] < target: + left = mid + 1 + else: + right = mid - 1 + return -1 + """ + + question = "How to write a quick sort algorithm?" 
+ + # Initialize compressor + logger.info("Initializing compressor...") + model_name = "Qwen/Qwen2.5-Coder-7B-Instruct" + compressor = LongCodeZip(model_name=model_name) + + # Test function-based code file compression with query + logger.info("\nTesting function-based code file compression with query...") + + original_tokens = len(compressor.tokenizer.encode(context)) + target_token = 64 + target_ratio = min(1.0, max(0.0, target_token / original_tokens)) + logger.info(f"LongCodeZip: Original tokens={original_tokens}, Target tokens={target_token}, Calculated ratio={target_ratio:.4f}") + + result = compressor.compress_code_file( + code=context, + query=question, # Using current function context as query focus + instruction="Complete the following code function given the context.", + rate=target_ratio, + rank_only=True, # Only use coarse-grained compression + fine_grained_importance_method="conditional_ppl", # Explicitly test default + min_lines_for_fine_grained=5, # Min number of lines for fine-grained compression + importance_beta=0.5, # Sensitivity to importance score + use_knapsack=True, + ) + + # show the compressed code + logger.info(f"Compressed code (using {result['fine_grained_method_used']}): \n{result['compressed_code']}") + logger.info(f"Current function context: \n{question}") + # final prompt + final_prompt = result['compressed_prompt'] + # get the completion + tokenized_prompt = compressor.tokenizer(final_prompt, return_tensors="pt").to(compressor.device) + # Increase max_new_tokens for potentially longer completions + completion_ids = compressor.model.generate(**tokenized_prompt, max_new_tokens=128, pad_token_id=compressor.tokenizer.eos_token_id) + # Decode only the generated part, skipping special tokens + completion = compressor.tokenizer.decode(completion_ids[0][len(tokenized_prompt.input_ids[0]):], skip_special_tokens=True) + + # Basic cleanup: remove leading/trailing whitespace and potentially stop words if needed + completion = completion.strip() 
+ # More robust cleanup: Find the first meaningful line if generation includes noise + completion_lines = [line for line in completion.split("\n") if line.strip() and not line.strip().startswith(("#", "//"))] # Simple comment removal + cleaned_completion = completion_lines[0] if completion_lines else completion # Take first non-comment line or original if none found + + logger.info(f"Cleaned Completion: {cleaned_completion}") + + # Optional: Test with conditional_ppl method + logger.info("\nTesting fine-grained compression with conditional_ppl...") + result_cond = compressor.compress_code_file( + code=context, + query=question, + instruction="Complete the following code function given the context.", + rate=target_ratio, + rank_only=False, + fine_grained_importance_method="conditional_ppl", + min_lines_for_fine_grained=5, + importance_beta=0.5 + ) + logger.info(f"Compressed code (using {result_cond['fine_grained_method_used']}): \n{result_cond['compressed_code']}") \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c671de0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,72 @@ +[build-system] +requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "longcodezip" +version = "0.1.0" +description = "A novel two-stage long code compression method for code language models" +readme = "README.md" +requires-python = ">=3.9" +license = {text = "MIT"} +authors = [ + {name = "Yuling Shi"}, +] +keywords = [ + "code compression", + "language models", + "llm", + "code intelligence", + "nlp", +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: 
Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "appdirs", + "datasets", + "editdistance", + "fire", + "loguru", + "matplotlib", + "nltk", + "numpy", + "openai>=1.0.0", + "rich", + "torch", + "transformers", + "tqdm", + "tree-sitter-languages", + "tempdir", + "wget", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "flake8", +] + +[project.urls] +Homepage = "https://github.com/YerbaPage/LongCodeZip" +Repository = "https://github.com/YerbaPage/LongCodeZip" +Documentation = "https://github.com/YerbaPage/LongCodeZip/blob/main/README.md" +"Bug Tracker" = "https://github.com/YerbaPage/LongCodeZip/issues" +"Paper" = "https://arxiv.org/abs/2510.00446" + +[tool.setuptools] +packages = ["longcodezip"] + +[tool.setuptools.package-data] +longcodezip = ["py.typed"] + diff --git a/requirements.txt b/requirements.txt index a1f25ea..98fec7e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,7 @@ -anthropic appdirs datasets editdistance fire -google-api-core -google-generativeai -llmlingua loguru matplotlib nltk @@ -13,9 +9,8 @@ numpy openai>=1.0.0 rich torch -transformers==4.37.1 +transformers tqdm tree-sitter-languages tempdir -vllm wget diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..6de39a8 --- /dev/null +++ b/setup.py @@ -0,0 +1,45 @@ +from setuptools import setup, find_packages + +with open("README.md", "r", encoding="utf-8") as fh: + long_description = fh.read() + +with open("requirements.txt", "r", encoding="utf-8") as fh: + requirements = [line.strip() for line in fh if line.strip() and not line.startswith("#")] + +setup( + name="longcodezip", + version="0.1.0", + author="Yuling Shi", + author_email="", + description="A novel two-stage long code compression method for code language models", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/YerbaPage/LongCodeZip", 
+ packages=find_packages(exclude=["repo-qa", "long-code-completion", "module-summarization", "assets"]), + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries :: Python Modules", + ], + python_requires=">=3.9", + install_requires=requirements, + extras_require={ + "dev": [ + "pytest", + "black", + "flake8", + ], + }, + include_package_data=True, + package_data={ + "longcodezip": ["py.typed"], + }, +) +