Improve generate script

- Fix issue with __pycache__ dirs getting picked up
- Parallelise code generation with asyncio for a 3x speedup
- Silence protoc output unless the -v option is supplied
- Use pathlib ;)
Nat Noordanus 2020-06-07 17:53:06 +02:00
parent 4b6f55dce5
commit 3185c67098
3 changed files with 99 additions and 72 deletions
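
The parallelisation described above comes down to one pattern: replace blocking subprocess.run calls with asyncio subprocesses and gather their results, keeping each invocation's output captured so the console stays quiet unless something fails. Below is a minimal standalone sketch of that pattern; the directory names are placeholders and this is not the commit's exact code, which lives in the diffs that follow.

import asyncio


async def run_protoc(proto_dir: str, out_dir: str) -> int:
    # Launch protoc without blocking the event loop; capture output so
    # successful runs stay silent.
    proc = await asyncio.create_subprocess_shell(
        f"protoc --python_out={out_dir} --proto_path={proto_dir} {proto_dir}/*.proto",
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()
    if proc.returncode != 0:
        # Only surface output for failed runs.
        print(stderr.decode(), end="")
    return proc.returncode


async def run_all() -> None:
    # Run every invocation concurrently and collect the return codes.
    codes = await asyncio.gather(
        run_protoc("inputs/bool", "output/bool"),
        run_protoc("inputs/enum", "output/enum"),
    )
    print(f"{sum(code != 0 for code in codes)} of {len(codes)} runs failed")


if __name__ == "__main__":
    asyncio.get_event_loop().run_until_complete(run_all())
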


@@ -1,6 +1,7 @@
 #!/usr/bin/env python
-import glob
+import asyncio
 import os
+from pathlib import Path
 import shutil
 import subprocess
 import sys
@@ -20,58 +21,63 @@ from betterproto.tests.util import (
 os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
-def clear_directory(path: str):
-    for file_or_directory in glob.glob(os.path.join(path, "*")):
-        if os.path.isdir(file_or_directory):
+def clear_directory(dir_path: Path):
+    for file_or_directory in dir_path.glob("*"):
+        if file_or_directory.is_dir():
             shutil.rmtree(file_or_directory)
         else:
-            os.remove(file_or_directory)
+            file_or_directory.unlink()
-def generate(whitelist: Set[str]):
-    path_whitelist = {os.path.realpath(e) for e in whitelist if os.path.exists(e)}
-    name_whitelist = {e for e in whitelist if not os.path.exists(e)}
-    test_case_names = set(get_directories(inputs_path))
-    failed_test_cases = []
+async def generate(whitelist: Set[str], verbose: bool):
+    test_case_names = set(get_directories(inputs_path)) - {"__pycache__"}
+    path_whitelist = set()
+    name_whitelist = set()
+    for item in whitelist:
+        if item in test_case_names:
+            name_whitelist.add(item)
+            continue
+        path_whitelist.add(item)
+    generation_tasks = []
     for test_case_name in sorted(test_case_names):
-        test_case_input_path = os.path.realpath(
-            os.path.join(inputs_path, test_case_name)
-        )
+        test_case_input_path = inputs_path.joinpath(test_case_name).resolve()
         if (
             whitelist
-            and test_case_input_path not in path_whitelist
+            and str(test_case_input_path) not in path_whitelist
             and test_case_name not in name_whitelist
         ):
             continue
-        print(f"Generating output for {test_case_name}")
-        try:
-            generate_test_case_output(test_case_name, test_case_input_path)
-        except subprocess.CalledProcessError as e:
+        generation_tasks.append(
+            generate_test_case_output(test_case_input_path, test_case_name, verbose)
+        )
+    failed_test_cases = []
+    # Wait for all subprocs and match any failures to names to report
+    for test_case_name, result in zip(
+        sorted(test_case_names), await asyncio.gather(*generation_tasks)
+    ):
+        if result != 0:
             failed_test_cases.append(test_case_name)
     if failed_test_cases:
-        sys.stderr.write("\nFailed to generate the following test cases:\n")
+        sys.stderr.write(
+            "\n\033[31;1;4mFailed to generate the following test cases:\033[0m\n"
+        )
         for failed_test_case in failed_test_cases:
             sys.stderr.write(f"- {failed_test_case}\n")
-def generate_test_case_output(test_case_name, test_case_input_path=None):
-    if not test_case_input_path:
-        test_case_input_path = os.path.realpath(
-            os.path.join(inputs_path, test_case_name)
-        )
-    test_case_output_path_reference = os.path.join(
-        output_path_reference, test_case_name
-    )
-    test_case_output_path_betterproto = os.path.join(
-        output_path_betterproto, test_case_name
-    )
+async def generate_test_case_output(
+    test_case_input_path: Path, test_case_name: str, verbose: bool
+) -> int:
+    """
+    Returns the max of the subprocess return values
+    """
+    test_case_output_path_reference = output_path_reference.joinpath(test_case_name)
+    test_case_output_path_betterproto = output_path_betterproto.joinpath(test_case_name)
     os.makedirs(test_case_output_path_reference, exist_ok=True)
     os.makedirs(test_case_output_path_betterproto, exist_ok=True)
@@ -79,14 +85,36 @@ def generate_test_case_output(test_case_name, test_case_input_path=None):
     clear_directory(test_case_output_path_reference)
     clear_directory(test_case_output_path_betterproto)
-    protoc_reference(test_case_input_path, test_case_output_path_reference)
-    protoc_plugin(test_case_input_path, test_case_output_path_betterproto)
+    (
+        (ref_out, ref_err, ref_code),
+        (plg_out, plg_err, plg_code),
+    ) = await asyncio.gather(
+        protoc_reference(test_case_input_path, test_case_output_path_reference),
+        protoc_plugin(test_case_input_path, test_case_output_path_betterproto),
+    )
+    message = f"Generated output for {test_case_name!r}"
+    if verbose:
+        print(f"\033[31;1;4m{message}\033[0m")
+        if ref_out:
+            sys.stdout.buffer.write(ref_out)
+        if ref_err:
+            sys.stderr.buffer.write(ref_err)
+        if plg_out:
+            sys.stdout.buffer.write(plg_out)
+        if plg_err:
+            sys.stderr.buffer.write(plg_err)
+        sys.stdout.buffer.flush()
+        sys.stderr.buffer.flush()
+    else:
+        print(message)
+    return max(ref_code, plg_code)
 HELP = "\n".join(
-    [
-        "Usage: python generate.py",
-        "       python generate.py [DIRECTORIES or NAMES]",
+    (
+        "Usage: python generate.py [-h] [-v] [DIRECTORIES or NAMES]",
         "Generate python classes for standard tests.",
         "",
         "DIRECTORIES One or more relative or absolute directories of test-cases to generate classes for.",
@@ -94,7 +122,7 @@ HELP = "\n".join(
         "",
         "NAMES One or more test-case names to generate classes for.",
         "       python generate.py bool double enums",
-    ]
+    )
 )
@@ -102,9 +130,13 @@ def main():
     if set(sys.argv).intersection({"-h", "--help"}):
         print(HELP)
         return
-    whitelist = set(sys.argv[1:])
-    generate(whitelist)
+    if sys.argv[1:2] == ["-v"]:
+        verbose = True
+        whitelist = set(sys.argv[2:])
+    else:
+        verbose = False
+        whitelist = set(sys.argv[1:])
+    asyncio.get_event_loop().run_until_complete(generate(whitelist, verbose))
 if __name__ == "__main__":
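
main() drives the coroutine with asyncio.get_event_loop().run_until_complete, which also works on Python 3.6. If only 3.7+ interpreters needed support, the same call could be written more simply; this is a possible simplification, not part of the commit:

    asyncio.run(generate(whitelist, verbose))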


@@ -23,7 +23,7 @@ from google.protobuf.json_format import Parse
 class TestCases:
     def __init__(self, path, services: Set[str], xfail: Set[str]):
-        _all = set(get_directories(path))
+        _all = set(get_directories(path)) - {"__pycache__"}
         _services = services
         _messages = _all - services
         _messages_with_json = {


@@ -1,23 +1,24 @@
+import asyncio
 import os
-import subprocess
-from typing import Generator
+from pathlib import Path
+from typing import Generator, IO, Optional
 os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
-root_path = os.path.dirname(os.path.realpath(__file__))
-inputs_path = os.path.join(root_path, "inputs")
-output_path_reference = os.path.join(root_path, "output_reference")
-output_path_betterproto = os.path.join(root_path, "output_betterproto")
+root_path = Path(__file__).resolve().parent
+inputs_path = root_path.joinpath("inputs")
+output_path_reference = root_path.joinpath("output_reference")
+output_path_betterproto = root_path.joinpath("output_betterproto")
 if os.name == "nt":
-    plugin_path = os.path.join(root_path, "..", "plugin.bat")
+    plugin_path = root_path.joinpath("..", "plugin.bat").resolve()
 else:
-    plugin_path = os.path.join(root_path, "..", "plugin.py")
+    plugin_path = root_path.joinpath("..", "plugin.py").resolve()
-def get_files(path, end: str) -> Generator[str, None, None]:
+def get_files(path, suffix: str) -> Generator[str, None, None]:
     for r, dirs, files in os.walk(path):
-        for filename in [f for f in files if f.endswith(end)]:
+        for filename in [f for f in files if f.endswith(suffix)]:
             yield os.path.join(r, filename)
@@ -27,36 +28,30 @@ def get_directories(path):
         yield directory
-def relative(file: str, path: str):
-    return os.path.join(os.path.dirname(file), path)
-def read_relative(file: str, path: str):
-    with open(relative(file, path)) as fh:
-        return fh.read()
-def protoc_plugin(path: str, output_dir: str) -> subprocess.CompletedProcess:
-    return subprocess.run(
+async def protoc_plugin(path: str, output_dir: str):
+    proc = await asyncio.create_subprocess_shell(
         f"protoc --plugin=protoc-gen-custom={plugin_path} --custom_out={output_dir} --proto_path={path} {path}/*.proto",
-        shell=True,
-        check=True,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
     )
+    return (*(await proc.communicate()), proc.returncode)
-def protoc_reference(path: str, output_dir: str):
-    subprocess.run(
+async def protoc_reference(path: str, output_dir: str):
+    proc = await asyncio.create_subprocess_shell(
         f"protoc --python_out={output_dir} --proto_path={path} {path}/*.proto",
-        shell=True,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
     )
+    return (*(await proc.communicate()), proc.returncode)
-def get_test_case_json_data(test_case_name, json_file_name=None):
+def get_test_case_json_data(test_case_name: str, json_file_name: Optional[str] = None):
     test_data_file_name = json_file_name if json_file_name else f"{test_case_name}.json"
-    test_data_file_path = os.path.join(inputs_path, test_case_name, test_data_file_name)
+    test_data_file_path = inputs_path.joinpath(test_case_name, test_data_file_name)
-    if not os.path.exists(test_data_file_path):
+    if not test_data_file_path.exists():
         return None
-    with open(test_data_file_path) as fh:
+    with test_data_file_path.open("r") as fh:
         return fh.read()
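
With this change both protoc helpers resolve to a (stdout, stderr, returncode) tuple instead of a subprocess.CompletedProcess, so callers unpack three values per invocation. The snippet below is a hypothetical, self-contained illustration of driving one helper directly; the test-case name comes from the HELP text above, and the output directory is assumed to already exist.

import asyncio
import sys

from betterproto.tests.util import inputs_path, output_path_betterproto, protoc_plugin


async def regenerate_one(test_case_name: str) -> int:
    # Each helper resolves to (stdout_bytes, stderr_bytes, returncode).
    out, err, code = await protoc_plugin(
        inputs_path.joinpath(test_case_name),
        output_path_betterproto.joinpath(test_case_name),
    )
    if code != 0:
        sys.stderr.buffer.write(err)
    return code


if __name__ == "__main__":
    code = asyncio.get_event_loop().run_until_complete(regenerate_one("bool"))
    sys.exit(code)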