Improve generate script

- Fix issue with __pycache__ dirs getting picked up
- Parallelise code generation with asyncio for a 3x speedup
- Silence protoc output unless the -v option is supplied
- Use pathlib ;)
This commit is contained in:
Nat Noordanus 2020-06-07 17:53:06 +02:00
parent 4b6f55dce5
commit 3185c67098
3 changed files with 99 additions and 72 deletions

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
import glob import asyncio
import os import os
from pathlib import Path
import shutil import shutil
import subprocess import subprocess
import sys import sys
@ -20,58 +21,63 @@ from betterproto.tests.util import (
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
def clear_directory(path: str): def clear_directory(dir_path: Path):
for file_or_directory in glob.glob(os.path.join(path, "*")): for file_or_directory in dir_path.glob("*"):
if os.path.isdir(file_or_directory): if file_or_directory.is_dir():
shutil.rmtree(file_or_directory) shutil.rmtree(file_or_directory)
else: else:
os.remove(file_or_directory) file_or_directory.unlink()
def generate(whitelist: Set[str]): async def generate(whitelist: Set[str], verbose: bool):
path_whitelist = {os.path.realpath(e) for e in whitelist if os.path.exists(e)} test_case_names = set(get_directories(inputs_path)) - {"__pycache__"}
name_whitelist = {e for e in whitelist if not os.path.exists(e)}
test_case_names = set(get_directories(inputs_path)) path_whitelist = set()
name_whitelist = set()
failed_test_cases = [] for item in whitelist:
if item in test_case_names:
name_whitelist.add(item)
continue
path_whitelist.add(item)
generation_tasks = []
for test_case_name in sorted(test_case_names): for test_case_name in sorted(test_case_names):
test_case_input_path = os.path.realpath( test_case_input_path = inputs_path.joinpath(test_case_name).resolve()
os.path.join(inputs_path, test_case_name)
)
if ( if (
whitelist whitelist
and test_case_input_path not in path_whitelist and str(test_case_input_path) not in path_whitelist
and test_case_name not in name_whitelist and test_case_name not in name_whitelist
): ):
continue continue
generation_tasks.append(
generate_test_case_output(test_case_input_path, test_case_name, verbose)
)
print(f"Generating output for {test_case_name}") failed_test_cases = []
try: # Wait for all subprocs and match any failures to names to report
generate_test_case_output(test_case_name, test_case_input_path) for test_case_name, result in zip(
except subprocess.CalledProcessError as e: sorted(test_case_names), await asyncio.gather(*generation_tasks)
):
if result != 0:
failed_test_cases.append(test_case_name) failed_test_cases.append(test_case_name)
if failed_test_cases: if failed_test_cases:
sys.stderr.write("\nFailed to generate the following test cases:\n") sys.stderr.write(
"\n\033[31;1;4mFailed to generate the following test cases:\033[0m\n"
)
for failed_test_case in failed_test_cases: for failed_test_case in failed_test_cases:
sys.stderr.write(f"- {failed_test_case}\n") sys.stderr.write(f"- {failed_test_case}\n")
def generate_test_case_output(test_case_name, test_case_input_path=None): async def generate_test_case_output(
if not test_case_input_path: test_case_input_path: Path, test_case_name: str, verbose: bool
test_case_input_path = os.path.realpath( ) -> int:
os.path.join(inputs_path, test_case_name) """
) Returns the max of the subprocess return values
"""
test_case_output_path_reference = os.path.join( test_case_output_path_reference = output_path_reference.joinpath(test_case_name)
output_path_reference, test_case_name test_case_output_path_betterproto = output_path_betterproto.joinpath(test_case_name)
)
test_case_output_path_betterproto = os.path.join(
output_path_betterproto, test_case_name
)
os.makedirs(test_case_output_path_reference, exist_ok=True) os.makedirs(test_case_output_path_reference, exist_ok=True)
os.makedirs(test_case_output_path_betterproto, exist_ok=True) os.makedirs(test_case_output_path_betterproto, exist_ok=True)
@ -79,14 +85,36 @@ def generate_test_case_output(test_case_name, test_case_input_path=None):
clear_directory(test_case_output_path_reference) clear_directory(test_case_output_path_reference)
clear_directory(test_case_output_path_betterproto) clear_directory(test_case_output_path_betterproto)
protoc_reference(test_case_input_path, test_case_output_path_reference) (
protoc_plugin(test_case_input_path, test_case_output_path_betterproto) (ref_out, ref_err, ref_code),
(plg_out, plg_err, plg_code),
) = await asyncio.gather(
protoc_reference(test_case_input_path, test_case_output_path_reference),
protoc_plugin(test_case_input_path, test_case_output_path_betterproto),
)
message = f"Generated output for {test_case_name!r}"
if verbose:
print(f"\033[31;1;4m{message}\033[0m")
if ref_out:
sys.stdout.buffer.write(ref_out)
if ref_err:
sys.stderr.buffer.write(ref_err)
if plg_out:
sys.stdout.buffer.write(plg_out)
if plg_err:
sys.stderr.buffer.write(plg_err)
sys.stdout.buffer.flush()
sys.stderr.buffer.flush()
else:
print(message)
return max(ref_code, plg_code)
HELP = "\n".join( HELP = "\n".join(
[ (
"Usage: python generate.py", "Usage: python generate.py [-h] [-v] [DIRECTORIES or NAMES]",
" python generate.py [DIRECTORIES or NAMES]",
"Generate python classes for standard tests.", "Generate python classes for standard tests.",
"", "",
"DIRECTORIES One or more relative or absolute directories of test-cases to generate classes for.", "DIRECTORIES One or more relative or absolute directories of test-cases to generate classes for.",
@ -94,7 +122,7 @@ HELP = "\n".join(
"", "",
"NAMES One or more test-case names to generate classes for.", "NAMES One or more test-case names to generate classes for.",
" python generate.py bool double enums", " python generate.py bool double enums",
] )
) )
@ -102,9 +130,13 @@ def main():
if set(sys.argv).intersection({"-h", "--help"}): if set(sys.argv).intersection({"-h", "--help"}):
print(HELP) print(HELP)
return return
whitelist = set(sys.argv[1:]) if sys.argv[1:2] == ["-v"]:
verbose = True
generate(whitelist) whitelist = set(sys.argv[2:])
else:
verbose = False
whitelist = set(sys.argv[1:])
asyncio.get_event_loop().run_until_complete(generate(whitelist, verbose))
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -23,7 +23,7 @@ from google.protobuf.json_format import Parse
class TestCases: class TestCases:
def __init__(self, path, services: Set[str], xfail: Set[str]): def __init__(self, path, services: Set[str], xfail: Set[str]):
_all = set(get_directories(path)) _all = set(get_directories(path)) - {"__pycache__"}
_services = services _services = services
_messages = _all - services _messages = _all - services
_messages_with_json = { _messages_with_json = {

View File

@ -1,23 +1,24 @@
import asyncio
import os import os
import subprocess from pathlib import Path
from typing import Generator from typing import Generator, IO, Optional
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
root_path = os.path.dirname(os.path.realpath(__file__)) root_path = Path(__file__).resolve().parent
inputs_path = os.path.join(root_path, "inputs") inputs_path = root_path.joinpath("inputs")
output_path_reference = os.path.join(root_path, "output_reference") output_path_reference = root_path.joinpath("output_reference")
output_path_betterproto = os.path.join(root_path, "output_betterproto") output_path_betterproto = root_path.joinpath("output_betterproto")
if os.name == "nt": if os.name == "nt":
plugin_path = os.path.join(root_path, "..", "plugin.bat") plugin_path = root_path.joinpath("..", "plugin.bat").resolve()
else: else:
plugin_path = os.path.join(root_path, "..", "plugin.py") plugin_path = root_path.joinpath("..", "plugin.py").resolve()
def get_files(path, end: str) -> Generator[str, None, None]: def get_files(path, suffix: str) -> Generator[str, None, None]:
for r, dirs, files in os.walk(path): for r, dirs, files in os.walk(path):
for filename in [f for f in files if f.endswith(end)]: for filename in [f for f in files if f.endswith(suffix)]:
yield os.path.join(r, filename) yield os.path.join(r, filename)
@ -27,36 +28,30 @@ def get_directories(path):
yield directory yield directory
def relative(file: str, path: str): async def protoc_plugin(path: str, output_dir: str):
return os.path.join(os.path.dirname(file), path) proc = await asyncio.create_subprocess_shell(
def read_relative(file: str, path: str):
with open(relative(file, path)) as fh:
return fh.read()
def protoc_plugin(path: str, output_dir: str) -> subprocess.CompletedProcess:
return subprocess.run(
f"protoc --plugin=protoc-gen-custom={plugin_path} --custom_out={output_dir} --proto_path={path} {path}/*.proto", f"protoc --plugin=protoc-gen-custom={plugin_path} --custom_out={output_dir} --proto_path={path} {path}/*.proto",
shell=True, stdout=asyncio.subprocess.PIPE,
check=True, stderr=asyncio.subprocess.PIPE,
) )
return (*(await proc.communicate()), proc.returncode)
def protoc_reference(path: str, output_dir: str): async def protoc_reference(path: str, output_dir: str):
subprocess.run( proc = await asyncio.create_subprocess_shell(
f"protoc --python_out={output_dir} --proto_path={path} {path}/*.proto", f"protoc --python_out={output_dir} --proto_path={path} {path}/*.proto",
shell=True, stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
) )
return (*(await proc.communicate()), proc.returncode)
def get_test_case_json_data(test_case_name, json_file_name=None): def get_test_case_json_data(test_case_name: str, json_file_name: Optional[str] = None):
test_data_file_name = json_file_name if json_file_name else f"{test_case_name}.json" test_data_file_name = json_file_name if json_file_name else f"{test_case_name}.json"
test_data_file_path = os.path.join(inputs_path, test_case_name, test_data_file_name) test_data_file_path = inputs_path.joinpath(test_case_name, test_data_file_name)
if not os.path.exists(test_data_file_path): if not test_data_file_path.exists():
return None return None
with open(test_data_file_path) as fh: with test_data_file_path.open("r") as fh:
return fh.read() return fh.read()