Skip to content

Commit e3b08cf

Browse files
tfx-copybaratf-data-validation-team
authored and
tf-data-validation-team
committed
Opensource data_validation build_docs script.
PiperOrigin-RevId: 271031978
1 parent 3c37bff commit e3b08cf

File tree

3 files changed

+154
-0
lines changed

3 files changed

+154
-0
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Opensource tools, not part of the pip package.
2+
3+
licenses(["notice"]) # Apache 2.0
4+
5+
package(default_visibility = ["//tensorflow_data_validation:__subpackages__"])
6+
7+
# Binary that generates the API reference docs from build_docs.py.
py_binary(
    name = "build_docs",
    srcs = ["build_docs.py"],
    srcs_version = "PY3",
    python_version = "PY3",
    deps = [
        # Dependency order is kept exactly as originally declared.
        "//third_party/py/absl:app",
        "//third_party/py/apache_beam:apache_beam_internal_deps",
        "//tensorflow_data_validation",
        "//third_party/py/tensorflow_docs/api_generator",
    ],
)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Tools
2+
3+
Additional tools and scripts that are not part of the pip package.
4+
5+
## build_docs.py
6+
7+
This is used to generate the api reference docs for tensorflow.org.
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# pylint: disable=line-too-long
16+
r"""Script to generate api_docs.
17+
18+
The doc generator can be installed with:
19+
20+
```
21+
$> pip install git+https://github.com/tensorflow/docs
22+
```
23+
24+
Build the docs:
25+
26+
```
27+
bazel run //tensorflow_data_validation/tools:build_docs -- \
28+
--output_dir=$(pwd)/g3doc/api_docs/python
29+
```
30+
31+
To run it on the tfdv pip package:
32+
33+
```
34+
python tensorflow_data_validation/tools/build_docs.py --output_dir=/tmp/tfdv_api
35+
```
36+
"""
37+
# pylint: enable=line-too-long
38+
39+
from __future__ import absolute_import
40+
from __future__ import division
41+
from __future__ import print_function
42+
43+
import inspect
44+
45+
from absl import app
46+
from absl import flags
47+
48+
import apache_beam as beam
49+
50+
import tensorflow_data_validation as tfdv
51+
52+
from tensorflow_docs.api_generator import doc_controls
53+
from tensorflow_docs.api_generator import generate_lib
54+
from tensorflow_docs.api_generator import public_api
55+
56+
# Handle used by `main` to read the parsed flag values.
FLAGS = flags.FLAGS

# Directory the generated docs are written to.
flags.DEFINE_string(
    "output_dir",
    default="/tmp/tfdv_api",
    help="Where to output the docs")

# Base URL used when the generated docs link back to source code.
flags.DEFINE_string(
    "code_url_prefix",
    default=("https://github.com/tensorflow/data-validation/blob/master/"
             "tensorflow_data_validation/"),
    help="The url prefix for links to code.")

flags.DEFINE_bool(
    "search_hints",
    default=True,
    help="Include metadata search hints in the generated files")

flags.DEFINE_string(
    "site_path",
    default="/tfx/data_validation/api_docs/python",
    help="Path prefix in the _toc.yaml")

# Objects that `main` passes to `doc_controls.do_not_generate_docs`: the
# `__future__` features imported at the top of this file.
supress_docs_for = [absolute_import, division, print_function]
75+
76+
77+
def _filter_class_attributes(path, parent, children):
78+
"""Filter out class attirubtes that are part of the PTransform API."""
79+
del path
80+
skip_class_attributes = {
81+
"expand", "label", "from_runner_api", "register_urn", "side_inputs"
82+
}
83+
if inspect.isclass(parent):
84+
children = [(name, child)
85+
for (name, child) in children
86+
if name not in skip_class_attributes]
87+
return children
88+
89+
90+
def main(args):
  """Generate the TFDV API reference docs into `FLAGS.output_dir`.

  Args:
    args: Command-line arguments handed over by `app.run`. Anything beyond the
      first element is treated as an error.

  Returns:
    Whatever `doc_generator.build` returns.

  Raises:
    ValueError: If `args` contains more than one element.
  """
  leftover = args[1:]
  if leftover:
    raise ValueError("Unrecognized Command line args", leftover)

  # Hide the explicitly listed objects, then every module that is a direct
  # member of the `tfdv` package.
  for hidden in supress_docs_for:
    doc_controls.do_not_generate_docs(hidden)
  for _, submodule in inspect.getmembers(tfdv, inspect.ismodule):
    doc_controls.do_not_generate_docs(submodule)

  # Keep the members of PTransform (other than `__init__`) out of the docs of
  # any derived class. Members that cannot be marked are skipped on purpose.
  for member_name, member in inspect.getmembers(beam.PTransform):
    if member_name != "__init__":
      try:
        doc_controls.do_not_doc_inheritable(member)
      except (TypeError, AttributeError):
        pass

  # local_definitions_filter ensures that shared modules are only documented
  # in the location that defines them, instead of every location that imports
  # them.
  visitor_callbacks = [
      public_api.local_definitions_filter,
      _filter_class_attributes,
  ]

  doc_generator = generate_lib.DocGenerator(
      root_title="TensorFlow Data Validation",
      py_modules=[("tfdv", tfdv)],
      code_url_prefix=FLAGS.code_url_prefix,
      search_hints=FLAGS.search_hints,
      site_path=FLAGS.site_path,
      # Use private_map to exclude doc locations by name if excluding by
      # object is insufficient.
      private_map={},
      callbacks=visitor_callbacks)

  return doc_generator.build(output_dir=FLAGS.output_dir)
126+
127+
128+
if __name__ == "__main__":
  # Script entry point: hand `main` to absl's application runner.
  app.run(main)

0 commit comments

Comments
 (0)