Fixed docling parser

This commit is contained in:
zzhtx258
2025-07-29 19:54:55 +08:00
parent 7775bb35ea
commit d858eabaf9
3 changed files with 418 additions and 37 deletions

View File

@@ -1286,42 +1286,35 @@ class DoclingParser(Parser):
with open(json_file, "r", encoding="utf-8") as f:
docling_content = json.load(f)
# Convert docling format to minerU format
children = docling_content["body"]["children"]
cnt = 0
for child in children:
cnt += 1
tag = child["$ref"]
type = tag.split("/")[1]
num = tag.split("/")[2]
block = docling_content[type][int(num)]
if type != "groups":
content_list.append(
self.read_from_block(block, type, num, file_subdir, cnt)
)
else:
members = block["children"]
for member in members:
member_tag = member["$ref"]
member_type = member_tag.split("/")[1]
member_num = member_tag.split("/")[2]
member_block = docling_content[member_type][
int(member_num)
]
content_list.append(
self.read_from_block(
member_block,
member_type,
member_num,
file_subdir,
cnt,
)
)
content_list = self.read_from_block_recursive(docling_content["body"], "body", file_subdir, 0, "0", docling_content)
except Exception as e:
logging.warning(f"Could not read or convert JSON file {json_file}: {e}")
return content_list, md_content
def read_from_block_recursive(
    self,
    block,
    type: str,
    output_dir: Path,
    cnt: int,
    num: str,
    docling_content: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Flatten a docling block and all of its descendants into a content list.

    The tree is walked depth-first in document order. Every block whose
    collection is not a pure container (``"groups"`` / ``"body"``) is
    converted with ``self.read_from_block``; its children, if any, are then
    resolved through their ``"$ref"`` pointers and converted recursively.

    Args:
        block: The docling node to convert (a dict from the parsed JSON).
        type: Name of the top-level collection ``block`` lives in, e.g.
            ``"body"``, ``"groups"``, ``"texts"``.
        output_dir: Forwarded unchanged to ``read_from_block``.
        cnt: Running item counter handed to ``read_from_block``.
        num: Index of ``block`` inside its collection, as a string.
        docling_content: The whole parsed docling document, used to resolve
            ``"$ref"`` pointers of the form ``"#/<collection>/<index>"``.

    Returns:
        The converted items for ``block`` (when it is not a container)
        followed by the items of all its descendants.

    Raises:
        KeyError, IndexError, ValueError: If a child reference is malformed
            or points at a collection/index that does not exist.
    """
    content_list: List[Dict[str, Any]] = []

    # "body" and "groups" nodes only structure the document; they carry no
    # content of their own. They must never be converted, even when they
    # have no children: previously an empty container was handed to
    # read_from_block and surfaced as a bogus content item.
    if type not in ("groups", "body"):
        cnt += 1
        content_list.append(
            self.read_from_block(block, type, output_dir, cnt, num)
        )

    # A missing, null or empty "children" entry all mean "leaf".
    for member in block.get("children") or []:
        # NOTE(review): cnt is a plain int, so increments made while
        # converting a child's subtree are not seen by its later siblings.
        # The numbering is kept as-is because read_from_block derives
        # page_idx from it.
        cnt += 1
        ref_parts = member["$ref"].split("/")
        member_type = ref_parts[1]
        member_num = ref_parts[2]
        member_block = docling_content[member_type][int(member_num)]
        content_list.extend(
            self.read_from_block_recursive(
                member_block,
                member_type,
                output_dir,
                cnt,
                member_num,
                docling_content,
            )
        )
    return content_list
def read_from_block(
self, block, type: str, num: str, output_dir: Path, cnt: int
self, block, type: str, output_dir: Path, cnt: int, num: str
) -> Dict[str, Any]:
if type == "texts":
if block["label"] == "formula":
@@ -1330,13 +1323,13 @@ class DoclingParser(Parser):
"img_path": "",
"text": block["orig"],
"text_format": "unkown",
"page_idx": int(cnt) / 10,
"page_idx": cnt // 10,
}
else:
return {
"type": "text",
"text": block["orig"],
"page_idx": int(cnt) / 10,
"page_idx": cnt // 10,
}
elif type == "pictures":
try:
@@ -1353,14 +1346,14 @@ class DoclingParser(Parser):
"img_path": str(image_path.resolve()), # Convert to absolute path
"image_caption": block.get("caption", ""),
"image_footnote": block.get("footnote", ""),
"page_idx": int(cnt) / 10,
"page_idx": cnt // 10,
}
except Exception as e:
logging.warning(f"Failed to process image {num}: {e}")
return {
"type": "text",
"text": f"[Image processing failed: {block.get('caption', '')}]",
"page_idx": int(cnt) / 10,
"page_idx": cnt // 10,
}
else:
try:
@@ -1370,14 +1363,14 @@ class DoclingParser(Parser):
"table_caption": block.get("caption", ""),
"table_footnote": block.get("footnote", ""),
"table_body": block.get("data", []),
"page_idx": int(cnt) / 10,
"page_idx": cnt // 10,
}
except Exception as e:
logging.warning(f"Failed to process table {num}: {e}")
return {
"type": "text",
"text": f"[Table processing failed: {block.get('caption', '')}]",
"page_idx": int(cnt) / 10,
"page_idx": cnt // 10,
}
def parse_office_doc(

BIN
test.docx Normal file

Binary file not shown.

388
test.json Normal file
View File

@@ -0,0 +1,388 @@
{
"schema_name": "DoclingDocument",
"version": "1.5.0",
"name": "test",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"binary_hash": 5843725660656453572,
"filename": "test.docx"
},
"furniture": {
"self_ref": "#/furniture",
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"children": [
{
"$ref": "#/groups/0"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [
{
"self_ref": "#/groups/0",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/0"
}
],
"content_layer": "body",
"name": "header-0",
"label": "section"
}
],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"$ref": "#/groups/0"
},
"children": [
{
"$ref": "#/texts/1"
},
{
"$ref": "#/texts/2"
},
{
"$ref": "#/texts/3"
},
{
"$ref": "#/texts/4"
},
{
"$ref": "#/texts/5"
},
{
"$ref": "#/texts/6"
},
{
"$ref": "#/texts/7"
},
{
"$ref": "#/texts/8"
},
{
"$ref": "#/texts/9"
},
{
"$ref": "#/texts/10"
},
{
"$ref": "#/texts/11"
},
{
"$ref": "#/texts/12"
},
{
"$ref": "#/texts/13"
},
{
"$ref": "#/texts/14"
},
{
"$ref": "#/texts/15"
}
],
"content_layer": "body",
"label": "section_header",
"prov": [],
"orig": "\u6211\u7684\u4e00\u5929\uff1a\u5728\u5e73\u51e1\u4e2d\u5bfb\u5f97\u8bd7\u610f\u4e0e\u529b\u91cf",
"text": "\u6211\u7684\u4e00\u5929\uff1a\u5728\u5e73\u51e1\u4e2d\u5bfb\u5f97\u8bd7\u610f\u4e0e\u529b\u91cf",
"level": 1
},
{
"self_ref": "#/texts/1",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u6e05\u6668\uff0c\u5f53\u7b2c\u4e00\u7f15\u9633\u5149\u8f7b\u67d4\u5730\u900f\u8fc7\u7a97\u5e18\u7f1d\u9699\uff0c\u5982\u91d1\u8272\u7684\u4e1d\u7ebf\u822c\u6d12\u5728\u8138\u4e0a\uff0c\u6211\u7f13\u7f13\u7741\u5f00\u53cc\u773c\uff0c\u65b0\u7684\u4e00\u5929\u5c31\u8fd9\u6837\u6084\u7136\u62c9\u5f00\u4e86\u5e37\u5e55\u3002",
"text": "\u6e05\u6668\uff0c\u5f53\u7b2c\u4e00\u7f15\u9633\u5149\u8f7b\u67d4\u5730\u900f\u8fc7\u7a97\u5e18\u7f1d\u9699\uff0c\u5982\u91d1\u8272\u7684\u4e1d\u7ebf\u822c\u6d12\u5728\u8138\u4e0a\uff0c\u6211\u7f13\u7f13\u7741\u5f00\u53cc\u773c\uff0c\u65b0\u7684\u4e00\u5929\u5c31\u8fd9\u6837\u6084\u7136\u62c9\u5f00\u4e86\u5e37\u5e55\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/2",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u6211\u4e60\u60ef\u6027\u5730\u5148\u5728\u5e8a\u4e0a\u9759\u8eba\u7247\u523b\uff0c\u8ba9\u601d\u7eea\u4ece\u68a6\u5883\u4e2d\u6162\u6162\u62bd\u79bb\uff0c\u611f\u53d7\u7740\u8eab\u4f53\u4ece\u6175\u61d2\u9010\u6e10\u82cf\u9192\u3002\u968f\u540e\uff0c\u6211\u8d77\u8eab\u8d70\u5230\u7a97\u8fb9\uff0c\u8f7b\u8f7b\u62c9\u5f00\u7a97\u5e18\uff0c\u8ba9\u66f4\u591a\u7684\u9633\u5149\u6d8c\u8fdb\u623f\u95f4\u3002\u7a97\u5916\uff0c\u5c0f\u533a\u91cc\u7684\u6811\u6728\u5728\u5fae\u98ce\u4e2d\u8f7b\u8f7b\u6447\u66f3\uff0c\u679d\u53f6\u95f4\u95ea\u70c1\u7740\u70b9\u70b9\u5149\u6591\uff0c\u9e1f\u513f\u5728\u679d\u5934\u6b22\u5feb\u5730\u6b4c\u5531\uff0c\u90a3\u6e05\u8106\u7684\u6b4c\u58f0\u4eff\u4f5b\u662f\u5927\u81ea\u7136\u594f\u54cd\u7684\u7f8e\u5999\u4e50\u7ae0\uff0c\u77ac\u95f4\u9a71\u6563\u4e86\u6b8b\u7559\u5728\u4f53\u5185\u7684\u6700\u540e\u4e00\u4e1d\u56f0\u610f\u3002",
"text": "\u6211\u4e60\u60ef\u6027\u5730\u5148\u5728\u5e8a\u4e0a\u9759\u8eba\u7247\u523b\uff0c\u8ba9\u601d\u7eea\u4ece\u68a6\u5883\u4e2d\u6162\u6162\u62bd\u79bb\uff0c\u611f\u53d7\u7740\u8eab\u4f53\u4ece\u6175\u61d2\u9010\u6e10\u82cf\u9192\u3002\u968f\u540e\uff0c\u6211\u8d77\u8eab\u8d70\u5230\u7a97\u8fb9\uff0c\u8f7b\u8f7b\u62c9\u5f00\u7a97\u5e18\uff0c\u8ba9\u66f4\u591a\u7684\u9633\u5149\u6d8c\u8fdb\u623f\u95f4\u3002\u7a97\u5916\uff0c\u5c0f\u533a\u91cc\u7684\u6811\u6728\u5728\u5fae\u98ce\u4e2d\u8f7b\u8f7b\u6447\u66f3\uff0c\u679d\u53f6\u95f4\u95ea\u70c1\u7740\u70b9\u70b9\u5149\u6591\uff0c\u9e1f\u513f\u5728\u679d\u5934\u6b22\u5feb\u5730\u6b4c\u5531\uff0c\u90a3\u6e05\u8106\u7684\u6b4c\u58f0\u4eff\u4f5b\u662f\u5927\u81ea\u7136\u594f\u54cd\u7684\u7f8e\u5999\u4e50\u7ae0\uff0c\u77ac\u95f4\u9a71\u6563\u4e86\u6b8b\u7559\u5728\u4f53\u5185\u7684\u6700\u540e\u4e00\u4e1d\u56f0\u610f\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/3",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u7b80\u5355\u6d17\u6f31\u540e\uff0c\u6211\u8d70\u8fdb\u53a8\u623f\uff0c\u4e3a\u81ea\u5df1\u51c6\u5907\u4e00\u4efd\u8425\u517b\u4e30\u5bcc\u7684\u65e9\u9910\u3002\u714e\u4e00\u4e2a\u91d1\u9ec4\u7684\u8377\u5305\u86cb\uff0c\u70e4\u51e0\u7247\u9999\u8f6f\u7684\u9762\u5305\uff0c\u518d\u51b2\u4e00\u676f\u9999\u6d53\u7684\u5496\u5561\u3002\u5f53\u98df\u7269\u7684\u9999\u6c14\u5728\u53a8\u623f\u4e2d\u5f25\u6f2b\u5f00\u6765\uff0c\u6211\u7684\u5fc3\u60c5\u4e5f\u968f\u4e4b\u53d8\u5f97\u6109\u60a6\u8d77\u6765\u3002\u5750\u5728\u9910\u684c\u524d\uff0c\u7ec6\u7ec6\u54c1\u5473\u7740\u6bcf\u4e00\u53e3\u98df\u7269\uff0c\u611f\u53d7\u7740\u5b83\u4eec\u5728\u820c\u5c16\u4e0a\u7efd\u653e\u7684\u7f8e\u5473\uff0c\u8fd9\u770b\u4f3c\u5e73\u51e1\u7684\u65e9\u9910\u65f6\u523b\uff0c\u5374\u8ba9\u6211\u4f53\u4f1a\u5230\u751f\u6d3b\u7684\u8e0f\u5b9e\u4e0e\u6ee1\u8db3\u3002",
"text": "\u7b80\u5355\u6d17\u6f31\u540e\uff0c\u6211\u8d70\u8fdb\u53a8\u623f\uff0c\u4e3a\u81ea\u5df1\u51c6\u5907\u4e00\u4efd\u8425\u517b\u4e30\u5bcc\u7684\u65e9\u9910\u3002\u714e\u4e00\u4e2a\u91d1\u9ec4\u7684\u8377\u5305\u86cb\uff0c\u70e4\u51e0\u7247\u9999\u8f6f\u7684\u9762\u5305\uff0c\u518d\u51b2\u4e00\u676f\u9999\u6d53\u7684\u5496\u5561\u3002\u5f53\u98df\u7269\u7684\u9999\u6c14\u5728\u53a8\u623f\u4e2d\u5f25\u6f2b\u5f00\u6765\uff0c\u6211\u7684\u5fc3\u60c5\u4e5f\u968f\u4e4b\u53d8\u5f97\u6109\u60a6\u8d77\u6765\u3002\u5750\u5728\u9910\u684c\u524d\uff0c\u7ec6\u7ec6\u54c1\u5473\u7740\u6bcf\u4e00\u53e3\u98df\u7269\uff0c\u611f\u53d7\u7740\u5b83\u4eec\u5728\u820c\u5c16\u4e0a\u7efd\u653e\u7684\u7f8e\u5473\uff0c\u8fd9\u770b\u4f3c\u5e73\u51e1\u7684\u65e9\u9910\u65f6\u523b\uff0c\u5374\u8ba9\u6211\u4f53\u4f1a\u5230\u751f\u6d3b\u7684\u8e0f\u5b9e\u4e0e\u6ee1\u8db3\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/4",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u65e9\u9910\u8fc7\u540e\uff0c\u6211\u80cc\u4e0a\u4e66\u5305\uff0c\u8fce\u7740\u671d\u9633\u8d70\u5411\u5b66\u6821\u3002\u8def\u4e0a\uff0c\u884c\u4eba\u5306\u5306\uff0c\u8f66\u8f86\u5ddd\u6d41\u4e0d\u606f\uff0c\u5927\u5bb6\u90fd\u6000\u63e3\u7740\u5404\u81ea\u7684\u76ee\u6807\u548c\u68a6\u60f3\uff0c\u5f00\u542f\u65b0\u4e00\u5929\u7684\u594b\u6597\u3002\u6211\u54fc\u7740\u5c0f\u66f2\uff0c\u6b65\u4f10\u8f7b\u5feb\uff0c\u5fc3\u4e2d\u6ee1\u662f\u5bf9\u77e5\u8bc6\u7684\u6e34\u671b\u548c\u5bf9\u672a\u6765\u7684\u61a7\u61ac\u3002",
"text": "\u65e9\u9910\u8fc7\u540e\uff0c\u6211\u80cc\u4e0a\u4e66\u5305\uff0c\u8fce\u7740\u671d\u9633\u8d70\u5411\u5b66\u6821\u3002\u8def\u4e0a\uff0c\u884c\u4eba\u5306\u5306\uff0c\u8f66\u8f86\u5ddd\u6d41\u4e0d\u606f\uff0c\u5927\u5bb6\u90fd\u6000\u63e3\u7740\u5404\u81ea\u7684\u76ee\u6807\u548c\u68a6\u60f3\uff0c\u5f00\u542f\u65b0\u4e00\u5929\u7684\u594b\u6597\u3002\u6211\u54fc\u7740\u5c0f\u66f2\uff0c\u6b65\u4f10\u8f7b\u5feb\uff0c\u5fc3\u4e2d\u6ee1\u662f\u5bf9\u77e5\u8bc6\u7684\u6e34\u671b\u548c\u5bf9\u672a\u6765\u7684\u61a7\u61ac\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/5",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u6765\u5230\u5b66\u6821\uff0c\u65e9\u8bfb\u7684\u94c3\u58f0\u521a\u597d\u54cd\u8d77\u3002\u6211\u8fc5\u901f\u62ff\u51fa\u8bfe\u672c\uff0c\u548c\u540c\u5b66\u4eec\u4e00\u8d77\u6717\u8bfb\u8bfe\u6587\u3002\u6717\u6717\u7684\u8bfb\u4e66\u58f0\u5728\u6559\u5ba4\u91cc\u56de\u8361\uff0c\u90a3\u58f0\u97f3\u5145\u6ee1\u4e86\u671d\u6c14\u4e0e\u6d3b\u529b\uff0c\u4eff\u4f5b\u662f\u5728\u5411\u4e16\u754c\u5ba3\u544a\u6211\u4eec\u5bf9\u5b66\u4e60\u7684\u70ed\u60c5\u3002\u8bfe\u5802\u4e0a\uff0c\u8001\u5e08\u4eec\u7ed8\u58f0\u7ed8\u8272\u5730\u8bb2\u89e3\u7740\u77e5\u8bc6\uff0c\u6211\u5168\u795e\u8d2f\u6ce8\u5730\u8046\u542c\uff0c\u79ef\u6781\u601d\u8003\u6bcf\u4e00\u4e2a\u95ee\u9898\uff0c\u8e0a\u8dc3\u53d1\u8a00\u3002\u6bcf\u5f53\u89e3\u5f00\u4e00\u9053\u96be\u9898\uff0c\u638c\u63e1\u4e00\u4e2a\u65b0\u7684\u77e5\u8bc6\u70b9\uff0c\u90a3\u79cd\u6210\u5c31\u611f\u5c31\u50cf\u7480\u74a8\u7684\u661f\u661f\uff0c\u5728\u6211\u7684\u5fc3\u4e2d\u95ea\u8000\u3002",
"text": "\u6765\u5230\u5b66\u6821\uff0c\u65e9\u8bfb\u7684\u94c3\u58f0\u521a\u597d\u54cd\u8d77\u3002\u6211\u8fc5\u901f\u62ff\u51fa\u8bfe\u672c\uff0c\u548c\u540c\u5b66\u4eec\u4e00\u8d77\u6717\u8bfb\u8bfe\u6587\u3002\u6717\u6717\u7684\u8bfb\u4e66\u58f0\u5728\u6559\u5ba4\u91cc\u56de\u8361\uff0c\u90a3\u58f0\u97f3\u5145\u6ee1\u4e86\u671d\u6c14\u4e0e\u6d3b\u529b\uff0c\u4eff\u4f5b\u662f\u5728\u5411\u4e16\u754c\u5ba3\u544a\u6211\u4eec\u5bf9\u5b66\u4e60\u7684\u70ed\u60c5\u3002\u8bfe\u5802\u4e0a\uff0c\u8001\u5e08\u4eec\u7ed8\u58f0\u7ed8\u8272\u5730\u8bb2\u89e3\u7740\u77e5\u8bc6\uff0c\u6211\u5168\u795e\u8d2f\u6ce8\u5730\u8046\u542c\uff0c\u79ef\u6781\u601d\u8003\u6bcf\u4e00\u4e2a\u95ee\u9898\uff0c\u8e0a\u8dc3\u53d1\u8a00\u3002\u6bcf\u5f53\u89e3\u5f00\u4e00\u9053\u96be\u9898\uff0c\u638c\u63e1\u4e00\u4e2a\u65b0\u7684\u77e5\u8bc6\u70b9\uff0c\u90a3\u79cd\u6210\u5c31\u611f\u5c31\u50cf\u7480\u74a8\u7684\u661f\u661f\uff0c\u5728\u6211\u7684\u5fc3\u4e2d\u95ea\u8000\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/6",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u8bfe\u95f4\u4f11\u606f\u65f6\uff0c\u6211\u548c\u597d\u670b\u53cb\u4eec\u5728\u8d70\u5eca\u4e0a\u5b09\u7b11\u73a9\u800d\uff0c\u5206\u4eab\u7740\u5f7c\u6b64\u7684\u5c0f\u79d8\u5bc6\u548c\u751f\u6d3b\u4e2d\u7684\u8da3\u4e8b\u3002\u77ed\u6682\u7684\u653e\u677e\u540e\uff0c\u53c8\u4ee5\u9971\u6ee1\u7684\u7cbe\u795e\u6295\u5165\u5230\u4e0b\u4e00\u8282\u8bfe\u7684\u5b66\u4e60\u4e2d\u3002",
"text": "\u8bfe\u95f4\u4f11\u606f\u65f6\uff0c\u6211\u548c\u597d\u670b\u53cb\u4eec\u5728\u8d70\u5eca\u4e0a\u5b09\u7b11\u73a9\u800d\uff0c\u5206\u4eab\u7740\u5f7c\u6b64\u7684\u5c0f\u79d8\u5bc6\u548c\u751f\u6d3b\u4e2d\u7684\u8da3\u4e8b\u3002\u77ed\u6682\u7684\u653e\u677e\u540e\uff0c\u53c8\u4ee5\u9971\u6ee1\u7684\u7cbe\u795e\u6295\u5165\u5230\u4e0b\u4e00\u8282\u8bfe\u7684\u5b66\u4e60\u4e2d\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/7",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u4e2d\u5348\uff0c\u5b66\u6821\u7684\u98df\u5802\u91cc\u98d8\u7740\u9635\u9635\u996d\u83dc\u7684\u9999\u6c14\u3002\u6211\u548c\u540c\u5b66\u4eec\u6392\u7740\u961f\uff0c\u6311\u9009\u7740\u81ea\u5df1\u559c\u7231\u7684\u83dc\u80b4\u3002\u5927\u5bb6\u56f4\u5750\u5728\u4e00\u8d77\uff0c\u4e00\u8fb9\u4eab\u53d7\u7740\u7f8e\u98df\uff0c\u4e00\u8fb9\u5929\u5357\u6d77\u5317\u5730\u804a\u5929\uff0c\u6b22\u58f0\u7b11\u8bed\u56de\u8361\u5728\u98df\u5802\u7684\u6bcf\u4e00\u4e2a\u89d2\u843d\u3002\u8fd9\u987f\u5348\u9910\uff0c\u4e0d\u4ec5\u586b\u9971\u4e86\u809a\u5b50\uff0c\u66f4\u589e\u8fdb\u4e86\u540c\u5b66\u4e4b\u95f4\u7684\u53cb\u8c0a\u3002",
"text": "\u4e2d\u5348\uff0c\u5b66\u6821\u7684\u98df\u5802\u91cc\u98d8\u7740\u9635\u9635\u996d\u83dc\u7684\u9999\u6c14\u3002\u6211\u548c\u540c\u5b66\u4eec\u6392\u7740\u961f\uff0c\u6311\u9009\u7740\u81ea\u5df1\u559c\u7231\u7684\u83dc\u80b4\u3002\u5927\u5bb6\u56f4\u5750\u5728\u4e00\u8d77\uff0c\u4e00\u8fb9\u4eab\u53d7\u7740\u7f8e\u98df\uff0c\u4e00\u8fb9\u5929\u5357\u6d77\u5317\u5730\u804a\u5929\uff0c\u6b22\u58f0\u7b11\u8bed\u56de\u8361\u5728\u98df\u5802\u7684\u6bcf\u4e00\u4e2a\u89d2\u843d\u3002\u8fd9\u987f\u5348\u9910\uff0c\u4e0d\u4ec5\u586b\u9971\u4e86\u809a\u5b50\uff0c\u66f4\u589e\u8fdb\u4e86\u540c\u5b66\u4e4b\u95f4\u7684\u53cb\u8c0a\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/8",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u5348\u4f11\u8fc7\u540e\uff0c\u4fbf\u662f\u4e0b\u5348\u7684\u8bfe\u7a0b\u3002\u6709\u65f6\u662f\u5145\u6ee1\u6311\u6218\u7684\u6570\u7406\u5316\uff0c\u6709\u65f6\u662f\u5bcc\u6709\u97f5\u5473\u7684\u6587\u53f2\u54f2\uff0c\u6bcf\u4e00\u95e8\u8bfe\u7a0b\u90fd\u50cf\u662f\u4e00\u6247\u901a\u5f80\u4e0d\u540c\u4e16\u754c\u7684\u5927\u95e8\uff0c\u5f15\u9886\u6211\u4e0d\u65ad\u63a2\u7d22\u672a\u77e5\u7684\u9886\u57df\u3002",
"text": "\u5348\u4f11\u8fc7\u540e\uff0c\u4fbf\u662f\u4e0b\u5348\u7684\u8bfe\u7a0b\u3002\u6709\u65f6\u662f\u5145\u6ee1\u6311\u6218\u7684\u6570\u7406\u5316\uff0c\u6709\u65f6\u662f\u5bcc\u6709\u97f5\u5473\u7684\u6587\u53f2\u54f2\uff0c\u6bcf\u4e00\u95e8\u8bfe\u7a0b\u90fd\u50cf\u662f\u4e00\u6247\u901a\u5f80\u4e0d\u540c\u4e16\u754c\u7684\u5927\u95e8\uff0c\u5f15\u9886\u6211\u4e0d\u65ad\u63a2\u7d22\u672a\u77e5\u7684\u9886\u57df\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/9",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u968f\u7740\u653e\u5b66\u94c3\u58f0\u7684\u54cd\u8d77\uff0c\u4e00\u5929\u7684\u6821\u56ed\u751f\u6d3b\u753b\u4e0a\u4e86\u53e5\u53f7\u3002\u6211\u6536\u62fe\u597d\u4e66\u5305\uff0c\u548c\u540c\u5b66\u4eec\u544a\u522b\u540e\uff0c\u4fbf\u8e0f\u4e0a\u4e86\u56de\u5bb6\u7684\u8def\u3002",
"text": "\u968f\u7740\u653e\u5b66\u94c3\u58f0\u7684\u54cd\u8d77\uff0c\u4e00\u5929\u7684\u6821\u56ed\u751f\u6d3b\u753b\u4e0a\u4e86\u53e5\u53f7\u3002\u6211\u6536\u62fe\u597d\u4e66\u5305\uff0c\u548c\u540c\u5b66\u4eec\u544a\u522b\u540e\uff0c\u4fbf\u8e0f\u4e0a\u4e86\u56de\u5bb6\u7684\u8def\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/10",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u56de\u5230\u5bb6\uff0c\u7a0d\u4f5c\u4f11\u606f\uff0c\u6211\u4fbf\u5f00\u59cb\u5b8c\u6210\u4f5c\u4e1a\u3002\u5728\u5b89\u9759\u7684\u4e66\u623f\u91cc\uff0c\u6211\u6c89\u6d78\u5728\u77e5\u8bc6\u7684\u6d77\u6d0b\u4e2d\uff0c\u4e0e\u4e00\u9053\u9053\u96be\u9898\u6597\u667a\u6597\u52c7\u3002\u9047\u5230\u56f0\u96be\u65f6\uff0c\u6211\u4e0d\u518d\u50cf\u4ee5\u524d\u90a3\u6837\u8f7b\u6613\u653e\u5f03\uff0c\u800c\u662f\u9759\u4e0b\u5fc3\u6765\uff0c\u8ba4\u771f\u601d\u8003\uff0c\u67e5\u9605\u8d44\u6599\uff0c\u5c1d\u8bd5\u7528\u4e0d\u540c\u7684\u65b9\u6cd5\u53bb\u89e3\u51b3\u95ee\u9898\u3002\u5f53\u6700\u7ec8\u653b\u514b\u96be\u5173\u65f6\uff0c\u90a3\u79cd\u559c\u60a6\u548c\u81ea\u8c6a\u96be\u4ee5\u8a00\u8868\u3002",
"text": "\u56de\u5230\u5bb6\uff0c\u7a0d\u4f5c\u4f11\u606f\uff0c\u6211\u4fbf\u5f00\u59cb\u5b8c\u6210\u4f5c\u4e1a\u3002\u5728\u5b89\u9759\u7684\u4e66\u623f\u91cc\uff0c\u6211\u6c89\u6d78\u5728\u77e5\u8bc6\u7684\u6d77\u6d0b\u4e2d\uff0c\u4e0e\u4e00\u9053\u9053\u96be\u9898\u6597\u667a\u6597\u52c7\u3002\u9047\u5230\u56f0\u96be\u65f6\uff0c\u6211\u4e0d\u518d\u50cf\u4ee5\u524d\u90a3\u6837\u8f7b\u6613\u653e\u5f03\uff0c\u800c\u662f\u9759\u4e0b\u5fc3\u6765\uff0c\u8ba4\u771f\u601d\u8003\uff0c\u67e5\u9605\u8d44\u6599\uff0c\u5c1d\u8bd5\u7528\u4e0d\u540c\u7684\u65b9\u6cd5\u53bb\u89e3\u51b3\u95ee\u9898\u3002\u5f53\u6700\u7ec8\u653b\u514b\u96be\u5173\u65f6\uff0c\u90a3\u79cd\u559c\u60a6\u548c\u81ea\u8c6a\u96be\u4ee5\u8a00\u8868\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/11",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u5b8c\u6210\u4f5c\u4e1a\u540e\uff0c\u6211\u4f1a\u62bd\u51fa\u4e00\u4e9b\u65f6\u95f4\u6765\u9605\u8bfb\u81ea\u5df1\u559c\u6b22\u7684\u4e66\u7c4d\u3002\u5728\u4e66\u7684\u4e16\u754c\u91cc\uff0c\u6211\u53ef\u4ee5\u7a7f\u8d8a\u65f6\u7a7a\uff0c\u4e0e\u53e4\u4eca\u4e2d\u5916\u7684\u4f1f\u4eba\u5bf9\u8bdd\uff1b\u53ef\u4ee5\u9886\u7565\u5927\u81ea\u7136\u7684\u58ee\u4e3d\u7f8e\u666f\uff0c\u611f\u53d7\u4e16\u95f4\u7684\u4e07\u822c\u60c5\u611f\u3002\u4e66\u7c4d\u5c31\u50cf\u4e00\u4f4d\u65e0\u58f0\u7684\u631a\u53cb\uff0c\u966a\u4f34\u6211\u5ea6\u8fc7\u4e00\u4e2a\u4e2a\u7f8e\u597d\u7684\u65f6\u5149\uff0c\u8ba9\u6211\u7684\u5fc3\u7075\u5f97\u5230\u6ecb\u517b\u548c\u5347\u534e\u3002",
"text": "\u5b8c\u6210\u4f5c\u4e1a\u540e\uff0c\u6211\u4f1a\u62bd\u51fa\u4e00\u4e9b\u65f6\u95f4\u6765\u9605\u8bfb\u81ea\u5df1\u559c\u6b22\u7684\u4e66\u7c4d\u3002\u5728\u4e66\u7684\u4e16\u754c\u91cc\uff0c\u6211\u53ef\u4ee5\u7a7f\u8d8a\u65f6\u7a7a\uff0c\u4e0e\u53e4\u4eca\u4e2d\u5916\u7684\u4f1f\u4eba\u5bf9\u8bdd\uff1b\u53ef\u4ee5\u9886\u7565\u5927\u81ea\u7136\u7684\u58ee\u4e3d\u7f8e\u666f\uff0c\u611f\u53d7\u4e16\u95f4\u7684\u4e07\u822c\u60c5\u611f\u3002\u4e66\u7c4d\u5c31\u50cf\u4e00\u4f4d\u65e0\u58f0\u7684\u631a\u53cb\uff0c\u966a\u4f34\u6211\u5ea6\u8fc7\u4e00\u4e2a\u4e2a\u7f8e\u597d\u7684\u65f6\u5149\uff0c\u8ba9\u6211\u7684\u5fc3\u7075\u5f97\u5230\u6ecb\u517b\u548c\u5347\u534e\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/12",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u665a\u996d\u540e\uff0c\u6211\u4f1a\u548c\u7236\u6bcd\u4e00\u8d77\u5728\u5c0f\u533a\u91cc\u6563\u6b65\u3002\u6211\u4eec\u4e00\u8fb9\u6162\u60a0\u60a0\u5730\u8d70\u7740\uff0c\u4e00\u8fb9\u804a\u7740\u4e00\u5929\u4e2d\u53d1\u751f\u7684\u8da3\u4e8b\u3002\u7236\u6bcd\u5173\u5207\u5730\u8be2\u95ee\u6211\u5728\u5b66\u6821\u7684\u5b66\u4e60\u548c\u751f\u6d3b\u60c5\u51b5\uff0c\u6211\u4e5f\u8010\u5fc3\u5730\u503e\u542c\u4ed6\u4eec\u7684\u53ee\u5631\u548c\u5efa\u8bae\u3002\u6e29\u6696\u7684\u706f\u5149\u6d12\u5728\u6211\u4eec\u8eab\u4e0a\uff0c\u6620\u7167\u51fa\u6211\u4eec\u5e78\u798f\u7684\u7b11\u5bb9\u3002\u8fd9\u6e29\u99a8\u7684\u65f6\u523b\uff0c\u8ba9\u6211\u611f\u53d7\u5230\u5bb6\u7684\u6e29\u6696\u548c\u4eb2\u60c5\u7684\u73cd\u8d35\u3002",
"text": "\u665a\u996d\u540e\uff0c\u6211\u4f1a\u548c\u7236\u6bcd\u4e00\u8d77\u5728\u5c0f\u533a\u91cc\u6563\u6b65\u3002\u6211\u4eec\u4e00\u8fb9\u6162\u60a0\u60a0\u5730\u8d70\u7740\uff0c\u4e00\u8fb9\u804a\u7740\u4e00\u5929\u4e2d\u53d1\u751f\u7684\u8da3\u4e8b\u3002\u7236\u6bcd\u5173\u5207\u5730\u8be2\u95ee\u6211\u5728\u5b66\u6821\u7684\u5b66\u4e60\u548c\u751f\u6d3b\u60c5\u51b5\uff0c\u6211\u4e5f\u8010\u5fc3\u5730\u503e\u542c\u4ed6\u4eec\u7684\u53ee\u5631\u548c\u5efa\u8bae\u3002\u6e29\u6696\u7684\u706f\u5149\u6d12\u5728\u6211\u4eec\u8eab\u4e0a\uff0c\u6620\u7167\u51fa\u6211\u4eec\u5e78\u798f\u7684\u7b11\u5bb9\u3002\u8fd9\u6e29\u99a8\u7684\u65f6\u523b\uff0c\u8ba9\u6211\u611f\u53d7\u5230\u5bb6\u7684\u6e29\u6696\u548c\u4eb2\u60c5\u7684\u73cd\u8d35\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/13",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u56de\u5230\u5bb6\uff0c\u6d17\u6f31\u5b8c\u6bd5\u540e\uff0c\u6211\u8eba\u5728\u5e8a\u4e0a\uff0c\u56de\u987e\u7740\u8fd9\u4e00\u5929\u7684\u70b9\u70b9\u6ef4\u6ef4\u3002\u6709\u5b66\u4e60\u4e0a\u7684\u6536\u83b7\u4e0e\u8fdb\u6b65\uff0c\u6709\u548c\u540c\u5b66\u76f8\u5904\u65f6\u7684\u6b22\u4e50\u4e0e\u611f\u52a8\uff0c\u4e5f\u6709\u548c\u7236\u6bcd\u4ea4\u6d41\u65f6\u7684\u6e29\u99a8\u4e0e\u5e78\u798f\u3002\u867d\u7136\u8fd9\u4e00\u5929\u4e5f\u6709\u75b2\u60eb\u548c\u70e6\u607c\uff0c\u4f46\u66f4\u591a\u7684\u662f\u5145\u5b9e\u548c\u5feb\u4e50\u3002",
"text": "\u56de\u5230\u5bb6\uff0c\u6d17\u6f31\u5b8c\u6bd5\u540e\uff0c\u6211\u8eba\u5728\u5e8a\u4e0a\uff0c\u56de\u987e\u7740\u8fd9\u4e00\u5929\u7684\u70b9\u70b9\u6ef4\u6ef4\u3002\u6709\u5b66\u4e60\u4e0a\u7684\u6536\u83b7\u4e0e\u8fdb\u6b65\uff0c\u6709\u548c\u540c\u5b66\u76f8\u5904\u65f6\u7684\u6b22\u4e50\u4e0e\u611f\u52a8\uff0c\u4e5f\u6709\u548c\u7236\u6bcd\u4ea4\u6d41\u65f6\u7684\u6e29\u99a8\u4e0e\u5e78\u798f\u3002\u867d\u7136\u8fd9\u4e00\u5929\u4e5f\u6709\u75b2\u60eb\u548c\u70e6\u607c\uff0c\u4f46\u66f4\u591a\u7684\u662f\u5145\u5b9e\u548c\u5feb\u4e50\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/14",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "\u6211\u7684\u4e00\u5929\uff0c\u6ca1\u6709\u60ca\u5929\u52a8\u5730\u7684\u5927\u4e8b\uff0c\u53ea\u662f\u5728\u5e73\u51e1\u7684\u5b66\u4e60\u3001\u751f\u6d3b\u548c\u4e0e\u5bb6\u4eba\u670b\u53cb\u7684\u76f8\u5904\u4e2d\u5ea6\u8fc7\u3002\u4f46\u6b63\u662f\u8fd9\u4e9b\u770b\u4f3c\u5e73\u51e1\u7684\u77ac\u95f4\uff0c\u6c47\u805a\u6210\u4e86\u6211\u751f\u6d3b\u4e2d\u6700\u5b9d\u8d35\u7684\u8d22\u5bcc\uff0c\u8ba9\u6211\u5728\u6210\u957f\u7684\u9053\u8def\u4e0a\u4e0d\u65ad\u524d\u884c\uff0c\u8ba9\u6211\u61c2\u5f97\u73cd\u60dc\u6bcf\u4e00\u4e2a\u5f53\u4e0b\uff0c\u7528\u5fc3\u53bb\u611f\u53d7\u751f\u6d3b\u7684\u7f8e\u597d\uff0c\u5728\u5e73\u51e1\u7684\u65e5\u5b50\u91cc\u5bfb\u5f97\u5c5e\u4e8e\u81ea\u5df1\u7684\u8bd7\u610f\u4e0e\u529b\u91cf\u3002\u5e26\u7740\u8fd9\u4efd\u6ee1\u8db3\u4e0e\u671f\u5f85\uff0c\u6211\u6e10\u6e10\u8fdb\u5165\u68a6\u4e61\uff0c\u8fce\u63a5\u65b0\u4e00\u5929\u7684\u5230\u6765\u3002",
"text": "\u6211\u7684\u4e00\u5929\uff0c\u6ca1\u6709\u60ca\u5929\u52a8\u5730\u7684\u5927\u4e8b\uff0c\u53ea\u662f\u5728\u5e73\u51e1\u7684\u5b66\u4e60\u3001\u751f\u6d3b\u548c\u4e0e\u5bb6\u4eba\u670b\u53cb\u7684\u76f8\u5904\u4e2d\u5ea6\u8fc7\u3002\u4f46\u6b63\u662f\u8fd9\u4e9b\u770b\u4f3c\u5e73\u51e1\u7684\u77ac\u95f4\uff0c\u6c47\u805a\u6210\u4e86\u6211\u751f\u6d3b\u4e2d\u6700\u5b9d\u8d35\u7684\u8d22\u5bcc\uff0c\u8ba9\u6211\u5728\u6210\u957f\u7684\u9053\u8def\u4e0a\u4e0d\u65ad\u524d\u884c\uff0c\u8ba9\u6211\u61c2\u5f97\u73cd\u60dc\u6bcf\u4e00\u4e2a\u5f53\u4e0b\uff0c\u7528\u5fc3\u53bb\u611f\u53d7\u751f\u6d3b\u7684\u7f8e\u597d\uff0c\u5728\u5e73\u51e1\u7684\u65e5\u5b50\u91cc\u5bfb\u5f97\u5c5e\u4e8e\u81ea\u5df1\u7684\u8bd7\u610f\u4e0e\u529b\u91cf\u3002\u5e26\u7740\u8fd9\u4efd\u6ee1\u8db3\u4e0e\u671f\u5f85\uff0c\u6211\u6e10\u6e10\u8fdb\u5165\u68a6\u4e61\uff0c\u8fce\u63a5\u65b0\u4e00\u5929\u7684\u5230\u6765\u3002",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/15",
"parent": {
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
}
],
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {}
}