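-- PostgreSQL schema: PDF analysis output, PDF info, document truncation,
-- entity / relation extraction, domain metadata and Chinese-English dictionary tables.
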
-- Analysis output for PDFs: every row records a layout type (text or table), its
-- content, the page it was found on and its position within that page.
create table pdf_analysis_output
(
    id            serial,
    layout_type   int not null,             -- 0 = text, 1 = table
    content       text,                     -- paragraph content
    page_no       int not null,             -- page number inside the PDF
    pdf_id        int not null,             -- documented as pdf_info's key; no foreign key is declared
    table_title   character varying(100),   -- title of the table
    display_order int,                      -- position within the page, smaller comes first
    create_time   timestamp without time zone default now(),
    -- Same constraint name PostgreSQL generates for an inline "primary key".
    constraint pdf_analysis_output_pkey primary key (id)
);

comment on column pdf_analysis_output.layout_type
    is '0-文本 1-表格';
comment on column pdf_analysis_output.content
    is 'pdf段落内容';
comment on column pdf_analysis_output.page_no
    is 'pdf页码';
comment on column pdf_analysis_output.pdf_id
    is 'pdf_info表的主键';
comment on column pdf_analysis_output.table_title
    is '表格标题';
comment on column pdf_analysis_output.display_order
    is '内容在pdf页面中的顺序,越小表示顺序越靠前';

-- Ownership is transferred last, once all comments are attached.
alter table pdf_analysis_output owner to postgres;
|
|
|
|
|
|
|
|
|
|
-- PDF files known to the pipeline (path, file name) together with the processing
-- status and the start/end timestamps of the analysis and data-extraction phases.
create table pdf_info
(
    -- Declared as the primary key: the column comments on pdf_analysis_output.pdf_id
    -- and document_truncation.document_id describe this id as pdf_info's key, which
    -- only holds if the column is guaranteed unique.
    id                    serial
        primary key,
    path                  varchar(200) not null,
    filename              varchar(100),
    create_time           timestamp default now(),
    -- Status codes: 0 not analysed, 1 analysing, 2 analysis succeeded, 3 analysis failed,
    -- 4 extraction started, 5 extraction succeeded, 6 extraction failed.
    process_status        integer   default 0,
    analysis_start_time   timestamp,
    analysis_end_time     timestamp,
    extraction_start_time timestamp,
    extraction_end_time   timestamp
);

comment on table pdf_info is 'pdf信息';

comment on column pdf_info.path is 'pdf路径';

comment on column pdf_info.filename is '文件名';

comment on column pdf_info.create_time is '创建时间';

comment on column pdf_info.process_status is '处理状态 0:未分析 1:正在分析 2:分析成功 3:分析失败 4:开始数据抽取 5:抽取成功 6:抽取失败';

comment on column pdf_info.analysis_start_time is '开始分析时间';

comment on column pdf_info.analysis_end_time is '分析结束时间';

comment on column pdf_info.extraction_start_time is '开始抽取时间';

comment on column pdf_info.extraction_end_time is '结束抽取时间';

alter table pdf_info
    owner to postgres;
|
|
|
|
|
|
|
|
|
|
-- Document truncation table: content fragments cut from a document, each pointing
-- back to a document id and a section id.
-- NOTE(review): document_id is varchar(64) while pdf_info.id is serial (integer), and
-- layout_type is varchar(64) here but integer in pdf_analysis_output; joins between
-- these tables need a cast. Confirm this is intended.
create table document_truncation
(
    id          character varying(255) not null,
    document_id character varying(64),      -- document id (pdf_info.id per the column comment)
    section_id  int,                        -- paragraph id (pdf_analysis_output.id per the column comment)
    layout_type character varying(64),      -- 0 = text, 1 = table
    content     text,                       -- fragment content
    create_time timestamp without time zone default CURRENT_TIMESTAMP,
    -- The default only fires on insert; no trigger maintaining it is visible here.
    update_time timestamp without time zone default CURRENT_TIMESTAMP,
    title       character varying(1024),
    -- Same constraint name PostgreSQL generates for an inline "primary key".
    constraint document_truncation_pkey primary key (id)
);

comment on table document_truncation
    is '文档切分表';

comment on column document_truncation.document_id
    is '文档id(pdf_info表的id)';
comment on column document_truncation.section_id
    is '段落id pdf_analysis_output表的id';
comment on column document_truncation.layout_type
    is '布局类型 0-文本 1-表格';
comment on column document_truncation.content
    is '片段内容';
comment on column document_truncation.create_time
    is '创建时间';
comment on column document_truncation.update_time
    is '更新时间';
comment on column document_truncation.title
    is '标题';

-- Ownership is transferred last, once all comments are attached.
alter table document_truncation owner to postgres;
|
|
|
|
|
|
|
|
|
|
-- Entities extracted from document fragments: one label (entity type) and one
-- entity name per row, tied to a fragment through truncation_id.
create table truncation_entity_extraction
(
    id            character varying(255) not null,
    truncation_id character varying(255),   -- fragment id (document_truncation.id per the column comment)
    entity        text,                     -- label, i.e. the entity type
    name          text,                     -- entity name
    create_time   timestamp without time zone default CURRENT_TIMESTAMP,
    -- The default only fires on insert; no trigger maintaining it is visible here.
    update_time   timestamp without time zone default CURRENT_TIMESTAMP,
    -- Same constraint name PostgreSQL generates for an inline "primary key".
    constraint truncation_entity_extraction_pkey primary key (id)
);

comment on table truncation_entity_extraction
    is '片段实体抽取';

comment on column truncation_entity_extraction.id
    is '主键';
comment on column truncation_entity_extraction.truncation_id
    is '片段id document_truncation表的id';
comment on column truncation_entity_extraction.entity
    is '标签(实体类型)';
comment on column truncation_entity_extraction.name
    is '实体名';
comment on column truncation_entity_extraction.create_time
    is '创建时间';
comment on column truncation_entity_extraction.update_time
    is '更新时间';

-- Ownership is transferred last, once all comments are attached.
alter table truncation_entity_extraction owner to postgres;
|
|
|
|
|
|
|
|
|
|
-- Attribute/value pairs attached to an extracted entity or an extracted relation.
-- ter_id holds the id of the owning row and association_type says which of the two
-- tables that id belongs to (0 = entity, 1 = relation).
create table truncation_er_attribute
(
    id               varchar(255) not null
        primary key,
    -- Widened from varchar(64): it stores ids of truncation_entity_extraction /
    -- truncation_relation_extraction, both of which are varchar(255).
    ter_id           varchar(255),
    association_type varchar(255),
    attribute        text,
    value            text,
    -- 0 = string, 1 = number (stored as text codes).
    data_type        varchar(64),
    create_time      timestamp default CURRENT_TIMESTAMP,
    -- The default only fires on insert; no trigger maintaining it is visible here.
    update_time      timestamp default CURRENT_TIMESTAMP
);

-- The table description used to sit at the front of the ter_id column comment while
-- the table itself was labelled "entity table"; both are corrected below.
comment on table truncation_er_attribute is '片段实体/关系属性表';

comment on column truncation_er_attribute.ter_id is '关联id 既可以是truncation_entity_extraction表id也可以是truncation_relation_extraction表id';

-- Refers to the actual column name ter_id instead of the non-existent "terId".
comment on column truncation_er_attribute.association_type is '关联类型 0:ter_id关联的id为实体 1:ter_id关联的id为关系';

-- Was "entity name", copied from truncation_entity_extraction.name; this column is the attribute name.
comment on column truncation_er_attribute.attribute is '属性名';

comment on column truncation_er_attribute.value is '值';

comment on column truncation_er_attribute.data_type is '数据类型 0:字符串 1:数字';

comment on column truncation_er_attribute.create_time is '创建时间';

comment on column truncation_er_attribute.update_time is '更新时间';

alter table truncation_er_attribute
    owner to postgres;
|
|
|
|
|
|
|
|
|
|
-- Relations extracted from document fragments: a head node and a tail node (each
-- with a type) joined by a relation.
-- NOTE(review): truncation_id has no column comment here, unlike the same column on
-- truncation_entity_extraction; presumably it also holds document_truncation.id — confirm.
create table truncation_relation_extraction
(
    id            character varying(255) not null,
    truncation_id character varying(255),
    source        text,                     -- head node
    source_type   character varying(255),   -- head node type
    target        text,                     -- tail node
    target_type   character varying(255),   -- tail node type
    relation      text,
    create_time   timestamp without time zone default CURRENT_TIMESTAMP,
    -- The default only fires on insert; no trigger maintaining it is visible here.
    update_time   timestamp without time zone default CURRENT_TIMESTAMP,
    -- Same constraint name PostgreSQL generates for an inline "primary key".
    constraint truncation_relation_extraction_pkey primary key (id)
);

comment on table truncation_relation_extraction
    is '片段关系抽取';

comment on column truncation_relation_extraction.source
    is '头节点';
comment on column truncation_relation_extraction.source_type
    is '头节点类型';
comment on column truncation_relation_extraction.target
    is '尾节点';
comment on column truncation_relation_extraction.target_type
    is '尾节点类型';
comment on column truncation_relation_extraction.relation
    is '关系';
comment on column truncation_relation_extraction.create_time
    is '创建时间';
comment on column truncation_relation_extraction.update_time
    is '更新时间';

-- Ownership is transferred last, once all comments are attached.
alter table truncation_relation_extraction owner to postgres;
|
|
|
|
|
|
|
|
|
|
-- Domain metadata: per domain type, which head-node type may be linked to which
-- tail-node type by which relation, plus how the row was produced.
create table domain_metadata
(
    id              character varying(255) not null,
    domain_type     character varying(255),
    source_type     character varying(64),    -- head node type
    relation        character varying(64),
    target_type     character varying(255),   -- tail node type
    generation_type character varying(64),    -- data origin: 0 = entered manually, 1 = generated by the system
    create_time     timestamp without time zone default CURRENT_TIMESTAMP,
    -- The default only fires on insert; no trigger maintaining it is visible here.
    update_time     timestamp without time zone default CURRENT_TIMESTAMP,
    -- Same constraint name PostgreSQL generates for an inline "primary key".
    constraint domain_metadata_pkey primary key (id)
);

comment on table domain_metadata
    is '领域元数据';

comment on column domain_metadata.domain_type
    is '领域类型';
comment on column domain_metadata.source_type
    is '头节点类型';
comment on column domain_metadata.relation
    is '关系';
comment on column domain_metadata.target_type
    is '尾节点类型';
comment on column domain_metadata.generation_type
    is '数据来源:0=手动录入,1=系统自动';
comment on column domain_metadata.create_time
    is '创建时间';
comment on column domain_metadata.update_time
    is '更新时间';

-- Ownership is transferred last, once all comments are attached.
alter table domain_metadata owner to postgres;
|
|
|
|
|
|
|
|
|
|
-- Chinese-English dictionary keyed by the Chinese word; each row maps it to its
-- English counterpart.
create table chinese_english_words
(
    chinese_word character varying(1024) not null,
    english_word text,
    create_time  timestamp without time zone default CURRENT_TIMESTAMP,
    -- The default only fires on insert; no trigger maintaining it is visible here.
    update_time  timestamp without time zone default CURRENT_TIMESTAMP,
    -- Same constraint name PostgreSQL generates for an inline "primary key".
    constraint chinese_english_words_pkey primary key (chinese_word)
);

comment on table chinese_english_words
    is '中英文对照字典';

comment on column chinese_english_words.chinese_word
    is '中文';
comment on column chinese_english_words.english_word
    is '英文';
comment on column chinese_english_words.create_time
    is '创建时间';
comment on column chinese_english_words.update_time
    is '更新时间';

-- Ownership is transferred last, once all comments are attached.
alter table chinese_english_words owner to postgres;
|