You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

240 lines
7.6 KiB
SQL

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

-- pdf_analysis_output: content produced by PDF analysis. Per the column
-- comments below, each row carries a piece of PDF paragraph content that is
-- either text or a table (layout_type), the page it is on (page_no), the
-- pdf_info row it belongs to (pdf_id) and its position on the page
-- (display_order). pdf_id is not declared as a foreign key.
create table pdf_analysis_output
(
    id            serial,
    layout_type   integer not null,
    content       text,
    page_no       integer not null,
    pdf_id        integer not null,
    table_title   varchar(100),
    display_order integer,
    create_time   timestamp default now(),
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint pdf_analysis_output_pkey primary key (id)
);

-- Catalog descriptions; the trailing comments give an English gloss.
comment on column pdf_analysis_output.layout_type   is '0-文本 1-表格';  -- 0 = text, 1 = table
comment on column pdf_analysis_output.content       is 'pdf段落内容';  -- PDF paragraph content
comment on column pdf_analysis_output.page_no       is 'pdf页码';  -- PDF page number
comment on column pdf_analysis_output.pdf_id        is 'pdf_info表的主键';  -- primary key of the pdf_info table
comment on column pdf_analysis_output.table_title   is '表格标题';  -- table title
comment on column pdf_analysis_output.display_order is '内容在pdf页面中的顺序越小表示顺序越靠前';  -- order within the PDF page; smaller = earlier

alter table pdf_analysis_output owner to postgres;
-- pdf_info: one record of PDF information (path, file name) together with its
-- processing status and the start/end timestamps of the analysis and
-- extraction phases.
--
-- id is declared as the primary key: the comments on
-- pdf_analysis_output.pdf_id and document_truncation.document_id both describe
-- it as the key they reference, and without the constraint nothing guarantees
-- that id is unique.
create table pdf_info
(
    id                    serial,
    path                  varchar(200) not null,
    filename              varchar(100),
    create_time           timestamp default now(),
    process_status        integer   default 0,
    analysis_start_time   timestamp,
    analysis_end_time     timestamp,
    extraction_start_time timestamp,
    extraction_end_time   timestamp,
    constraint pdf_info_pkey primary key (id)
);

-- Catalog descriptions; the trailing comments give an English gloss.
comment on table pdf_info is 'pdf信息';  -- PDF information
comment on column pdf_info.path                  is 'pdf路径';  -- PDF path
comment on column pdf_info.filename              is '文件名';  -- file name
comment on column pdf_info.create_time           is '创建时间';  -- creation time
-- Status codes: 0 not analysed, 1 analysing, 2 analysis succeeded,
-- 3 analysis failed, 4 data extraction started, 5 extraction succeeded,
-- 6 extraction failed.
comment on column pdf_info.process_status        is '处理状态 0:未分析 1正在分析 2分析成功 3分析失败 4开始数据抽取 5抽取成功 6抽取失败';
comment on column pdf_info.analysis_start_time   is '开始分析时间';  -- analysis start time
comment on column pdf_info.analysis_end_time     is '分析结束时间';  -- analysis end time
comment on column pdf_info.extraction_start_time is '开始抽取时间';  -- extraction start time
comment on column pdf_info.extraction_end_time   is '结束抽取时间';  -- extraction end time

alter table pdf_info owner to postgres;
-- document_truncation: document splitting table. Per the column comments, each
-- row is a content fragment tied to a document (document_id -> pdf_info.id)
-- and a paragraph (section_id -> pdf_analysis_output.id); neither link is
-- declared as a foreign key.
-- NOTE(review): document_id and layout_type are varchar here, while
-- pdf_info.id and pdf_analysis_output.layout_type are integers; confirm that
-- the string representation is intentional.
-- NOTE(review): update_time only has an insert-time default; nothing in this
-- file refreshes it on update.
create table document_truncation
(
    id          varchar(255) not null,
    document_id varchar(64),
    section_id  integer,
    layout_type varchar(64),
    content     text,
    create_time timestamp default CURRENT_TIMESTAMP,
    update_time timestamp default CURRENT_TIMESTAMP,
    title       varchar(1024),
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint document_truncation_pkey primary key (id)
);

-- Catalog descriptions; the trailing comments give an English gloss.
comment on table document_truncation is '文档切分表';  -- document splitting table
comment on column document_truncation.document_id is '文档id(pdf_info表的id)';  -- document id (id of the pdf_info table)
comment on column document_truncation.section_id  is '段落id pdf_analysis_output表的id';  -- paragraph id, id of the pdf_analysis_output table
comment on column document_truncation.layout_type is '布局类型 0-文本 1-表格';  -- layout type: 0 = text, 1 = table
comment on column document_truncation.content     is '片段内容';  -- fragment content
comment on column document_truncation.create_time is '创建时间';  -- creation time
comment on column document_truncation.update_time is '更新时间';  -- update time
comment on column document_truncation.title       is '标题';  -- title

alter table document_truncation owner to postgres;
-- truncation_entity_extraction: fragment entity extraction. Per the column
-- comments, each row records an entity name and its label (entity type) for a
-- fragment (truncation_id -> document_truncation.id, not declared as a
-- foreign key).
create table truncation_entity_extraction
(
    id            varchar(255) not null,
    truncation_id varchar(255),
    entity        text,
    name          text,
    create_time   timestamp default CURRENT_TIMESTAMP,
    update_time   timestamp default CURRENT_TIMESTAMP,
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint truncation_entity_extraction_pkey primary key (id)
);

-- Catalog descriptions; the trailing comments give an English gloss.
comment on table truncation_entity_extraction is '片段实体抽取';  -- fragment entity extraction
comment on column truncation_entity_extraction.id            is '主键';  -- primary key
comment on column truncation_entity_extraction.truncation_id is '片段id document_truncation表的id';  -- fragment id, id of the document_truncation table
comment on column truncation_entity_extraction.entity        is '标签(实体类型)';  -- label (entity type)
comment on column truncation_entity_extraction.name          is '实体名';  -- entity name
comment on column truncation_entity_extraction.create_time   is '创建时间';  -- creation time
comment on column truncation_entity_extraction.update_time   is '更新时间';  -- update time

alter table truncation_entity_extraction owner to postgres;
-- truncation_er_attribute: attribute/value pairs attached to an extracted
-- entity or relation. Per the column comments, ter_id holds the id of either a
-- truncation_entity_extraction row or a truncation_relation_extraction row and
-- association_type says which of the two it is.
--
-- Changes from the previous definition:
--   * ter_id is varchar(255) (was 64) so it can hold any id of the two tables
--     it points at, whose id columns are both varchar(255).
--   * The table comment said "entity table" and the attribute comment said
--     "entity name" (copy-paste leftovers); they now describe this table.
--   * The value comment was the empty string, which PostgreSQL treats as
--     "no comment"; it now carries a description.
create table truncation_er_attribute
(
    id               varchar(255) not null,
    ter_id           varchar(255),
    association_type varchar(255),
    attribute        text,
    value            text,
    data_type        varchar(64),
    create_time      timestamp default CURRENT_TIMESTAMP,
    update_time      timestamp default CURRENT_TIMESTAMP,
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint truncation_er_attribute_pkey primary key (id)
);

-- Catalog descriptions; the trailing comments give an English gloss.
comment on table truncation_er_attribute is '片段实体属性表';  -- fragment entity attribute table
-- ter_id: either a truncation_entity_extraction id or a
-- truncation_relation_extraction id.
comment on column truncation_er_attribute.ter_id is '片段实体属性表 既可以是truncation_entity_extraction表id也可以是truncation_relation_extraction表id';
-- association_type: 0 = ter_id refers to an entity, 1 = ter_id refers to a relation.
comment on column truncation_er_attribute.association_type is '关联类型 0terId关联的id为实体 1terId关联的id为关系';
comment on column truncation_er_attribute.attribute   is '属性名';  -- attribute name
comment on column truncation_er_attribute.value       is '属性值';  -- attribute value
comment on column truncation_er_attribute.data_type   is '数据类型 0字符串 1数字';  -- data type: 0 = string, 1 = number
comment on column truncation_er_attribute.create_time is '创建时间';  -- creation time
comment on column truncation_er_attribute.update_time is '更新时间';  -- update time

alter table truncation_er_attribute owner to postgres;
-- truncation_relation_extraction: fragment relation extraction. Per the column
-- comments, each row records a relation between a head node (source, with its
-- type) and a tail node (target, with its type). truncation_id has no column
-- comment and no foreign key.
-- NOTE(review): truncation_id presumably points at document_truncation.id, as
-- in truncation_entity_extraction; confirm.
create table truncation_relation_extraction
(
    id            varchar(255) not null,
    truncation_id varchar(255),
    source        text,
    source_type   varchar(255),
    target        text,
    target_type   varchar(255),
    relation      text,
    create_time   timestamp default CURRENT_TIMESTAMP,
    update_time   timestamp default CURRENT_TIMESTAMP,
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint truncation_relation_extraction_pkey primary key (id)
);

-- Catalog descriptions; the trailing comments give an English gloss.
comment on table truncation_relation_extraction is '片段关系抽取';  -- fragment relation extraction
comment on column truncation_relation_extraction.source      is '头节点';  -- head node
comment on column truncation_relation_extraction.source_type is '头节点类型';  -- head node type
comment on column truncation_relation_extraction.target      is '尾节点';  -- tail node
comment on column truncation_relation_extraction.target_type is '尾节点类型';  -- tail node type
comment on column truncation_relation_extraction.relation    is '关系';  -- relation
comment on column truncation_relation_extraction.create_time is '创建时间';  -- creation time
comment on column truncation_relation_extraction.update_time is '更新时间';  -- update time

alter table truncation_relation_extraction owner to postgres;
-- domain_metadata: domain metadata. Per the column comments, each row names a
-- domain type and a (head node type, relation, tail node type) triple, plus
-- whether the row was entered manually or generated by the system.
--
-- Change from the previous definition: source_type is varchar(255) (was 64) so
-- that head and tail node types share one length limit, matching target_type
-- here and source_type/target_type in truncation_relation_extraction.
create table domain_metadata
(
    id              varchar(255) not null,
    domain_type     varchar(255),
    source_type     varchar(255),
    relation        varchar(64),
    target_type     varchar(255),
    generation_type varchar(64),
    create_time     timestamp default CURRENT_TIMESTAMP,
    update_time     timestamp default CURRENT_TIMESTAMP,
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint domain_metadata_pkey primary key (id)
);

-- Catalog descriptions; the trailing comments give an English gloss.
comment on table domain_metadata is '领域元数据';  -- domain metadata
comment on column domain_metadata.domain_type     is '领域类型';  -- domain type
comment on column domain_metadata.source_type     is '头节点类型';  -- head node type
comment on column domain_metadata.relation        is '关系';  -- relation
comment on column domain_metadata.target_type     is '尾节点类型';  -- tail node type
comment on column domain_metadata.generation_type is '数据来源0=手动录入1=系统自动';  -- data source: 0 = manual entry, 1 = system generated
comment on column domain_metadata.create_time     is '创建时间';  -- creation time
comment on column domain_metadata.update_time     is '更新时间';  -- update time

alter table domain_metadata owner to postgres;
-- chinese_english_words: Chinese-English dictionary. The Chinese word is the
-- primary key, so each Chinese word appears at most once.
create table chinese_english_words
(
    chinese_word varchar(1024) not null,
    english_word text,
    create_time  timestamp default CURRENT_TIMESTAMP,
    update_time  timestamp default CURRENT_TIMESTAMP,
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint chinese_english_words_pkey primary key (chinese_word)
);

-- Catalog descriptions; the trailing comments give an English gloss.
comment on table chinese_english_words is '中英文对照字典';  -- Chinese-English dictionary
comment on column chinese_english_words.chinese_word is '中文';  -- Chinese
comment on column chinese_english_words.english_word is '英文';  -- English
comment on column chinese_english_words.create_time  is '创建时间';  -- creation time
comment on column chinese_english_words.update_time  is '更新时间';  -- update time

alter table chinese_english_words owner to postgres;