-- pdf_analysis_output: content items recorded per PDF page.
-- The trailing notes are English renderings of the catalog comments that are
-- attached to the columns further down.
create table pdf_analysis_output
(
    id            serial,
    layout_type   integer not null,        -- 0 = text, 1 = table
    content       text,                    -- PDF paragraph content
    page_no       integer not null,        -- PDF page number
    pdf_id        integer not null,        -- id of the pdf_info row (no foreign key is declared)
    table_title   varchar(100),            -- table title
    display_order integer,                 -- order within the page; smaller comes first
    create_time   timestamp default now(),
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint pdf_analysis_output_pkey primary key (id)
);

-- Catalog comments (text is stored in the database, kept verbatim).
comment on column pdf_analysis_output.layout_type
    is '0-文本 1-表格';
comment on column pdf_analysis_output.content
    is 'pdf段落内容';
comment on column pdf_analysis_output.page_no
    is 'pdf页码';
comment on column pdf_analysis_output.pdf_id
    is 'pdf_info表的主键';
comment on column pdf_analysis_output.table_title
    is '表格标题';
comment on column pdf_analysis_output.display_order
    is '内容在pdf页面中的顺序,越小表示顺序越靠前';

alter table pdf_analysis_output owner to postgres;

-- pdf_info: one row per PDF file, with its processing status and the
-- start/end timestamps of the analysis and extraction phases.
--
-- id is declared as the primary key: other tables in this schema describe
-- their columns as holding "the primary key / id of pdf_info", and a bare
-- serial column does not by itself prevent duplicate ids from explicit inserts.
create table pdf_info
(
    id                    serial
        primary key,
    path                  varchar(200) not null,
    filename              varchar(100),
    create_time           timestamp default now(),
    -- Status codes: 0 not analysed, 1 analysing, 2 analysis succeeded,
    -- 3 analysis failed, 4 extraction started, 5 extraction succeeded,
    -- 6 extraction failed (see the catalog comment below).
    process_status        integer   default 0,
    analysis_start_time   timestamp,
    analysis_end_time     timestamp,
    extraction_start_time timestamp,
    extraction_end_time   timestamp
);

-- Catalog comments (text is stored in the database, kept verbatim).
comment on table pdf_info is 'pdf信息';

-- PDF path
comment on column pdf_info.path is 'pdf路径';

-- File name
comment on column pdf_info.filename is '文件名';

-- Creation time
comment on column pdf_info.create_time is '创建时间';

-- Processing status codes 0..6
comment on column pdf_info.process_status is '处理状态 0:未分析 1:正在分析 2:分析成功 3:分析失败 4:开始数据抽取 5:抽取成功 6:抽取失败';

-- Analysis start time
comment on column pdf_info.analysis_start_time is '开始分析时间';

-- Analysis end time
comment on column pdf_info.analysis_end_time is '分析结束时间';

-- Extraction start time
comment on column pdf_info.extraction_start_time is '开始抽取时间';

-- Extraction end time
comment on column pdf_info.extraction_end_time is '结束抽取时间';

alter table pdf_info
    owner to postgres;

-- document_truncation: fragments ("truncations") cut from a document.
-- NOTE(review): document_id and layout_type are varchar here although their
-- catalog comments describe a pdf_info id and a 0/1 code; confirm with the
-- writers before relying on either representation.
create table document_truncation
(
    id          varchar(255) not null,
    document_id varchar(64),                            -- document id (id of pdf_info)
    section_id  integer,                                -- paragraph id (id of pdf_analysis_output)
    layout_type varchar(64),                            -- layout type: 0 = text, 1 = table
    content     text,                                   -- fragment content
    create_time timestamp default CURRENT_TIMESTAMP,    -- creation time
    update_time timestamp default CURRENT_TIMESTAMP,    -- update time; only a default is set here
    title       varchar(1024),                          -- title
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint document_truncation_pkey primary key (id)
);

-- Catalog comments (text is stored in the database, kept verbatim).
comment on table document_truncation
    is '文档切分表';
comment on column document_truncation.document_id
    is '文档id(pdf_info表的id)';
comment on column document_truncation.section_id
    is '段落id pdf_analysis_output表的id';
comment on column document_truncation.layout_type
    is '布局类型 0-文本 1-表格';
comment on column document_truncation.content
    is '片段内容';
comment on column document_truncation.create_time
    is '创建时间';
comment on column document_truncation.update_time
    is '更新时间';
comment on column document_truncation.title
    is '标题';

alter table document_truncation owner to postgres;

-- truncation_entity_extraction: entities extracted from document fragments.
create table truncation_entity_extraction
(
    id            varchar(255) not null,                  -- primary key
    truncation_id varchar(255),                           -- fragment id (id of document_truncation)
    entity        text,                                   -- label (entity type)
    name          text,                                   -- entity name
    create_time   timestamp default CURRENT_TIMESTAMP,    -- creation time
    update_time   timestamp default CURRENT_TIMESTAMP,    -- update time; only a default is set here
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint truncation_entity_extraction_pkey primary key (id)
);

-- Catalog comments (text is stored in the database, kept verbatim).
comment on table truncation_entity_extraction
    is '片段实体抽取';
comment on column truncation_entity_extraction.id
    is '主键';
comment on column truncation_entity_extraction.truncation_id
    is '片段id document_truncation表的id';
comment on column truncation_entity_extraction.entity
    is '标签(实体类型)';
comment on column truncation_entity_extraction.name
    is '实体名';
comment on column truncation_entity_extraction.create_time
    is '创建时间';
comment on column truncation_entity_extraction.update_time
    is '更新时间';

alter table truncation_entity_extraction owner to postgres;

-- truncation_er_attribute: attribute/value pairs attached to either an
-- extracted entity or an extracted relation.
--
-- ter_id is varchar(255) so that it can hold any id from
-- truncation_entity_extraction or truncation_relation_extraction, whose id
-- columns are both varchar(255); a narrower column would reject long ids.
create table truncation_er_attribute
(
    id               varchar(255) not null
        primary key,
    ter_id           varchar(255),
    -- 0: ter_id points at an entity, 1: ter_id points at a relation
    -- (stored as text; see the catalog comment below).
    association_type varchar(255),
    attribute        text,
    value            text,
    -- 0: string, 1: number (stored as text; see the catalog comment below).
    data_type        varchar(64),
    create_time      timestamp default CURRENT_TIMESTAMP,
    update_time      timestamp default CURRENT_TIMESTAMP
);

-- Catalog comments (text is stored in the database).
comment on table truncation_er_attribute is '实体表';

-- Id of a truncation_entity_extraction row or a truncation_relation_extraction row.
comment on column truncation_er_attribute.ter_id is '片段实体属性表 既可以是truncation_entity_extraction表id也可以是truncation_relation_extraction表id';

-- Association type: 0 = ter_id is an entity, 1 = ter_id is a relation.
comment on column truncation_er_attribute.association_type is '关联类型 0:terId关联的id为实体 1:terId关联的id为关系';

-- Attribute name. The previous text said "entity name", which describes
-- truncation_entity_extraction.name rather than this column.
comment on column truncation_er_attribute.attribute is '属性名';

-- Value
comment on column truncation_er_attribute.value is '值';

-- Data type: 0 = string, 1 = number.
comment on column truncation_er_attribute.data_type is '数据类型 0:字符串 1:数字';

-- Creation time
comment on column truncation_er_attribute.create_time is '创建时间';

-- Update time
comment on column truncation_er_attribute.update_time is '更新时间';

alter table truncation_er_attribute
    owner to postgres;

-- truncation_relation_extraction: relations (head node -> tail node)
-- extracted from document fragments.
create table truncation_relation_extraction
(
    id            varchar(255) not null,
    truncation_id varchar(255),
    source        text,                                   -- head node
    source_type   varchar(255),                           -- head node type
    target        text,                                   -- tail node
    target_type   varchar(255),                           -- tail node type
    relation      text,                                   -- relation
    create_time   timestamp default CURRENT_TIMESTAMP,    -- creation time
    update_time   timestamp default CURRENT_TIMESTAMP,    -- update time; only a default is set here
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint truncation_relation_extraction_pkey primary key (id)
);

-- Catalog comments (text is stored in the database, kept verbatim).
comment on table truncation_relation_extraction
    is '片段关系抽取';
comment on column truncation_relation_extraction.source
    is '头节点';
comment on column truncation_relation_extraction.source_type
    is '头节点类型';
comment on column truncation_relation_extraction.target
    is '尾节点';
comment on column truncation_relation_extraction.target_type
    is '尾节点类型';
comment on column truncation_relation_extraction.relation
    is '关系';
comment on column truncation_relation_extraction.create_time
    is '创建时间';
comment on column truncation_relation_extraction.update_time
    is '更新时间';

alter table truncation_relation_extraction owner to postgres;

-- domain_metadata: per-domain triples of head node type, relation and
-- tail node type, plus how each row was produced.
create table domain_metadata
(
    id              varchar(255) not null,
    domain_type     varchar(255),                           -- domain type
    source_type     varchar(64),                            -- head node type
    relation        varchar(64),                            -- relation
    target_type     varchar(255),                           -- tail node type
    generation_type varchar(64),                            -- data origin: 0 = manual entry, 1 = system automatic
    create_time     timestamp default CURRENT_TIMESTAMP,    -- creation time
    update_time     timestamp default CURRENT_TIMESTAMP,    -- update time; only a default is set here
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint domain_metadata_pkey primary key (id)
);

-- Catalog comments (text is stored in the database, kept verbatim).
comment on table domain_metadata
    is '领域元数据';
comment on column domain_metadata.domain_type
    is '领域类型';
comment on column domain_metadata.source_type
    is '头节点类型';
comment on column domain_metadata.relation
    is '关系';
comment on column domain_metadata.target_type
    is '尾节点类型';
comment on column domain_metadata.generation_type
    is '数据来源:0=手动录入,1=系统自动';
comment on column domain_metadata.create_time
    is '创建时间';
comment on column domain_metadata.update_time
    is '更新时间';

alter table domain_metadata owner to postgres;

-- chinese_english_words: Chinese-to-English dictionary keyed by the
-- Chinese word itself.
create table chinese_english_words
(
    chinese_word varchar(1024) not null,                 -- Chinese
    english_word text,                                   -- English
    create_time  timestamp default CURRENT_TIMESTAMP,    -- creation time
    update_time  timestamp default CURRENT_TIMESTAMP,    -- update time; only a default is set here
    -- Same name PostgreSQL generates for an inline "primary key" on this table.
    constraint chinese_english_words_pkey primary key (chinese_word)
);

-- Catalog comments (text is stored in the database, kept verbatim).
comment on table chinese_english_words
    is '中英文对照字典';
comment on column chinese_english_words.chinese_word
    is '中文';
comment on column chinese_english_words.english_word
    is '英文';
comment on column chinese_english_words.create_time
    is '创建时间';
comment on column chinese_english_words.update_time
    is '更新时间';

alter table chinese_english_words owner to postgres;