<?xml version="1.0" encoding="UTF-8"?><database name="egdb3_11_7" schema="url_verify" type="PostgreSQL - 15.10 (Ubuntu 15.10-1.pgdg24.04+1)">
   <!-- Sequences of the url_verify schema. Each one is named in the nextval(...)
        default of the "id" column of the table with the matching name below.
        All start at 1 and increment by 1. -->
   <sequences>
      <sequence increment="1" name="session_id_seq" startValue="1"/>
      <sequence increment="1" name="url_id_seq" startValue="1"/>
      <sequence increment="1" name="url_selector_id_seq" startValue="1"/>
      <sequence increment="1" name="url_verification_id_seq" startValue="1"/>
      <sequence increment="1" name="verification_attempt_id_seq" startValue="1"/>
   </sequences>
   <tables>
      <!-- url_verify.session
           Columns: id (serial primary key), name, owning_lib, creator, container,
           create_time (defaults to now()), search. Every column is NOT NULL.
           Foreign keys out: owning_lib references actor.org_unit.id, creator
           references actor.usr.id, container references
           container.biblio_record_entry_bucket.id.
           Referenced by: url.session, url_selector.session and
           verification_attempt.session (none of them cascade on delete).
           Unique index uvs_name_once_per_lib enforces one (name, owning_lib)
           pair per row. -->
      <table name="session" remarks="" schema="url_verify" type="TABLE">
         <column autoUpdated="true" defaultValue="nextval('url_verify.session_id_seq'::regclass)" digits="0" id="0" name="id" nullable="false" remarks="" size="10" type="serial" typeCode="4">
            <child column="session" foreignKey="url_session_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url"/>
            <child column="session" foreignKey="url_selector_session_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url_selector"/>
            <child column="session" foreignKey="verification_attempt_session_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="verification_attempt"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="1" name="name" nullable="false" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="2" name="owning_lib" nullable="false" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="session_owning_lib_fkey" implied="false" onDeleteCascade="false" schema="actor" table="org_unit"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="3" name="creator" nullable="false" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="session_creator_fkey" implied="false" onDeleteCascade="false" schema="actor" table="usr"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="4" name="container" nullable="false" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="session_container_fkey" implied="false" onDeleteCascade="false" schema="container" table="biblio_record_entry_bucket"/>
         </column>
         <column autoUpdated="false" defaultValue="now()" digits="6" id="5" name="create_time" nullable="false" remarks="" size="35" type="timestamptz" typeCode="93"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="6" name="search" nullable="false" remarks="" size="2147483647" type="text" typeCode="12"/>
         <primaryKey column="id" sequenceNumberInPK="1"/>
         <index name="session_pkey" unique="true">
            <column ascending="true" name="id"/>
         </index>
         <index name="uvs_name_once_per_lib" unique="true">
            <column ascending="true" name="name"/>
            <column ascending="true" name="owning_lib"/>
         </index>
      </table>
      <!-- url_verify.url
           One row per URL. Only id and full_url are NOT NULL.
           Provenance columns: redirect_from (self-reference to url.id), item
           (references container.biblio_record_entry_bucket_item.id), session
           (references url_verify.session.id), url_selector (references
           url_verify.url_selector.id), plus tag, subfield and ord.
           Check constraint redirect_or_from_item: a row must either have
           redirect_from set, or have item, url_selector, tag, subfield and ord
           all set. The session column is not part of that check.
           Component columns scheme, username, password, host, domain, tld, port,
           path, page, query and fragment are all nullable text. The ingest_url
           routine in this dump assigns exactly these columns on NEW; presumably
           it is attached to this table as a trigger, but the trigger binding is
           not part of this dump (TODO confirm).
           Referenced by: url.redirect_from, url_verification.url and
           url_verification.redirect_to. -->
      <table name="url" remarks="" schema="url_verify" type="TABLE">
         <column autoUpdated="true" defaultValue="nextval('url_verify.url_id_seq'::regclass)" digits="0" id="0" name="id" nullable="false" remarks="" size="10" type="serial" typeCode="4">
            <child column="redirect_from" foreignKey="url_redirect_from_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url"/>
            <child column="redirect_to" foreignKey="url_verification_redirect_to_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url_verification"/>
            <child column="url" foreignKey="url_verification_url_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url_verification"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="1" name="redirect_from" nullable="true" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="url_redirect_from_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="2" name="item" nullable="true" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="url_item_fkey" implied="false" onDeleteCascade="false" schema="container" table="biblio_record_entry_bucket_item"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="3" name="session" nullable="true" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="url_session_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="session"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="4" name="url_selector" nullable="true" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="url_url_selector_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url_selector"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="5" name="tag" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="6" name="subfield" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="7" name="ord" nullable="true" remarks="" size="10" type="int4" typeCode="4"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="8" name="full_url" nullable="false" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="9" name="scheme" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="10" name="username" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="11" name="password" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="12" name="host" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="13" name="domain" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="14" name="tld" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="15" name="port" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="16" name="path" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="17" name="page" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="18" name="query" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="19" name="fragment" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <primaryKey column="id" sequenceNumberInPK="1"/>
         <index name="url_pkey" unique="true">
            <column ascending="true" name="id"/>
         </index>
         <checkConstraint constraint="(((redirect_from IS NOT NULL) OR ((item IS NOT NULL) AND (url_selector IS NOT NULL) AND (tag IS NOT NULL) AND (subfield IS NOT NULL) AND (ord IS NOT NULL))))" name="redirect_or_from_item"/>
      </table>
      <!-- url_verify.url_selector
           Columns: id (serial primary key), xpath (text) and session, all NOT NULL.
           session references url_verify.session.id; url.url_selector references
           this table's id.
           The extract_urls routine in this dump reads the rows of one session and
           uses xpath as the prefix of the XPath expressions it evaluates.
           Unique index tag_once_per_sess covers (xpath, session), so an xpath
           value can occur only once per session. -->
      <table name="url_selector" remarks="" schema="url_verify" type="TABLE">
         <column autoUpdated="true" defaultValue="nextval('url_verify.url_selector_id_seq'::regclass)" digits="0" id="0" name="id" nullable="false" remarks="" size="10" type="serial" typeCode="4">
            <child column="url_selector" foreignKey="url_url_selector_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="1" name="xpath" nullable="false" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="2" name="session" nullable="false" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="url_selector_session_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="session"/>
         </column>
         <primaryKey column="id" sequenceNumberInPK="1"/>
         <index name="url_selector_pkey" unique="true">
            <column ascending="true" name="id"/>
         </index>
         <index name="tag_once_per_sess" unique="true">
            <column ascending="true" name="xpath"/>
            <column ascending="true" name="session"/>
         </index>
      </table>
      <!-- url_verify.url_verification
           Columns: id (serial primary key), url and attempt (both NOT NULL),
           req_time (NOT NULL, defaults to now()), and the nullable res_time,
           res_code, res_text and redirect_to.
           Foreign keys out: url and redirect_to both reference url_verify.url.id;
           attempt references url_verify.verification_attempt.id.
           Check constraint url_verification_res_code_check restricts res_code to
           the range 100 through 999 inclusive. The column is nullable, and a SQL
           CHECK does not reject NULL. -->
      <table name="url_verification" remarks="" schema="url_verify" type="TABLE">
         <column autoUpdated="true" defaultValue="nextval('url_verify.url_verification_id_seq'::regclass)" digits="0" id="0" name="id" nullable="false" remarks="" size="10" type="serial" typeCode="4"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="1" name="url" nullable="false" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="url_verification_url_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="2" name="attempt" nullable="false" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="url_verification_attempt_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="verification_attempt"/>
         </column>
         <column autoUpdated="false" defaultValue="now()" digits="6" id="3" name="req_time" nullable="false" remarks="" size="35" type="timestamptz" typeCode="93"/>
         <column autoUpdated="false" defaultValue="null" digits="6" id="4" name="res_time" nullable="true" remarks="" size="35" type="timestamptz" typeCode="93"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="5" name="res_code" nullable="true" remarks="" size="10" type="int4" typeCode="4"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="6" name="res_text" nullable="true" remarks="" size="2147483647" type="text" typeCode="12"/>
         <column autoUpdated="false" defaultValue="null" digits="0" id="7" name="redirect_to" nullable="true" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="url_verification_redirect_to_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url"/>
         </column>
         <primaryKey column="id" sequenceNumberInPK="1"/>
         <index name="url_verification_pkey" unique="true">
            <column ascending="true" name="id"/>
         </index>
         <checkConstraint constraint="(((res_code &gt;= 100) AND (res_code &lt;= 999)))" name="url_verification_res_code_check"/>
      </table>
      <!-- url_verify.verification_attempt
           Columns: id (serial primary key), usr and session (both NOT NULL),
           start_time (NOT NULL, defaults to now()) and finish_time (nullable).
           Foreign keys out: usr references actor.usr.id; session references
           url_verify.session.id.
           Referenced by: url_verification.attempt. -->
      <table name="verification_attempt" remarks="" schema="url_verify" type="TABLE">
         <column autoUpdated="true" defaultValue="nextval('url_verify.verification_attempt_id_seq'::regclass)" digits="0" id="0" name="id" nullable="false" remarks="" size="10" type="serial" typeCode="4">
            <child column="attempt" foreignKey="url_verification_attempt_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="url_verification"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="1" name="usr" nullable="false" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="verification_attempt_usr_fkey" implied="false" onDeleteCascade="false" schema="actor" table="usr"/>
         </column>
         <column autoUpdated="false" defaultValue="null" digits="0" id="2" name="session" nullable="false" remarks="" size="10" type="int4" typeCode="4">
            <parent column="id" foreignKey="verification_attempt_session_fkey" implied="false" onDeleteCascade="false" schema="url_verify" table="session"/>
         </column>
         <column autoUpdated="false" defaultValue="now()" digits="6" id="3" name="start_time" nullable="false" remarks="" size="35" type="timestamptz" typeCode="93"/>
         <column autoUpdated="false" defaultValue="null" digits="6" id="4" name="finish_time" nullable="true" remarks="" size="35" type="timestamptz" typeCode="93"/>
         <primaryKey column="id" sequenceNumberInPK="1"/>
         <index name="verification_attempt_pkey" unique="true">
            <column ascending="true" name="id"/>
         </index>
      </table>
   </tables>
   <routines>
      <routine dataAccess="MODIFIES" deterministic="false" name="extract_urls(session_id integer, item_id integer)" returnType="integer" securityType="INVOKER" type="FUNCTION">
         <comment/>
         <definition language="plpgsql"><![CDATA[-- Extract the URLs that a session's selectors match in one bucket item's MARC
-- record and insert one url_verify.url row per URL found.
--
-- Parameters:
--   session_id  url_verify.session.id; its url_verify.url_selector rows are used
--   item_id     container.biblio_record_entry_bucket_item.id; the record it
--               targets supplies the MARC that the selectors are run against
-- Returns: the number of url_verify.url rows inserted by this call (0 when the
--          item does not resolve to a record or nothing matches).
DECLARE
    last_seen_tag TEXT;
    current_tag TEXT;
    current_sf TEXT;
    current_url TEXT;
    current_ord INT;
    current_url_pos INT;
    current_selector url_verify.url_selector%ROWTYPE;
    item_marc biblio.record_entry.marc%TYPE;
    -- oils_xpath is defined outside this schema. Its result is held as TEXT[]
    -- because each element is assigned to a TEXT variable below.
    found_urls TEXT[];
    found_tags TEXT[];
    found_sfs TEXT[];
BEGIN
    -- ord numbers the inserted rows 1..n across ALL selectors of the session,
    -- so it is initialised once and never reset inside the loops.
    current_ord := 1;

    -- The record depends only on item_id, so fetch its MARC once instead of
    -- repeating this join for every URL position.
    SELECT  b.marc INTO item_marc
      FROM  biblio.record_entry b
            JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
      WHERE c.id = item_id;

    -- No such bucket item (or no record behind it): nothing can be extracted.
    IF NOT FOUND THEN
        RETURN 0;
    END IF;

    FOR current_selector IN SELECT * FROM url_verify.url_selector s WHERE s.session = session_id LOOP
        -- Evaluate each XPath once per selector and index into the results,
        -- rather than re-evaluating it over the whole record per position.
        found_urls := oils_xpath(current_selector.xpath || '/text()', item_marc);

        -- Nothing matched: skip the attribute lookups for this selector.
        CONTINUE WHEN found_urls[1] IS NULL;

        found_tags := oils_xpath(current_selector.xpath || '/../@tag', item_marc);
        found_sfs  := oils_xpath(current_selector.xpath || '/@code', item_marc);

        current_url_pos := 1;
        LOOP
            -- A subscript past the end of the array yields NULL, which ends the
            -- scan exactly like the first NULL element does.
            current_url := found_urls[current_url_pos];
            EXIT WHEN current_url IS NULL;

            -- The tag array can be shorter than the URL array; when there is no
            -- tag at this position, reuse the most recent one seen.
            current_tag := found_tags[current_url_pos];
            IF current_tag IS NULL THEN
                current_tag := last_seen_tag;
            ELSE
                last_seen_tag := current_tag;
            END IF;

            current_sf := found_sfs[current_url_pos];

            INSERT INTO url_verify.url (session, item, url_selector, tag, subfield, ord, full_url)
              VALUES ( session_id, item_id, current_selector.id, current_tag, current_sf, current_ord, current_url);

            current_url_pos := current_url_pos + 1;
            current_ord := current_ord + 1;
        END LOOP;
    END LOOP;

    -- current_ord is always one past the last ord handed out.
    RETURN current_ord - 1;
END;]]></definition>
         <parameters>
            <parameter mode="IN" name="session_id" type="integer"/>
            <parameter mode="IN" name="item_id" type="integer"/>
         </parameters>
      </routine>
      <routine dataAccess="MODIFIES" deterministic="false" name="ingest_url()" returnType="trigger" securityType="INVOKER" type="FUNCTION">
         <comment/>
         <definition language="plpgsql"><![CDATA[-- Trigger function: split NEW.full_url into its components with
-- url_verify.parse_url and store them in the matching columns of NEW.
-- Only the eleven component columns assigned below are written; every other
-- column of NEW, including full_url, is returned as it arrived.
-- NOTE(review): the table and event this trigger is bound to are not part of
-- this dump; returning NEW only changes the stored row in a BEFORE row trigger.
DECLARE
    parsed url_verify.url%ROWTYPE;
BEGIN
    SELECT  p.* INTO parsed
      FROM  url_verify.parse_url(NEW.full_url) AS p;

    -- Authority part
    NEW.scheme   := parsed.scheme;
    NEW.username := parsed.username;
    NEW.password := parsed.password;
    NEW.host     := parsed.host;
    NEW.port     := parsed.port;

    -- Pieces derived from the host
    NEW.domain   := parsed.domain;
    NEW.tld      := parsed.tld;

    -- Resource part
    NEW.path     := parsed.path;
    NEW.page     := parsed.page;
    NEW.query    := parsed.query;
    NEW.fragment := parsed.fragment;

    RETURN NEW;
END;]]></definition>
         <parameters>
            <parameter mode="IN"/>
         </parameters>
      </routine>
      <routine dataAccess="MODIFIES" deterministic="false" name="parse_url(url_in text)" returnType="url_verify.url" securityType="INVOKER" type="FUNCTION">
         <comment/>
         <definition language="plperlu"><![CDATA[# Break a URL string into components and return them as a hash reference whose
# keys are column names of url_verify.url (the declared return type).
#
#   url_in  the URL text to parse
#
# Keys set: full_url, scheme, username, password, host, port, path, query,
# fragment (all read from Rose::URI accessors) plus the derived domain, tld
# and page.
use Rose::URI;

my ($url_in) = @_;
my $uri = Rose::URI->new($url_in);

# full_url is listed after the accessor values, so it always holds the input
# exactly as it was passed in.
my %parts = (
    (map { $_ => $uri->$_ } qw/scheme username password host port path query fragment/),
    full_url => $url_in,
);

# domain: the host with its first dot-terminated label removed.
my $domain = $parts{host};
$domain =~ s/^[^.]+\.//;
$parts{domain} = $domain;

# tld: the domain with its first run of dot-terminated labels removed, which
# normally leaves the text after the last dot.
my $tld = $parts{domain};
$tld =~ s/(?:[^.]+\.)+//;
$parts{tld} = $tld;

# page: the path with everything up to and including the last slash removed.
my $page = $parts{path};
$page =~ s#(?:[^/]*/)+##;
$parts{page} = $page;

return \%parts;]]></definition>
         <parameters>
            <parameter mode="IN" name="url_in" type="text"/>
         </parameters>
      </routine>
   </routines>
</database>
