Tripal v1.0 (6.x-1.0)
|
Go to the source code of this file.
Functions | |
tripal_feature_gff3_load_form () | |
tripal_feature_gff3_load_form_validate ($form, &$form_state) | |
tripal_feature_gff3_load_form_submit ($form, &$form_state) | |
tripal_feature_load_gff3 ($gff_file, $organism_id, $analysis_id, $add_only=0, $update=0, $refresh=0, $remove=0, $use_transaction=1, $target_organism_id=NULL, $target_type=NULL, $create_target=0, $start_line=1, $landmark_type= '', $alt_id_attr= '', $job=NULL) | |
tripal_feature_load_gff3_derives_from ($feature, $subject, $organism) | |
tripal_feature_load_gff3_parents ($feature, $cvterm, $parents, $organism_id, $fmin) | |
tripal_feature_load_gff3_dbxref ($feature, $dbxrefs) | |
tripal_feature_load_gff3_ontology ($feature, $dbxrefs) | |
tripal_feature_load_gff3_alias ($feature, $aliases) | |
tripal_feature_load_gff3_feature ($organism, $analysis_id, $cvterm, $uniquename, $name, $residues, $is_analysis= 'f', $is_obsolete= 'f', $add_only, $score) | |
tripal_feature_load_gff3_featureloc ($feature, $organism, $landmark, $fmin, $fmax, $strand, $phase, $is_fmin_partial, $is_fmax_partial, $residue_info, $locgroup, $landmark_type_id= '', $landmark_organism_id= '', $create_landmark=0, $landmark_is_target=0) | |
tripal_feature_load_gff3_property ($feature, $property, $value) | |
tripal_feature_load_gff3_fasta ($fh, $interval, &$num_read, &$intv_read, &$line_num) | |
tripal_feature_load_gff3_target ($feature, $tags, $target_organism_id, $target_type, $create_target, $attr_locgroup) |
Definition in file gff_loader.inc.
tripal_feature_load_gff3_fasta | ( | $ | fh, |
$ | interval, | ||
&$ | num_read, | ||
&$ | intv_read, | ||
&$ | line_num | ||
) |
Definition at line 1718 of file gff_loader.inc.
{
  // Loads the FASTA section found at the end of a GFF3 file.
  //
  // Reads the remaining lines from the open file handle $fh. Every line that
  // starts with '>' begins a new record whose ID is the rest of that line;
  // all following lines are concatenated (trimmed) as that record's residues.
  // When a record is complete, its ID is looked up as a uniquename in the
  // tripal_gff_temp table and the residues are written to the matching
  // feature.
  //
  // Parameters:
  //   $fh        - open file handle positioned at the first FASTA line.
  //   $interval  - number of characters to read between progress reports.
  //   $num_read  - (by reference) running count of characters read.
  //   $intv_read - (by reference) characters read since the last report.
  //   $line_num  - (by reference) running line counter.
  //
  // Returns:
  //   An empty string if the lookup statement cannot be prepared; otherwise
  //   nothing.
  print "Loading FASTA sequences\n";
  $residues = '';

  $sql = " PREPARE sel_gfftemp_un (text) AS SELECT feature_id FROM tripal_gff_temp WHERE uniquename = $1 ";
  $status = tripal_core_chado_prepare('sel_gfftemp_un', $sql, array('text'));
  if (!$status) {
    watchdog('T_gff3_loader', 'Cannot prepare statement \'sel_gfftemp_un\'.', array(), WATCHDOG_ERROR);
    return '';
  }

  // ID of the record currently being accumulated (NULL until the first
  // definition line has been seen).
  $id = NULL;
  $at_eof = FALSE;

  // Iterate through the remaining lines of the file. End-of-file is handled
  // inside the loop so that the final record is saved by the same code path
  // as every other record.
  while (!$at_eof) {
    $line = fgets($fh);
    // Compare strictly against FALSE: a final line consisting only of "0"
    // (no trailing newline) is falsy but is still valid content.
    $at_eof = ($line === FALSE);
    $is_header = FALSE;

    if (!$at_eof) {
      $line_num++;
      $size = drupal_strlen($line);
      $num_read += $size;
      $intv_read += $size;
      $line = trim($line);

      // Update the job status every 1% of the file.
      // NOTE(review): $job and $filesize are neither parameters of this
      // function nor assigned anywhere in it, so this branch can never run
      // as written. The guards below only silence the undefined-variable
      // notices; to restore progress reporting both values must be passed in
      // by the caller. TODO confirm and extend the signature.
      if (!empty($job) and !empty($filesize) and $intv_read >= $interval) {
        $intv_read = 0;
        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
        print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
        tripal_job_set_progress($job, intval(($num_read / $filesize) * 100));
      }

      $is_header = (bool) preg_match('/^>/', $line);
      if (!$is_header) {
        // A sequence line: append it to the record being accumulated.
        $residues .= $line;
        continue;
      }
    }

    // We are at a new definition line or at end-of-file, so the previous
    // record (if there was one) is complete and can be saved. Skipping this
    // when no ID has been seen avoids a pointless lookup and a misleading
    // warning for files that contain no FASTA records.
    if ($id !== NULL and $id !== '') {
      $sql = "EXECUTE sel_gfftemp_un('%s')";
      $result = tripal_core_chado_execute_prepared('sel_gfftemp_un', $sql, array($id));
      // db_fetch_object() yields FALSE when the query matched no rows, so
      // check the fetched row as well as the query result before using it.
      $feature = $result ? db_fetch_object($result) : FALSE;
      if (!$feature) {
        watchdog('T_gff3_loader', 'Cannot find feature to assign FASTA sequence: %uname',
          array('%uname' => $id), WATCHDOG_WARNING);
      }
      // if we have a feature then add the residues
      else {
        $values = array('residues' => $residues);
        $match = array('feature_id' => $feature->feature_id);
        $options = array('statement_name' => 'upd_feature_re');
        tripal_core_chado_update('feature', $match, $values, $options);
      }
    }

    if ($is_header) {
      // Start a new record: the ID is everything after the leading '>'.
      $id = preg_replace('/^>(.*)$/', '\1', $line);
      $residues = '';
    }
  }
}
tripal_feature_load_gff3_target | ( | $ | feature, |
$ | tags, | ||
$ | target_organism_id, | ||
$ | target_type, | ||
$ | create_target, | ||
$ | attr_locgroup | ||
) |
Definition at line 1802 of file gff_loader.inc.
{
  // Adds an alignment (featureloc) for a feature from its GFF3 'Target'
  // attribute.
  //
  // Parameters:
  //   $feature            - the feature the Target attribute belongs to.
  //   $tags               - attribute arrays keyed by tag name; reads
  //                         'Target' and, when present, 'target_organism'
  //                         and 'target_type'.
  //   $target_organism_id - default organism ID for the target feature.
  //   $target_type        - default sequence ontology term name for the
  //                         target feature.
  //   $create_target      - passed through to
  //                         tripal_feature_load_gff3_featureloc().
  //   $attr_locgroup      - passed through as the locgroup.
  //
  // Problems are reported through watchdog(). An unknown $target_type
  // terminates the script.

  // format is: "target_id start end [strand]", where strand is optional and may be "+" or "-"
  $matched = preg_match('/^(.*?)\s+(\d+)\s+(\d+)(\s+[\+|\-])*$/', trim($tags['Target'][0]), $matches);

  // the organism and type of the target may also be specified as an attribute. If so, then get that
  // information
  $gff_target_organism = array_key_exists('target_organism', $tags) ? $tags['target_organism'][0] : '';
  $gff_target_type = array_key_exists('target_type', $tags) ? $tags['target_type'][0] : '';

  // the target attribute is not correctly formatted
  if (!$matched) {
    watchdog('T_gff3_loader', "Could not add 'Target' alignment as it is improperly formatted: '%target'",
      array('%target' => $tags['Target'][0]), WATCHDOG_ERROR);
    return;
  }

  // we have matches and the Target is in the correct format, so load the alignment
  $target_feature = $matches[1];
  $start = $matches[2];
  $end = $matches[3];

  // if we have an optional strand, convert it to a numeric value. preg_match()
  // leaves $matches[4] unset when the optional group did not participate, so
  // test for its presence before reading it.
  $strand_token = isset($matches[4]) ? trim($matches[4]) : '';
  if ($strand_token === '+') {
    $target_strand = 1;
  }
  elseif ($strand_token === '-') {
    $target_strand = -1;
  }
  else {
    // no strand given (or the pattern's character class matched a literal '|')
    $target_strand = 0;
  }

  // convert the 1-based inclusive GFF coordinates to fmin/fmax, swapping them
  // if they were given in reverse order
  $target_fmin = $start - 1;
  $target_fmax = $end;
  if ($end < $start) {
    $target_fmin = $end - 1;
    $target_fmax = $start;
  }

  // default the target organism to be the value passed into the function, but if the GFF
  // file specifies the target organism then use that instead.
  $t_organism_id = $target_organism_id;
  if ($gff_target_organism) {
    // get the genus and species
    $success = preg_match('/^(.*?):(.*?)$/', $gff_target_organism, $matches);
    if ($success) {
      $values = array(
        'genus' => $matches[1],
        'species' => $matches[2],
      );
      $options = array('statement_name' => 'sel_organism_gesp');
      $torganism = tripal_core_chado_select('organism', array('organism_id'), $values, $options);
      if (count($torganism) == 1) {
        $t_organism_id = $torganism[0]->organism_id;
      }
      else {
        watchdog('T_gff3_loader', "Cannot find organism for target %target.",
          array('%target' => $gff_target_organism), WATCHDOG_WARNING);
        $t_organism_id = '';
      }
    }
    else {
      watchdog('T_gff3_loader', "The target_organism attribute is improperly formatted: %target. \n" .
        "It should be target_organism=genus:species.",
        array('%target' => $gff_target_organism), WATCHDOG_WARNING);
      $t_organism_id = '';
    }
  }

  // default the target type to be the value passed into the function, but if the GFF file
  // specifies the target type then use that instead
  $t_type_id = '';
  if ($target_type) {
    $values = array(
      'name' => $target_type,
      'cv_id' => array(
        'name' => 'sequence',
      )
    );
    $options = array('statement_name' => 'sel_cvterm_nacv');
    $type = tripal_core_chado_select('cvterm', array('cvterm_id'), $values, $options);
    if (count($type) == 1) {
      $t_type_id = $type[0]->cvterm_id;
    }
    else {
      // a bad type passed in by the caller is treated as fatal
      watchdog('T_gff3_loader', "The target type does not exist in the sequence ontology: %type. ",
        array('%type' => $target_type), WATCHDOG_ERROR);
      exit;
    }
  }
  if ($gff_target_type) {
    $values = array(
      'name' => $gff_target_type,
      'cv_id' => array(
        'name' => 'sequence',
      )
    );
    $options = array('statement_name' => 'sel_cvterm_nacv');
    $type = tripal_core_chado_select('cvterm', array('cvterm_id'), $values, $options);
    if (count($type) == 1) {
      $t_type_id = $type[0]->cvterm_id;
    }
    else {
      // a bad type in the GFF file only produces a warning and clears the type
      watchdog('T_gff3_loader', "The target_type attribute does not exist in the sequence ontology: %type. ",
        array('%type' => $gff_target_type), WATCHDOG_WARNING);
      $t_type_id = '';
    }
  }

  // NOTE(review): the following values were passed to
  // tripal_feature_load_gff3_featureloc() without ever being defined in this
  // function, so they have always evaluated to NULL (with an undefined-variable
  // notice each). They are set explicitly here to keep that behaviour visible
  // and notice-free. TODO confirm what the caller intended to supply and add
  // the corresponding parameters.
  $organism = NULL;
  $phase = NULL;
  $attr_fmin_partial = NULL;
  $attr_fmax_partial = NULL;
  $attr_residue_info = NULL;

  // we want to add a featureloc record that uses the target feature as the srcfeature (landmark)
  // and the landmark as the feature.
  tripal_feature_load_gff3_featureloc($feature, $organism, $target_feature, $target_fmin,
    $target_fmax, $target_strand, $phase, $attr_fmin_partial, $attr_fmax_partial, $attr_residue_info,
    $attr_locgroup, $t_type_id, $t_organism_id, $create_target, TRUE);
}