Tripal v1.0 (6.x-1.0)
|
<?php

/**
 * @file
 * FASTA loader for the Tripal feature module.
 *
 * Contains the import form with its validate/submit handlers, the job
 * callback that parses a FASTA file, and the per-sequence routine that
 * inserts or updates the matching Chado feature records.
 */

/**
 * Translates the submitted 'method' radio index into its internal label.
 *
 * @param $method
 *   The submitted radio key ('0', '1' or '2').
 *
 * @return
 *   'Insert only', 'Update only' or 'Insert and update'. Any other value is
 *   returned unchanged.
 */
function _tripal_feature_fasta_method_label($method) {
  $labels = array(
    '0' => 'Insert only',
    '1' => 'Update only',
    '2' => 'Insert and update',
  );
  $key = trim((string) $method);
  return isset($labels[$key]) ? $labels[$key] : $method;
}

/**
 * Translates the submitted 'match_type' radio index into its internal label.
 *
 * @param $match_type
 *   The submitted radio key ('0' or '1').
 *
 * @return
 *   'Name' or 'Unique name'. Any other value is returned unchanged.
 */
function _tripal_feature_fasta_match_type_label($match_type) {
  $labels = array(
    '0' => 'Name',
    '1' => 'Unique name',
  );
  $key = trim((string) $match_type);
  return isset($labels[$key]) ? $labels[$key] : $match_type;
}

/**
 * Looks up a cvterm by name or synonym within the named controlled vocabulary.
 *
 * @param $cv_name
 *   The name of the controlled vocabulary (e.g. 'sequence').
 * @param $term
 *   The term name or synonym to look for.
 *
 * @return
 *   The first row returned by the query (an object exposing cvterm_id), or a
 *   false value when no row matches.
 */
function _tripal_feature_fasta_get_cvterm($cv_name, $term) {
  $cvtermsql = "SELECT CVT.cvterm_id
                FROM {cvterm} CVT
                  INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
                  LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
                WHERE cv.name = '%s' and (CVT.name = '%s' or CVTS.synonym = '%s')";
  return db_fetch_object(chado_query($cvtermsql, $cv_name, $term, $term));
}

/**
 * Builds the FASTA import form.
 *
 * The form collects the file path, organism, sequence type, load method and
 * name matching mode, the analysis, and a set of optional "advanced" settings
 * (regular expressions, external database reference, parent relationship).
 *
 * @return
 *   A Drupal Form API array.
 */
function tripal_feature_fasta_load_form() {

  $form['fasta_file'] = array(
    '#type' => 'textfield',
    '#title' => t('FASTA File'),
    '#description' => t('Please enter the full system path for the FASTA file, or a path within the Drupal
                         installation (e.g. /sites/default/files/xyz.obo). The path must be accessible to the
                         server on which this Drupal instance is running.'),
    '#required' => TRUE,
  );

  // get the list of organisms
  $sql = "SELECT * FROM {organism} ORDER BY genus, species";
  $org_rset = chado_query($sql);
  $organisms = array();
  $organisms[''] = '';
  while ($organism = db_fetch_object($org_rset)) {
    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  }
  $form['organism_id'] = array(
    '#title' => t('Organism'),
    // The element type is a Form API identifier and must not be translated.
    '#type' => 'select',
    '#description' => t("Choose the organism to which these sequences are associated"),
    '#required' => TRUE,
    '#options' => $organisms,
  );

  $form['seqtype'] = array(
    '#type' => 'textfield',
    '#title' => t('Sequence Type'),
    '#required' => TRUE,
    '#description' => t('Please enter the Sequence Ontology (SO) term name that describes the sequences in the FASTA file (e.g. gene, mRNA, protein, etc...)'),
  );

  // The library selector ('library_id') is disabled, so the library list is
  // not queried here. The handlers below treat a missing library_id as NULL.

  $form['method'] = array(
    '#type' => 'radios',
    '#title' => t('Method'),
    '#required' => TRUE,
    '#options' => array(
      t('Insert only'),
      t('Update only'),
      t('Insert and update'),
    ),
    '#description' => t('Select how features in the FASTA file are handled.
       Select "Insert only" to insert the new features. If a feature already
       exists with the same name or unique name and type then it is skipped.
       Select "Update only" to only update featues that already exist in the
       database. Select "Insert and Update" to insert features that do
       not exist and upate those that do.'),
    '#default_value' => 2,
  );

  $form['match_type'] = array(
    '#type' => 'radios',
    '#title' => t('Name Match Type'),
    '#required' => TRUE,
    '#options' => array(
      t('Name'),
      t('Unique name'),
    ),
    '#description' => t('Used for "updates only" or "insert and update" methods. Not required if method type is "insert".
      Feature data is stored in Chado with both a human-readable
      name and a unique name. If the features in your FASTA file are uniquely identified using
      a human-readable name then select the "Name" button. If your features are
      uniquely identified using the unique name then select the "Unique name" button. If you
      loaded your features first using the GFF loader then the unique name of each
      features were indicated by the "ID=" attribute and the name by the "Name=" attribute.
      By default, the FASTA loader will use the first word (character string
      before the first space) as the name for your feature. If
      this does not uniquely identify your feature consider specifying a regular expression in the advanced section below.
      Additionally, you may import both a name and a unique name for each sequence using the advanced options.'),
    '#default_value' => 1,
  );

  $form['analysis'] = array(
    '#type' => 'fieldset',
    '#title' => t('Analysis Used to Derive Features'),
    '#collapsed' => TRUE
  );
  $form['analysis']['desc'] = array(
    '#type' => 'markup',
    '#value' => t("Why specify an analysis for a data load? All data comes
       from some place, even if downloaded from Genbank. By specifying
       analysis details for all data uploads, it allows an end user to reproduce the
       data set, but at least indicates the source of the data."),
  );

  // get the list of analyses
  $sql = "SELECT * FROM {analysis} ORDER BY name";
  $analysis_rset = chado_query($sql);
  $analyses = array();
  $analyses[''] = '';
  while ($analysis = db_fetch_object($analysis_rset)) {
    $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
  }
  $form['analysis']['analysis_id'] = array(
    '#title' => t('Analysis'),
    '#type' => 'select',
    '#description' => t("Choose the analysis to which these features are associated"),
    '#required' => TRUE,
    '#options' => $analyses,
  );

  // Advanced Options
  $form['advanced'] = array(
    '#type' => 'fieldset',
    '#title' => t('Advanced Options'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE
  );
  $form['advanced']['re_help'] = array(
    '#type' => 'item',
    '#value' => t('A regular expression is an advanced method for extracting information from a string of text.
                   Your FASTA file may contain both a human-readable name and a unique name for each sequence.
                   If you want to import
                   both the name and unique name for all sequences, then you must provide regular expressions
                   so that the loader knows how to separate them.
                   Otherwise the name and uniquename will be the same.
                   By default, this loader will use the first word in the definition
                   lines of the FASTA file
                   as the name or unique name of the feature.'),
  );
  $form['advanced']['re_name'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the name'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the
       feature name from the FASTA definition line. For example, for a
       defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
       the regular expression for the name would be, "^(.*?)\|.*$".'),
  );
  $form['advanced']['re_uname'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the unique name'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the
       feature name from the FASTA definition line. For example, for a
       defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
       the regular expression for the unique name would be "^.*?\|(.*)$").'),
  );

  // Advanced database cross-reference options
  $form['advanced']['db'] = array(
    '#type' => 'fieldset',
    '#title' => t('External Database Reference'),
    '#weight' => 6,
    '#collapsed' => TRUE
  );
  $form['advanced']['db']['re_accession'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the accession'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the accession for the external database for each feature from the FASTA definition line.'),
    '#weight' => 2
  );

  // get the list of databases
  $sql = "SELECT * FROM {db} ORDER BY name";
  $db_rset = chado_query($sql);
  $dbs = array();
  $dbs[''] = '';
  while ($db = db_fetch_object($db_rset)) {
    $dbs[$db->db_id] = "$db->name";
  }
  $form['advanced']['db']['db_id'] = array(
    '#title' => t('External Database'),
    '#type' => 'select',
    '#description' => t("Plese choose an external database for which these sequences have a cross reference."),
    '#required' => FALSE,
    '#options' => $dbs,
    '#weight' => 1,
  );

  $form['advanced']['relationship'] = array(
    '#type' => 'fieldset',
    '#title' => t('Relationships'),
    '#weight' => 6,
    '#collapsed' => TRUE
  );
  $rels = array();
  $rels[''] = '';
  $rels['part_of'] = 'part of';
  $rels['derives_from'] = 'produced by';

  // Advanced references options
  $form['advanced']['relationship']['rel_type'] = array(
    '#title' => t('Relationship Type'),
    '#type' => 'select',
    '#description' => t("Use this option to create associations, or relationships between the
                         features of this FASTA file and existing features in the database. For
                         example, to associate a FASTA file of peptides to existing genes or transcript sequence,
                         select the type 'produced by'. For a CDS sequences select the type 'part of'"),
    '#required' => FALSE,
    '#options' => $rels,
    '#weight' => 5,
  );
  $form['advanced']['relationship']['re_subject'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the parent'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the unique
                         name needed to identify the existing sequence for which the
                         relationship type selected above will apply.'),
    '#weight' => 6
  );
  $form['advanced']['relationship']['parent_type'] = array(
    '#type' => 'textfield',
    '#title' => t('Parent Type'),
    '#required' => FALSE,
    '#description' => t('Please enter the Sequence Ontology term for the parent. For example
                         if the FASTA file being loaded is a set of proteins that are
                         products of genes, then use the SO term \'gene\' or \'transcript\' or equivalent. However,
                         this type must match the type for already loaded features.'),
    '#weight' => 7
  );

  $form['button'] = array(
    '#type' => 'submit',
    '#value' => t('Import FASTA file'),
    '#weight' => 10,
  );
  return $form;
}

/**
 * Validation handler for the FASTA import form.
 *
 * Checks that the regular expressions agree with the chosen match type, that
 * the file can be found, that the relationship and database-reference fields
 * are either all filled in or all empty, and that the sequence types exist in
 * the 'sequence' vocabulary. The resolved file path is stored in
 * $form_state['storage']['dfile'] for the submit handler.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state, passed by reference.
 */
function tripal_feature_fasta_load_form_validate($form, &$form_state) {
  $values = $form_state['values'];

  $fasta_file   = trim($values['fasta_file']);
  $type         = trim($values['seqtype']);
  $match_type   = _tripal_feature_fasta_match_type_label($values['match_type']);
  $re_name      = trim($values['re_name']);
  $re_uname     = trim($values['re_uname']);
  $re_accession = trim($values['re_accession']);
  $db_id        = $values['db_id'];
  $rel_type     = $values['rel_type'];
  $re_subject   = trim($values['re_subject']);
  $parent_type  = trim($values['parent_type']);

  if ($re_name and !$re_uname and strcmp($match_type, 'Unique name') == 0) {
    form_set_error('re_uname', t("You must provide a regular expression to identify the sequence unique name"));
  }

  if (!$re_name and $re_uname and strcmp($match_type, 'Name') == 0) {
    form_set_error('re_name', t("You must provide a regular expression to identify the sequence name"));
  }

  // check to see if the file is located local to Drupal
  $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $fasta_file;
  if (!file_exists($dfile)) {
    // if not local to Drupal, the file must be someplace else, just use
    // the full path provided
    $dfile = $fasta_file;
  }
  if (!file_exists($dfile)) {
    form_set_error('fasta_file', t("Cannot find the file on the system. Check that the file exists or that the web server has permissions to read the file."));
  }

  // make sure if a relationship is specified that all fields are provided.
  if (($rel_type or $parent_type) and !$re_subject) {
    form_set_error('re_subject', t("Please provide a regular expression for the parent"));
  }
  if (($rel_type or $re_subject) and !$parent_type) {
    form_set_error('parent_type', t("Please provide a SO term for the parent"));
  }
  if (($parent_type or $re_subject) and !$rel_type) {
    form_set_error('rel_type', t("Please select a relationship type"));
  }

  // make sure if a database is specified that all fields are provided
  if ($db_id and !$re_accession) {
    form_set_error('re_accession', t("Please provide a regular expression for the accession"));
  }
  if ($re_accession and !$db_id) {
    form_set_error('db_id', t("Please select a database"));
  }

  // check to make sure the types exist
  if (!_tripal_feature_fasta_get_cvterm('sequence', $type)) {
    // The error is attached to 'seqtype', which is the name of the form
    // element that holds the sequence type.
    form_set_error('seqtype', t("The Sequence Ontology (SO) term selected for the sequence type is not available in the database. Please check spelling or select another."));
  }
  if ($rel_type) {
    if (!_tripal_feature_fasta_get_cvterm('sequence', $parent_type)) {
      form_set_error('parent_type', t("The Sequence Ontology (SO) term selected for the parent relationship is not available in the database. Please check spelling or select another."));
    }
  }

  // remember the resolved path for the submit handler
  $form_state['storage']['dfile'] = $dfile;
}

/**
 * Submit handler for the FASTA import form.
 *
 * Gathers the submitted values and registers a Tripal job that will call
 * tripal_feature_load_fasta() with them.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state, passed by reference.
 */
function tripal_feature_fasta_load_form_submit($form, &$form_state) {
  global $user;

  $values = $form_state['values'];

  $dfile        = $form_state['storage']['dfile'];
  $organism_id  = $values['organism_id'];
  $type         = trim($values['seqtype']);
  $method       = _tripal_feature_fasta_method_label($values['method']);
  $match_type   = _tripal_feature_fasta_match_type_label($values['match_type']);
  // The library selector is not part of the form at present, so this value
  // is normally absent.
  $library_id   = isset($values['library_id']) ? $values['library_id'] : NULL;
  $re_name      = trim($values['re_name']);
  $re_uname     = trim($values['re_uname']);
  $re_accession = trim($values['re_accession']);
  $db_id        = $values['db_id'];
  $rel_type     = $values['rel_type'];
  $re_subject   = trim($values['re_subject']);
  $parent_type  = trim($values['parent_type']);
  $analysis_id  = $values['analysis_id'];

  // The order of these arguments must match the parameter order of
  // tripal_feature_load_fasta().
  $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
    $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
    $user->uid, $analysis_id, $match_type);

  $fname = basename($dfile);
  tripal_add_job("Import FASTA file: $fname", 'tripal_feature',
    'tripal_feature_load_fasta', $args, $user->uid);
}

/**
 * Job callback: parses a FASTA file and loads each sequence as a feature.
 *
 * Every definition line (a line starting with '>') starts a new sequence. The
 * name, unique name, accession and parent are extracted from it, and the
 * following lines are concatenated as residues. Each completed sequence is
 * handed to tripal_feature_fasta_loader_handle_feature().
 *
 * @param $dfile
 *   Path of the FASTA file to read.
 * @param $organism_id
 *   The organism_id the features belong to.
 * @param $type
 *   Name (or synonym) of the 'sequence' cvterm describing the features.
 * @param $library_id
 *   Accepted for interface compatibility; not used by this function.
 * @param $re_name
 *   Optional regular expression (without delimiters) whose first capture
 *   group is the feature name.
 * @param $re_uname
 *   Optional regular expression whose first capture group is the unique name.
 * @param $re_accession
 *   Optional regular expression whose first capture group is the accession.
 * @param $db_id
 *   Optional db_id of the external database for the accession.
 * @param $rel_type
 *   Optional name of the 'relationship' cvterm linking feature and parent.
 * @param $re_subject
 *   Optional regular expression whose first capture group is the unique name
 *   of the parent feature.
 * @param $parent_type
 *   Optional name of the 'sequence' cvterm of the parent feature.
 * @param $method
 *   'Insert only', 'Update only' or 'Insert and update'.
 * @param $uid
 *   Accepted for interface compatibility; not used by this function.
 * @param $analysis_id
 *   The analysis_id to associate each feature with.
 * @param $match_type
 *   'Name' or 'Unique name'.
 * @param $job
 *   Optional job identifier used for progress reporting.
 *
 * @return
 *   0 when a required term cannot be found or the file cannot be opened;
 *   otherwise nothing.
 */
function tripal_feature_load_fasta($dfile, $organism_id, $type,
  $library_id, $re_name, $re_uname, $re_accession, $db_id, $rel_type,
  $re_subject, $parent_type, $method, $uid, $analysis_id,
  $match_type, $job = NULL) {

  // begin the transaction
  // NOTE(review): the early "return 0" paths below leave without committing
  // or rolling back this transaction; confirm how tripal_core handles that.
  $connection = tripal_db_start_transaction();

  // if we cannot get a connection then let the user know the loading will be slow
  if (!$connection) {
    print "A persistant connection was not obtained. Loading will be slow\n";
  }
  else {
    print "\nNOTE: Loading of this FASTA file is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";
  }

  // first get the type for this sequence
  $cvterm = _tripal_feature_fasta_get_cvterm('sequence', $type);
  if (!$cvterm) {
    watchdog("T_fasta_loader", "Cannot find the term type: '%type'", array('%type' => $type), WATCHDOG_ERROR);
    return 0;
  }
  $parentcvterm = NULL;
  if ($parent_type) {
    $parentcvterm = _tripal_feature_fasta_get_cvterm('sequence', $parent_type);
    if (!$parentcvterm) {
      // report the term that was searched for, not the (empty) lookup result
      watchdog("T_fasta_loader", "Cannot find the paretne term type: '%type'", array('%type' => $parent_type), WATCHDOG_ERROR);
      return 0;
    }
  }
  $relcvterm = NULL;
  if ($rel_type) {
    $relcvterm = _tripal_feature_fasta_get_cvterm('relationship', $rel_type);
    if (!$relcvterm) {
      watchdog("T_fasta_loader", "Cannot find the relationship term type: '%type'", array('%type' => $rel_type), WATCHDOG_ERROR);
      return 0;
    }
  }

  print "Opening FASTA file $dfile\n";

  $fh = fopen($dfile, 'r');
  if (!$fh) {
    watchdog('T_fasta_loader', "cannot open file: %dfile", array('%dfile' => $dfile), WATCHDOG_ERROR);
    return 0;
  }
  $filesize = filesize($dfile);

  $i = 0;          // current line number
  $num_read = 0;   // bytes read so far
  $intv_read = 0;  // bytes read since the last progress report

  $name = '';
  $uname = '';
  $residues = '';
  $accession = '';
  $subject = '';
  // $source is only passed through to the feature handler, which ignores it.
  $source = NULL;

  // report progress roughly every 1% of the file
  $interval = intval($filesize * 0.01);
  if ($interval < 1) {
    $interval = 1;
  }

  // we need to get the table schema to make sure we don't overrun the
  // size of fields with what our regular expressions retrieve
  $feature_tbl = tripal_core_get_chado_table_schema('feature');
  $dbxref_tbl = tripal_core_get_chado_table_schema('dbxref');

  // Compare against FALSE explicitly so that a line which evaluates to false
  // (for example a final "0" without a newline) does not end the loop early.
  while (($line = fgets($fh)) !== FALSE) {
    $i++;
    // progress is measured against filesize(), which is in bytes
    $line_bytes = strlen($line);
    $num_read += $line_bytes;
    $intv_read += $line_bytes;

    // if we encounter a definition line then get the name, uniquename,
    // accession and relationship subject from the definition line
    if (preg_match('/^>/', $line)) {
      // if we have a feature name then we are starting a new sequence
      // so lets handle the previous one before moving on
      if ($name or $uname) {
        tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
          $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
          $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
      }
      // Always start the new record from a clean state. Otherwise the
      // residues of a sequence whose name could not be parsed, or the
      // accession/parent of the previous sequence, would carry over.
      $residues = '';
      $name = '';
      $uname = '';
      $accession = '';
      $subject = '';

      $line = preg_replace("/^>/", '', $line); // remove the > symbol from the defline

      // get the feature name
      if ($re_name) {
        if (!preg_match("/$re_name/", $line, $matches) or !isset($matches[1])) {
          watchdog('trp-fasta', "ERROR: Regular expression for the feature name finds nothing. Line %line.", array('%line' => $i), WATCHDOG_ERROR);
        }
        elseif (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
          watchdog('trp-fasta', "WARNING: Regular expression retrieves a value too long for the feature name. Line %line.", array('%line' => $i), WATCHDOG_ERROR);
        }
        else {
          $name = trim($matches[1]);
        }
      }
      // if the match_type is name and no regular expression was provided
      // then use the first word as the name, otherwise we don't set the name
      elseif (strcmp($match_type, 'Name') == 0) {
        if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) {
          if (strlen($matches[1]) > $feature_tbl['fields']['name']['length']) {
            watchdog('trp-fasta', "WARNING: Regular expression retrieves a feature name too long for the feature name. Line %line.", array('%line' => $i), WATCHDOG_ERROR);
          }
          else {
            $name = trim($matches[1]);
          }
        }
        else {
          watchdog('trp-fasta', "ERROR: Cannot find a feature name. Line %line.", array('%line' => $i), WATCHDOG_ERROR);
        }
      }

      // get the feature unique name
      if ($re_uname) {
        if (!preg_match("/$re_uname/", $line, $matches) or !isset($matches[1])) {
          watchdog('trp-fasta', "ERROR: Regular expression for the feature unique name finds nothing. Line %line.", array('%line' => $i), WATCHDOG_ERROR);
        }
        else {
          $uname = trim($matches[1]);
        }
      }
      // if the match_type is unique name and no regular expression was
      // provided then use the first word, otherwise we don't set the uniquename
      elseif (strcmp($match_type, 'Unique name') == 0) {
        if (preg_match("/^\s*(.*?)[\s\|].*$/", $line, $matches)) {
          $uname = trim($matches[1]);
        }
        else {
          watchdog('trp-fasta', "ERROR: Cannot find a feature unique name. Line %line.", array('%line' => $i), WATCHDOG_ERROR);
        }
      }

      // get the accession (only when an expression was supplied)
      if ($re_accession and preg_match("/$re_accession/", $line, $matches) and isset($matches[1])) {
        if (strlen($matches[1]) > $dbxref_tbl['fields']['accession']['length']) {
          watchdog('trp-fasta', "WARNING: Regular expression retrieves an accession too long for the feature name. Cannot add cross reference. Line %line.", array('%line' => $i), WATCHDOG_WARNING);
        }
        else {
          $accession = trim($matches[1]);
        }
      }

      // get the relationship subject (only when an expression was supplied)
      if ($re_subject and preg_match("/$re_subject/", $line, $matches) and isset($matches[1])) {
        $subject = trim($matches[1]);
      }
    }
    else {
      $residues .= trim($line);

      // update the job status each time another interval has been read
      if ($job and $intv_read >= $interval) {
        $intv_read = 0;
        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
        $current = $name ? $name : $uname;
        print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $current\r";
        tripal_job_set_progress($job, intval(($num_read / $filesize) * 100));
      }
    }
  }
  fclose($fh);

  // now load the last sequence in the file (if there is one)
  if ($name or $uname) {
    tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
      $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
      $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
  }

  // commit the transaction
  tripal_db_commit_transaction();
  print "\nDone\n";
}

/**
 * Inserts or updates a single feature parsed from the FASTA file.
 *
 * The feature is looked up by name or unique name (depending on $match_type)
 * within the organism and type. Depending on $method it is inserted, updated
 * or skipped. Afterwards the analysis link, the database cross reference and
 * the parent relationship are added when they were requested and do not exist.
 *
 * @param $name
 *   The feature name (may be empty).
 * @param $uname
 *   The feature unique name (may be empty).
 * @param $db_id
 *   Optional db_id for the cross reference.
 * @param $accession
 *   The accession extracted from the definition line (may be empty).
 * @param $parent
 *   Unique name of the parent feature (used when $rel_type is set).
 * @param $rel_type
 *   Name of the relationship type; when empty no relationship is added.
 * @param $parent_type
 *   Name of the parent's type, used in log messages.
 * @param $analysis_id
 *   Optional analysis_id to associate with the feature.
 * @param $organism_id
 *   The organism_id of the feature.
 * @param $cvterm
 *   Object exposing the cvterm_id of the feature type.
 * @param $source
 *   Not used by this function.
 * @param $residues
 *   The sequence residues.
 * @param $method
 *   'Insert only', 'Update only' or 'Insert and update'.
 * @param $re_name
 *   Not used by this function.
 * @param $match_type
 *   'Name' or 'Unique name'.
 * @param $parentcvterm
 *   Object exposing the cvterm_id of the parent type (when $rel_type is set).
 * @param $relcvterm
 *   Object exposing the cvterm_id of the relationship type.
 *
 * @return
 *   0 when the feature was skipped or a step failed; otherwise nothing.
 */
function tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id, $accession,
  $parent, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
  $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm) {

  $feature = NULL;
  $is_insert_only = (strcmp($method, 'Insert only') == 0);
  $is_update_only = (strcmp($method, 'Update only') == 0);
  $is_both        = (strcmp($method, 'Insert and update') == 0);

  // check to see if this feature already exists if the match_type is 'Name'
  if (strcmp($match_type, 'Name') == 0) {
    $values = array(
      'organism_id' => $organism_id,
      'name' => $name,
      'type_id' => $cvterm->cvterm_id,
    );
    $options = array('statement_name' => 'sel_feature_ornaty');
    $results = tripal_core_chado_select('feature', array('feature_id'), $values, $options);
    if (count($results) > 1) {
      // only the cvterm_id of the type is available in this function
      watchdog('T_fasta_loader', "Multiple features exist with the name '%name' of type id
         '%type' for the organism.  skipping", array('%name' => $name, '%type' => $cvterm->cvterm_id));
      return 0;
    }
    if (count($results) == 1) {
      $feature = $results[0];
    }
  }

  // check to see if this feature already exists if the match_type is 'Unique name'
  if (strcmp($match_type, 'Unique name') == 0) {
    $values = array(
      'organism_id' => $organism_id,
      'uniquename' => $uname,
      'type_id' => $cvterm->cvterm_id,
    );
    $options = array('statement_name' => 'sel_feature_oruqty');
    $results = tripal_core_chado_select('feature', array('feature_id'), $values, $options);
    if (count($results) > 1) {
      watchdog('T_fasta_loader', "Multiple features exist with the unique name '%uname' of type id
         '%type' for the organism.  skipping", array('%uname' => $uname, '%type' => $cvterm->cvterm_id));
      return 0;
    }
    if (count($results) == 1) {
      $feature = $results[0];
    }
  }

  // if the feature exists but this is an "insert only" method then skip this
  // feature, whichever field was used for matching
  if ($feature and $is_insert_only) {
    watchdog('T_fasta_loader', "Feature already exists '%name' ('%uname') while matching on %type. Skipping insert.",
      array('%name' => $name, '%uname' => $uname, '%type' => drupal_strtolower($match_type)), WATCHDOG_WARNING);
    return 0;
  }

  // if we don't have a feature and we're doing an insert then do the insert
  $inserted = 0;
  if (!$feature and ($is_insert_only or $is_both)) {
    // if we have a unique name but not a name then set them to be the same and vice versa
    if (!$uname) {
      $uname = $name;
    }
    elseif (!$name) {
      $name = $uname;
    }

    // insert the feature
    $values = array(
      'organism_id' => $organism_id,
      'name' => $name,
      'uniquename' => $uname,
      'residues' => $residues,
      'seqlen' => drupal_strlen($residues),
      'md5checksum' => md5($residues),
      'type_id' => $cvterm->cvterm_id,
      'is_analysis' => 'FALSE',
      'is_obsolete' => 'FALSE',
    );
    $options = array('statement_name' => 'ins_feature_all');
    $success = tripal_core_chado_insert('feature', $values, $options);
    if (!$success) {
      watchdog('T_fasta_loader', "Failed to insert feature '%name (%uname)'",
        array('%name' => $name, '%uname' => $uname), WATCHDOG_ERROR);
      return 0;
    }

    // now get the feature we just inserted
    $values = array(
      'organism_id' => $organism_id,
      'uniquename' => $uname,
      'type_id' => $cvterm->cvterm_id,
    );
    $options = array('statement_name' => 'sel_feature_oruqty');
    $results = tripal_core_chado_select('feature', array('feature_id'), $values, $options);
    if (count($results) == 1) {
      $inserted = 1;
      $feature = $results[0];
    }
    else {
      watchdog('T_fasta_loader', "Failed to retreive newly inserted feature '%name (%uname)'",
        array('%name' => $name, '%uname' => $uname), WATCHDOG_ERROR);
      return 0;
    }
  }

  // if we don't have a feature and the user wants to do an update then fail
  if (!$feature and ($is_update_only or $is_both)) {
    watchdog('T_fasta_loader', "Failed to find feature '%name' ('%uname') while matching on %type",
      array('%name' => $name, '%uname' => $uname, '%type' => drupal_strtolower($match_type)), WATCHDOG_ERROR);
    return 0;
  }

  // if we do have a feature and this is an update then proceed with the update
  if ($feature and !$inserted and ($is_update_only or $is_both)) {

    // the fields that are refreshed by every kind of update
    $sequence_values = array(
      'residues' => $residues,
      'seqlen' => drupal_strlen($residues),
      'md5checksum' => md5($residues),
      'is_analysis' => 'false',
      'is_obsolete' => 'false',
    );

    // if the user wants to match on the Name field
    if (strcmp($match_type, 'Name') == 0) {
      $match = array(
        'name' => $name,
        'organism_id' => $organism_id,
        'type_id' => $cvterm->cvterm_id,
      );
      // if we're matching on the name but do not have a unique name then we
      // don't want to update the uniquename.
      if ($uname) {
        // first check to make sure that by changing the unique name of this
        // feature that we won't conflict with another existing feature
        $values = array(
          'organism_id' => $organism_id,
          'uniquename' => $uname,
          'type_id' => $cvterm->cvterm_id,
        );
        $options = array('statement_name' => 'sel_feature_oruqty');
        $results = tripal_core_chado_select('feature', array('feature_id'), $values, $options);
        if (is_array($results)) {
          foreach ($results as $existing) {
            // The feature being updated may already carry this uniquename;
            // only a different feature is a real conflict.
            if ($existing->feature_id != $feature->feature_id) {
              watchdog('T_fasta_loader', "Cannot update the feature '%name' with a uniquename of '%uname' and type id of '%type' as it
                conflicts with an existing feature with the same uniquename and type.",
                array('%name' => $name, '%uname' => $uname, '%type' => $cvterm->cvterm_id));
              return 0;
            }
          }
        }

        // the changes to the uniquename don't conflict so proceed with the update
        $values = array('uniquename' => $uname) + $sequence_values;
        $options = array('statement_name' => 'upd_feature_resemdisis_naorty_un');
      }
      // if we do not have a new unique name then don't change the existing uniquename field
      else {
        $values = $sequence_values;
        $options = array('statement_name' => 'upd_feature_unresemdisis_naorty');
      }

      // perform the update
      $success = tripal_core_chado_update('feature', $match, $values, $options);
      if (!$success) {
        watchdog('T_fasta_loader', "Failed to update feature '%name' ('%uname')",
          array('%name' => $name, '%uname' => $uname), WATCHDOG_ERROR);
        return 0;
      }
    }

    // if the user wants to match on the unique name field
    if (strcmp($match_type, 'Unique name') == 0) {
      $match = array(
        'uniquename' => $uname,
        'organism_id' => $organism_id,
        'type_id' => $cvterm->cvterm_id,
      );
      // if we're matching on the uniquename but do not have a new name then
      // we don't want to update the name.
      if ($name) {
        $values = array('name' => $name) + $sequence_values;
        $options = array('statement_name' => 'upd_feature_resemdisis_unorty_na');
      }
      else {
        $values = $sequence_values;
        $options = array('statement_name' => 'upd_feature_naresemdisis_unorty');
      }
      $success = tripal_core_chado_update('feature', $match, $values, $options);
      if (!$success) {
        watchdog('T_fasta_loader', "Failed to update feature '%name' ('%uname')",
          array('%name' => $name, '%uname' => $uname), WATCHDOG_ERROR);
        return 0;
      }
    }
  }

  // add in the analysis link
  if ($analysis_id) {
    // if the association doesn't already exist then add one
    $values = array(
      'analysis_id' => $analysis_id,
      'feature_id' => $feature->feature_id,
    );
    $sel_options = array('statement_name' => 'sel_analysisfeature_anfe');
    $results = tripal_core_chado_select('analysisfeature', array('analysisfeature_id'), $values, $sel_options);
    if (count($results) == 0) {
      $ins_options = array('statement_name' => 'ins_analysisfeature_anfe');
      $success = tripal_core_chado_insert('analysisfeature', $values, $ins_options);
      if (!$success) {
        watchdog('T_fasta_loader', "Failed to associate analysis and feature '%name' ('%uname')",
          array('%name' => $name, '%uname' => $uname), WATCHDOG_ERROR);
        return 0;
      }
    }
  }

  // now add the database cross reference. An empty accession means nothing
  // usable was extracted from the definition line, so no reference is added.
  if ($db_id and $accession !== NULL and $accession !== '') {
    // check to see if this accession reference exists, if not add it
    $values = array(
      'db_id' => $db_id,
      'accession' => $accession
    );
    $sel_options = array('statement_name' => 'sel_dbxref_dbac');
    $results = tripal_core_chado_select('dbxref', array('dbxref_id'), $values, $sel_options);
    // if the accession doesn't exist then add it
    if (count($results) == 0) {
      $ins_options = array('statement_name' => 'ins_dbxref_dbac');
      $results = tripal_core_chado_insert('dbxref', $values, $ins_options);
      if (!$results) {
        watchdog('T_fasta_loader', "Failed to add database accession '%accession'",
          array('%accession' => $accession), WATCHDOG_ERROR);
        return 0;
      }
      $results = tripal_core_chado_select('dbxref', array('dbxref_id'), $values, $sel_options);
      if (count($results) == 1) {
        $dbxref = $results[0];
      }
      else {
        watchdog('T_fasta_loader', "Failed to retreive newly inserted dbxref '%name (%uname)'",
          array('%name' => $name, '%uname' => $uname), WATCHDOG_ERROR);
        return 0;
      }
    }
    else {
      $dbxref = $results[0];
    }

    // check to see if the feature dbxref record exists if not, then add it
    $values = array(
      'feature_id' => $feature->feature_id,
      'dbxref_id' => $dbxref->dbxref_id
    );
    $sel_options = array('statement_name' => 'sel_featuredbxref_fedb');
    $results = tripal_core_chado_select('feature_dbxref', array('feature_dbxref_id'), $values, $sel_options);
    if (count($results) == 0) {
      $ins_options = array('statement_name' => 'ins_featuredbxref_fedb');
      $success = tripal_core_chado_insert('feature_dbxref', $values, $ins_options);
      if (!$success) {
        watchdog('T_fasta_loader', "Failed to add associate database accession '%accession' with feature",
          array('%accession' => $accession), WATCHDOG_ERROR);
        return 0;
      }
    }
  }

  // now add in the relationship if one was requested and does not exist yet
  if ($rel_type) {
    $values = array(
      'organism_id' => $organism_id,
      'uniquename' => $parent,
      'type_id' => $parentcvterm->cvterm_id,
    );
    $options = array('statement_name' => 'sel_feature_oruqty');
    $results = tripal_core_chado_select('feature', array('feature_id'), $values, $options);
    if (count($results) != 1) {
      watchdog('T_fasta_loader', "Cannot find a unique fature for the parent '%parent' of type
        '%type' for the feature.", array('%parent' => $parent, '%type' => $parent_type));
      return 0;
    }
    $parent_feature = $results[0];

    // check to see if the relationship already exists if not then add it
    $values = array(
      'subject_id' => $feature->feature_id,
      'object_id' => $parent_feature->feature_id,
      'type_id' => $relcvterm->cvterm_id,
    );
    $sel_options = array('statement_name' => 'sel_featurerelationship_suojty');
    $results = tripal_core_chado_select('feature_relationship', array('feature_relationship_id'), $values, $sel_options);
    if (count($results) == 0) {
      $ins_options = array('statement_name' => 'ins_featurerelationship_suojty');
      $success = tripal_core_chado_insert('feature_relationship', $values, $ins_options);
      if (!$success) {
        watchdog('T_fasta_loader', "Failed to add the '%rel' relationship between feature '%name' ('%uname') and parent '%parent'",
          array('%rel' => $rel_type, '%name' => $name, '%uname' => $uname, '%parent' => $parent), WATCHDOG_ERROR);
        return 0;
      }
    }
  }
}