Tripal v1.0 (6.x-1.0)
|
<?php

/**
 * @file
 * Forms and job callbacks used to load OBO v1.2 ontology files.
 */

/**
 * Form builder for the OBO loader form.
 *
 * Offers two ways to pick an ontology: a select list of the references
 * stored in the {tripal_cv_obo} table, or a new name plus either a remote
 * URL or a local file path.
 *
 * @param $form_state
 *   The form state (not read by this builder).
 *
 * @return
 *   The form array.
 */
function tripal_cv_obo_form(&$form_state = NULL) {

  // Collect the saved OBO references. The leading empty entry (key 0) lets
  // the user leave the select list unset.
  $sql = "SELECT * FROM {tripal_cv_obo} ORDER BY name";
  $results = db_query($sql);

  $obos = array();
  $obos[] = '';
  while ($obo = db_fetch_object($results)) {
    $obos[$obo->obo_id] = $obo->name;
  }

  $form['obo_existing'] = array(
    '#type' => 'fieldset',
    '#title' => t('Use a Saved Ontology OBO Reference')
  );

  $form['obo_new'] = array(
    '#type' => 'fieldset',
    '#title' => t('Use a New Ontology OBO Reference')
  );

  $form['obo_existing']['existing_instructions']= array(
    '#value' => t('The Ontology OBO files listed in the drop down below have been automatically added upon
      installation of the Tripal CV module or were added from a previous upload. Select
      an OBO, then click the submit button to load the vocabulary into the database. If the
      vocabularies already exist then the ontology will be updated.'),
    '#weight' => -1
  );

  $form['obo_existing']['obo_id'] = array(
    '#title' => t('Ontology OBO File Reference'),
    '#type' => 'select',
    '#options' => $obos,
    '#weight' => 0
  );

  $form['obo_new']['path_instructions']= array(
    '#value' => t('Provide the name and path for the OBO file. If the vocabulary OBO file
      is stored local to the server provide a file name. If the vocabulry is stored remotely,
      provide a URL. Only provide a URL or a local file, not both.'),
    '#weight' => 0
  );

  $form['obo_new']['obo_name']= array(
    '#type' => 'textfield',
    '#title' => t('New Vocabulary Name'),
    '#description' => t('Please provide a name for this vocabulary. After upload, this name will appear in the drop down
      list above for use again later.'),
    '#weight' => 1
  );

  // The URL and file fields start out empty. The original code passed an
  // undefined variable as their default value.
  $form['obo_new']['obo_url']= array(
    '#type' => 'textfield',
    '#title' => t('Remote URL'),
    '#description' => t('Please enter a URL for the online OBO file. The file will be downloaded and parsed.
      (e.g. http://www.obofoundry.org/ro/ro.obo'),
    '#default_value' => '',
    '#weight' => 2
  );

  $form['obo_new']['obo_file']= array(
    '#type' => 'textfield',
    '#title' => t('Local File'),
    '#description' => t('Please enter the full system path for an OBO definition file, or a path within the Drupal
      installation (e.g. /sites/default/files/xyz.obo). The path must be accessible to the
      server on which this Drupal instance is running.'),
    '#default_value' => '',
    '#weight' => 3
  );

  $form['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Submit'),
    '#weight' => 5,
    '#executes_submit_callback' => TRUE,
  );

  $form['#redirect'] = 'admin/tripal/tripal_cv/obo_loader';

  return $form;
}

/**
 * Submit handler for the OBO loader form.
 *
 * Queues a Tripal job for the chosen ontology. A saved reference takes
 * precedence over a new URL, and a URL takes precedence over a local file.
 * Nothing is queued when none of the three was supplied.
 *
 * @param $form
 *   The submitted form array.
 * @param $form_state
 *   The form state; 'obo_id', 'obo_name', 'obo_url' and 'obo_file' are read
 *   from $form_state['values'].
 */
function tripal_cv_obo_form_submit($form, &$form_state) {
  global $user;

  $obo_id   = $form_state['values']['obo_id'];
  $obo_name = $form_state['values']['obo_name'];
  $obo_url  = $form_state['values']['obo_url'];
  $obo_file = $form_state['values']['obo_file'];

  if ($obo_id) {
    // Only look the reference up when one was actually selected, and do not
    // queue a job for a reference that no longer exists.
    $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
    $obo = db_fetch_object(db_query($sql, $obo_id));
    if (!$obo) {
      drupal_set_message(t('The selected OBO reference could not be found.'), 'error');
      return;
    }
    $args = array($obo_id);
    tripal_add_job("Load OBO $obo->name", 'tripal_cv',
      "tripal_cv_load_obo_v1_2_id", $args, $user->uid);
  }
  elseif ($obo_url) {
    $args = array($obo_name, $obo_url);
    tripal_add_job("Load OBO $obo_name", 'tripal_cv',
      "tripal_cv_load_obo_v1_2_url", $args, $user->uid);
  }
  elseif ($obo_file) {
    $args = array($obo_name, $obo_file);
    tripal_add_job("Load OBO $obo_name", 'tripal_cv',
      "tripal_cv_load_obo_v1_2_file", $args, $user->uid);
  }
}

/**
 * Form builder for the "update cvtermpath" form.
 *
 * Lists every Chado cv except the one named 'tripal'.
 *
 * @return
 *   The form array.
 */
function tripal_cv_cvtermpath_form() {

  // Get the list of vocabularies from Chado for the user to choose from.
  $sql = "SELECT * FROM {cv} WHERE NOT name = 'tripal' ORDER BY name ";
  $results = chado_query($sql);

  $cvs = array();
  $cvs[] = '';
  while ($cv = db_fetch_object($results)) {
    $cvs[$cv->cv_id] = $cv->name;
  }

  $form['cvid'] = array(
    '#title' => t('Controlled Vocabulary/Ontology Name'),
    '#type' => 'select',
    '#options' => $cvs,
    '#description' => t('The Chado cvtermpath is a database table that provides lineage for ontology terms
      and is useful for quickly finding any ancestor parent of a term. This table must be populated for each
      ontology. Select a controlled vocabulary for which you would like to upate the cvtermpath.'),
  );

  $form['description'] = array(
    '#type' => 'item',
    '#value' => t("Submit a job to update chado cvtermpath table."),
    '#weight' => 1,
  );

  $form['button'] = array(
    '#type' => 'submit',
    '#value' => t('Update cvtermpath'),
    '#weight' => 2,
  );

  return $form;
}

/**
 * Job callback: loads the ontology behind a saved {tripal_cv_obo} reference.
 *
 * Paths starting with http://, https:// or ftp:// are downloaded. Any other
 * path is tried first relative to the Drupal installation and then as given.
 *
 * @param $obo_id
 *   The obo_id of the row in {tripal_cv_obo}.
 * @param $jobid
 *   Optional Tripal job id, passed through to the loaders.
 */
function tripal_cv_load_obo_v1_2_id($obo_id, $jobid = NULL) {

  // Get the OBO reference.
  $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
  $obo = db_fetch_object(db_query($sql, $obo_id));
  if (!$obo) {
    print "ERROR: could not find an OBO reference with id: '$obo_id'\n";
    return;
  }

  // If the reference is a remote URL then run the URL processing function.
  if (preg_match('/^(https?|ftp):\/\//', $obo->path)) {
    tripal_cv_load_obo_v1_2_url($obo->name, $obo->path, $jobid, 0);
    return;
  }

  // Otherwise it is a local file: check whether it lives inside Drupal.
  $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
  if (file_exists($dfile)) {
    tripal_cv_load_obo_v1_2_file($obo->name, $dfile , $jobid, 0);
  }
  // If not local to Drupal, the file must be someplace else, so use the
  // path exactly as stored.
  elseif (file_exists($obo->path)) {
    tripal_cv_load_obo_v1_2_file($obo->name, $obo->path, $jobid, 0);
  }
  else {
    print "ERROR: counld not find OBO file: '$obo->path'\n";
  }
}

/**
 * Job callback: loads an ontology from a local OBO file.
 *
 * @param $obo_name
 *   Name under which the reference is saved when $is_new is set.
 * @param $file
 *   Path of the OBO file.
 * @param $jobid
 *   Optional Tripal job id.
 * @param $is_new
 *   When TRUE the name/path pair is stored in {tripal_cv_obo}.
 */
function tripal_cv_load_obo_v1_2_file($obo_name, $file, $jobid = NULL, $is_new = TRUE) {
  $newcvs = array();

  // TODO: need better error detection

  tripal_cv_load_obo_v1_2($file, $jobid, $newcvs);
  if ($is_new) {
    tripal_cv_load_obo_add_ref($obo_name, $file);
  }
  print "Ontology Sucessfully loaded!\n";

  // Update the cvtermpath table for every vocabulary that was touched.
  tripal_cv_load_update_cvtermpath($newcvs, $jobid);
}

/**
 * Job callback: downloads a remote OBO file and loads it.
 *
 * The file is copied to a temporary file, parsed, and the temporary file is
 * removed afterwards.
 *
 * @param $obo_name
 *   Name under which the reference is saved when $is_new is set.
 * @param $url
 *   URL of the OBO file.
 * @param $jobid
 *   Optional Tripal job id.
 * @param $is_new
 *   When TRUE the name/URL pair is stored in {tripal_cv_obo}.
 */
function tripal_cv_load_obo_v1_2_url($obo_name, $url, $jobid = NULL, $is_new = TRUE) {

  $newcvs = array();

  // First download the OBO.
  $temp = tempnam(sys_get_temp_dir(), 'obo_');
  print "Downloading URL $url, saving to $temp\n";

  // Check the remote handle before touching the temp file, and do not leave
  // the temp file behind when the download cannot start.
  $url_fh = fopen($url, "r");
  if (!$url_fh) {
    if ($temp) {
      unlink($temp);
    }
    tripal_cv_obo_quiterror("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? ".
      " if you are unable to download the file you may manually downlod the OBO file and use the web interface to ".
      " specify the location of the file on your server.");
  }
  $obo_fh = fopen($temp, "w");
  if (!$obo_fh) {
    fclose($url_fh);
    tripal_cv_obo_quiterror("Unable to open the temporary file $temp for writing.");
  }
  while (!feof($url_fh)) {
    $chunk = fread($url_fh, 8192);
    if ($chunk === FALSE) {
      break;
    }
    fwrite($obo_fh, $chunk);
  }
  fclose($url_fh);
  fclose($obo_fh);

  // Second, parse the OBO.
  tripal_cv_load_obo_v1_2($temp, $jobid, $newcvs);

  // Now remove the temp file.
  unlink($temp);

  if ($is_new) {
    tripal_cv_load_obo_add_ref($obo_name, $url);
  }

  // Update the cvtermpath table.
  tripal_cv_load_update_cvtermpath($newcvs, $jobid);

  print "Ontology Sucessfully loaded!\n";
}

/**
 * Runs the cvtermpath update for each vocabulary touched by a load.
 *
 * @param $newcvs
 *   Array of cv_id values keyed by namespace.
 * @param $jobid
 *   Tripal job id, passed through to tripal_cv_update_cvtermpath().
 */
function tripal_cv_load_update_cvtermpath($newcvs, $jobid) {

  print "\nUpdating cvtermpath table. This may take a while...\n";
  foreach ($newcvs as $namespace => $cvid) {
    tripal_cv_update_cvtermpath($cvid, $jobid);
  }
}

/**
 * Saves a name/path pair in {tripal_cv_obo} for later reuse.
 *
 * @param $name
 *   Display name of the ontology.
 * @param $path
 *   File path or URL of the OBO file.
 */
function tripal_cv_load_obo_add_ref($name, $path) {
  $isql = "INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')";
  db_query($isql, $name, $path);
}

/**
 * Parses an OBO file and loads its typedefs and terms.
 *
 * The file is first parsed into the tripal_obo_temp table; the typedefs and
 * then the terms are read back from that table and added. The work is
 * wrapped in tripal_db_start_transaction()/tripal_db_commit_transaction().
 *
 * @param $file
 *   Path of the OBO file.
 * @param $jobid
 *   Optional Tripal job id used for progress reporting.
 * @param $newcvs
 *   Filled (by reference) with cv_id values keyed by namespace.
 */
function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {

  $header = array();

  // Make sure our temporary table exists.
  $ret = array();
  if (!db_table_exists('tripal_obo_temp')) {
    $schema = tripal_cv_get_custom_tables('tripal_obo_temp');
    $success = tripal_core_create_custom_table($ret, 'tripal_obo_temp', $schema['tripal_obo_temp']);
    if (!$success) {
      watchdog('T_obo_loader', "Cannot create temporary loading table", array(), WATCHDOG_ERROR);
      return;
    }
  }
  // Empty the temp table.
  $sql = "DELETE FROM tripal_obo_temp";
  chado_query($sql);

  // Get a persistent connection. If we cannot get one then let the user
  // know the loading will be slow.
  $connection = tripal_db_persistent_chado();
  if (!$connection) {
    print "A persistant connection was not obtained. Loading will be slow\n";
  }

  tripal_db_start_transaction();
  if ($connection) {
    print "\nNOTE: Loading of this OBO file is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";
  }

  print "Step 1: Preloading File $file\n";

  // Make sure we have an 'internal' and a '_global' database.
  if (!tripal_db_add_db('internal')) {
    tripal_cv_obo_quiterror("Cannot add 'internal' database");
  }
  if (!tripal_db_add_db('_global')) {
    tripal_cv_obo_quiterror("Cannot add '_global' database");
  }

  // Parse the OBO file.
  $default_db = tripal_cv_obo_parse($file, $header, $jobid);

  // The vocabulary is named after the header's default-namespace, so fail
  // with an explicit message when the header does not provide one.
  if (!isset($header['default-namespace'][0])) {
    tripal_cv_obo_quiterror("Cannot add namespace: the OBO header has no 'default-namespace' tag");
  }
  $namespace = $header['default-namespace'][0];

  // Add the CV for this ontology to the database.
  $defaultcv = tripal_cv_add_cv($namespace, '');
  if (!$defaultcv) {
    tripal_cv_obo_quiterror('Cannot add namespace ' . $namespace);
  }
  $newcvs[$namespace] = $defaultcv->cv_id;

  // Add any typedefs to the vocabulary first.
  $sql = "
    SELECT * FROM tripal_obo_temp
    WHERE type = 'Typedef'
  ";
  $typedefs = chado_query($sql);
  while ($typedef = db_fetch_object($typedefs)) {
    $term = unserialize(base64_decode($typedef->stanza));
    tripal_cv_obo_process_term($term, $defaultcv->name, 1, $newcvs, $default_db);
  }

  // Next add terms to the vocabulary.
  print "\nStep 2: Loading terms...\n";
  if (!tripal_cv_obo_process_terms($defaultcv->name, $jobid, $newcvs, $default_db)) {
    tripal_cv_obo_quiterror('Cannot add terms from this ontology');
  }

  // Transaction is complete.
  tripal_db_commit_transaction();
  return;
}

/**
 * Logs an error to watchdog and terminates the script.
 *
 * NOTE(review): no explicit rollback is issued here; presumably the open
 * transaction is abandoned when the script exits. Confirm against
 * tripal_db_start_transaction().
 *
 * @param $message
 *   The message to log.
 */
function tripal_cv_obo_quiterror($message) {

  watchdog("T_obo_loader", $message, array(), WATCHDOG_ERROR);
  exit;

}

/**
 * Adds every 'Term' stanza stored in tripal_obo_temp to the vocabulary.
 *
 * Job progress is reported in the 50-100% range.
 *
 * @param $defaultcv
 *   Name of the default vocabulary.
 * @param $jobid
 *   Optional Tripal job id used for progress reporting.
 * @param $newcvs
 *   Array of cv_id values keyed by namespace, extended by reference.
 * @param $default_db
 *   Name of the default database for the terms.
 *
 * @return
 *   1 on success. Failures terminate via tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_process_terms($defaultcv, $jobid = NULL, &$newcvs, $default_db) {

  $i = 0;

  // Iterate through each term from the OBO file and add it.
  $sql = "
    SELECT * FROM tripal_obo_temp
    WHERE type = 'Term'
    ORDER BY id
  ";
  $terms = chado_query($sql);
  $count = pg_num_rows($terms);

  // Calculate the interval for updates.
  $interval = intval($count * 0.0001);
  if ($interval < 1) {
    $interval = 1;
  }
  while ($t = db_fetch_object($terms)) {
    $term = unserialize(base64_decode($t->stanza));

    // Update the job status every interval. The offset of 50 belongs to the
    // percentage, not to the job id.
    if ($jobid and $i % $interval == 0) {
      $complete = ($i / $count) * 50;
      tripal_job_set_progress($jobid, intval($complete) + 50);
      printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 2, number_format(memory_get_usage()));
    }

    // Add/update this term.
    if (!tripal_cv_obo_process_term($term, $defaultcv, 0, $newcvs, $default_db)) {
      tripal_cv_obo_quiterror("Failed to process terms from the ontology");
    }

    $i++;
  }

  // Set the final status. With no terms at all this step counts as done,
  // which also avoids a division by zero.
  if ($jobid) {
    $complete = $count > 0 ? ($i / $count) * 50 : 50;
    tripal_job_set_progress($jobid, intval($complete) + 50);
    printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 2, number_format(memory_get_usage()));
  }

  return 1;
}

/**
 * Adds one parsed OBO stanza (term or typedef) and its related records.
 *
 * Handles alt_id, synonym (including the deprecated exact_/narrow_/
 * broad_synonym tags), comment, xref, xref_analog, xref_unk, is_a and
 * relationship. The tags is_anonymous, subset, intersection_of, union_of,
 * disjoint_from, replaced_by, consider, use_term and builtin are not
 * handled beyond what is passed to tripal_cv_add_cvterm().
 *
 * @param $term
 *   The stanza: an array of value lists keyed by tag.
 * @param $defaultcv
 *   Name of the default vocabulary.
 * @param $is_relationship
 *   Passed to tripal_cv_add_cvterm(); 1 for typedefs.
 * @param $newcvs
 *   Array of cv_id values keyed by namespace, extended by reference.
 * @param $default_db
 *   Name of the default database for the term.
 *
 * @return
 *   1 on success. Failures terminate via tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_process_term($term, $defaultcv, $is_relationship = 0, &$newcvs, $default_db) {

  // Construct the term array for tripal_cv_add_cvterm().
  $t = array();
  $t['id'] = $term['id'][0];
  $t['name'] = $term['name'][0];
  foreach (array('def', 'subset', 'namespace', 'is_obsolete') as $tag) {
    if (array_key_exists($tag, $term)) {
      $t[$tag] = $term[$tag][0];
    }
  }

  // Add the cvterm.
  $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db);
  if (!$cvterm) {
    tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
  }

  if (array_key_exists('namespace', $term)) {
    $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
  }

  // Alternate ids are stored as dbxrefs of the term.
  if (array_key_exists('alt_id', $term)) {
    foreach ($term['alt_id'] as $alt_id) {
      if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
        tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
      }
    }
  }

  // Reformat the deprecated 'exact_synonym', 'narrow_synonym' and
  // 'broad_synonym' tags to the v1.2 'synonym' form.
  $deprecated = array(
    'exact_synonym' => 'EXACT',
    'narrow_synonym' => 'NARROW',
    'broad_synonym' => 'BROAD',
  );
  foreach ($deprecated as $tag => $scope) {
    if (array_key_exists($tag, $term)) {
      foreach ($term[$tag] as $synonym) {
        $term['synonym'][] = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 ' . $scope . ' $2', $synonym);
      }
    }
  }

  // Add all synonyms in one pass. The synonym loader skips synonyms that
  // already exist, so a single call covers regular and converted synonyms.
  if (array_key_exists('synonym', $term)) {
    if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
      tripal_cv_obo_quiterror("Cannot add synonyms");
    }
  }

  // Add the comments to the cvtermprop table, ranked in file order.
  if (array_key_exists('comment', $term)) {
    $j = 0;
    foreach ($term['comment'] as $comment) {
      if (!tripal_cv_obo_add_cvterm_prop($cvterm, 'comment', $comment, $j)) {
        tripal_cv_obo_quiterror("Cannot add/update cvterm property");
      }
      $j++;
    }
  }

  // Add any other external dbxrefs.
  foreach (array('xref', 'xref_analog', 'xref_unk') as $tag) {
    if (array_key_exists($tag, $term)) {
      foreach ($term[$tag] as $xref) {
        if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
          tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
        }
      }
    }
  }

  // Add is_a relationships for this cvterm.
  if (array_key_exists('is_a', $term)) {
    foreach ($term['is_a'] as $is_a) {
      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, 'is_a', $is_a, $is_relationship, $default_db)) {
        tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
      }
    }
  }

  // Other relationships are written as "<relation> <object id>".
  if (array_key_exists('relationship', $term)) {
    foreach ($term['relationship'] as $value) {
      $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
      $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, $object, $is_relationship, $default_db)) {
        tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
      }
    }
  }

  return 1;
}

/**
 * Adds a cvterm_relationship between a term and an object term.
 *
 * The relationship type term is looked up or added in $default_db first and
 * in 'OBO_REL' as a fallback. The object term is read from tripal_obo_temp
 * and added as a cvterm when necessary.
 *
 * @param $cvterm
 *   The subject cvterm record.
 * @param $defaultcv
 *   Name of the default vocabulary.
 * @param $rel
 *   Name of the relationship (e.g. 'is_a').
 * @param $objname
 *   Id of the object term as stored in tripal_obo_temp.
 * @param $object_is_relationship
 *   Passed to tripal_cv_add_cvterm() for the object term.
 * @param $default_db
 *   Name of the default database.
 *
 * @return
 *   TRUE on success. Failures terminate via tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel,
  $objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {

  // Make sure the relationship cvterm exists.
  $term = array(
    'name' => $rel,
    'id' => "$default_db:$rel",
    'definition' => '',
    'is_obsolete' => 0,
  );
  $relcvterm = tripal_cv_add_cvterm($term, $defaultcv, 1, 0, $default_db);

  if (!$relcvterm) {
    // If the relationship term couldn't be found in the default_db provided
    // then do one more check to find it in the relationship ontology.
    $term = array(
      'name' => $rel,
      'id' => "OBO_REL:$rel",
      'definition' => '',
      'is_obsolete' => 0,
    );
    $relcvterm = tripal_cv_add_cvterm($term, $defaultcv, 1, 0, 'OBO_REL');
    if (!$relcvterm) {
      tripal_cv_obo_quiterror("Cannot find the relationship term in the current ontology or in the relationship ontology: $rel\n");
    }
  }

  // Get the object term.
  $oterm = tripal_cv_obo_get_term($objname);
  if (!$oterm) {
    tripal_cv_obo_quiterror("Could not find object term $objname\n");
  }

  $objterm = array();
  $objterm['id'] = $oterm['id'][0];
  $objterm['name'] = $oterm['name'][0];
  foreach (array('def', 'subset', 'namespace', 'is_obsolete') as $tag) {
    if (array_key_exists($tag, $oterm)) {
      $objterm[$tag] = $oterm[$tag][0];
    }
  }
  $objcvterm = tripal_cv_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1, $default_db);
  if (!$objcvterm) {
    tripal_cv_obo_quiterror("Cannot add cvterm " . $oterm['name'][0]);
  }

  // Check to see if the cvterm_relationship already exists, if not add it.
  $values = array(
    'type_id' => $relcvterm->cvterm_id,
    'subject_id' => $cvterm->cvterm_id,
    'object_id' => $objcvterm->cvterm_id
  );
  $options = array('statement_name' => 'sel_cvtermrelationship_tysuob');
  $result = tripal_core_chado_select('cvterm_relationship', array('*'), $values, $options);
  if (count($result) == 0) {
    $options = array(
      'statement_name' => 'ins_cvtermrelationship_tysuob',
      'return_record' => FALSE
    );
    $success = tripal_core_chado_insert('cvterm_relationship', $values, $options);
    if (!$success) {
      tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
    }
  }

  return TRUE;
}

/**
 * Reads a parsed stanza back from the tripal_obo_temp table.
 *
 * @param $id
 *   The stanza id.
 *
 * @return
 *   The unserialized stanza array, or FALSE when the id is not present.
 */
function tripal_cv_obo_get_term($id) {
  $values = array('id' => $id);
  $options = array('statement_name' => 'sel_tripalobotemp_id');
  $result = tripal_core_chado_select('tripal_obo_temp', array('stanza'), $values, $options);
  if (count($result) == 0) {
    return FALSE;
  }
  return unserialize(base64_decode($result[0]->stanza));
}

/**
 * Adds the synonyms of a stanza to the cvtermsynonym table.
 *
 * Each synonym has the form '"text" SCOPE ...'. The scope, lower-cased, is
 * looked up or added as a term of the 'synonym_type' vocabulary, and
 * 'exact' is used when the synonym carries no scope. Synonyms already
 * stored for the cvterm are skipped.
 *
 * @param $term
 *   The stanza; only $term['synonym'] is read.
 * @param $cvterm
 *   The cvterm record the synonyms belong to.
 *
 * @return
 *   TRUE on success. Failures terminate via tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_add_synonyms($term, $cvterm) {

  // Make sure we have a 'synonym_type' vocabulary.
  $syncv = tripal_cv_add_cv('synonym_type', 'A vocabulary added by the Tripal CV module OBO loader for storing synonym types.');
  if (!$syncv) {
    tripal_cv_obo_quiterror("Cannot add/find the synonym_type vocabulary");
  }

  if (!array_key_exists('synonym', $term)) {
    return TRUE;
  }

  foreach ($term['synonym'] as $synonym) {

    // Separate out the synonym definition and the synonym type.
    $def = preg_replace('/^\s*"(.*)"\s*.*$/', '\1', $synonym);

    // The scope ('EXACT', etc.) is the first word after the closing quote.
    // Use preg_match so a synonym without a scope really falls back to
    // 'exact' rather than to the whole synonym string.
    $scope = 'exact';
    $matches = array();
    if (preg_match('/^.*"\s+(\w+)(\s.*)?$/', $synonym, $matches)) {
      $scope = drupal_strtolower($matches[1]);
    }

    // Make sure the synonym type exists in the 'synonym_type' vocabulary.
    // NOTE(review): 'is_updlicate' looks like a misspelling of an option
    // name; it is kept as is because the intended option and its effect on
    // the return value cannot be confirmed from this file.
    $values = array(
      'name' => $scope,
      'cv_id' => array(
        'name' => 'synonym_type',
      ),
    );
    $options = array('statement_name' => 'sel_cvterm_nacv', 'is_updlicate' => 1);
    $results = tripal_core_chado_select('cvterm', array('*'), $values, $options);

    // If it doesn't exist then add it.
    if (!$results) {
      // Build a term array for the missing type. A separate variable is
      // used so the $term parameter is not overwritten.
      $type_term = array(
        'name' => $scope,
        'id' => "internal:$scope",
        'definition' => '',
        'is_obsolete' => 0,
      );
      $syntype = tripal_cv_add_cvterm($type_term, $syncv->name, 0, 1);
      if (!$syntype) {
        tripal_cv_obo_quiterror("Cannot add synonym type: internal:$scope");
      }
    }
    else {
      $syntype = $results[0];
    }

    // Make sure the synonym doesn't already exist.
    $values = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'synonym' => $def
    );
    $options = array('statement_name' => 'sel_cvtermsynonym_cvsy');
    $results = tripal_core_chado_select('cvtermsynonym', array('*'), $values, $options);
    if (count($results) == 0) {
      $values = array(
        'cvterm_id' => $cvterm->cvterm_id,
        'synonym' => $def,
        'type_id' => $syntype->cvterm_id
      );
      $options = array(
        'statement_name' => 'ins_cvtermsynonym_cvsy',
        'return_record' => FALSE
      );
      $success = tripal_core_chado_insert('cvtermsynonym', $values, $options);
      if (!$success) {
        tripal_cv_obo_quiterror("Failed to insert the synonym for term: $cvterm->name ($def)");
      }
    }

    // TODO: the dbxrefs listed after a synonym are not added here.
  }

  return TRUE;
}

/**
 * Splits one OBO "tag: value" line at the first unescaped colon.
 *
 * A colon preceded by a backslash ("\:") is not treated as the separator
 * and is left untouched in the result.
 *
 * @param $line
 *   The line, already stripped of comments and surrounding whitespace.
 *
 * @return
 *   array($tag, $value). The value is trimmed and is '' when the line has
 *   no separator.
 */
function _tripal_cv_obo_parse_tag_value($line) {
  $pair = preg_split('/(?<!\\\\):/', $line, 2);
  $tag = $pair[0];
  $value = isset($pair[1]) ? trim($pair[1]) : '';
  return array($tag, $value);
}

/**
 * Stores one parsed stanza in the tripal_obo_temp table.
 *
 * Terminates via tripal_cv_obo_quiterror() when the insert fails.
 *
 * @param $stanza
 *   The stanza: an array of value lists keyed by tag.
 * @param $type
 *   The stanza type as read from the '[...]' line.
 */
function _tripal_cv_obo_parse_store_stanza($stanza, $type) {
  $values = array(
    'id' => $stanza['id'][0],
    'stanza' => base64_encode(serialize($stanza)),
    'type' => $type,
  );
  $options = array('statement_name' => 'ins_tripalobotemp_all');
  if (!tripal_core_chado_insert('tripal_obo_temp', $values, $options)) {
    tripal_cv_obo_quiterror("ERROR: Cannot insert stanza into temporary table.");
  }
}

/**
 * Parses an OBO file into the tripal_obo_temp table.
 *
 * Header tags are collected in $header. Every stanza is serialized and
 * stored together with its id and type. Job progress is reported in the
 * 0-50% range.
 *
 * @param $obo_file
 *   Path of the OBO file.
 * @param $header
 *   Filled (by reference) with lists of header values keyed by tag.
 * @param $jobid
 *   Tripal job id used for progress reporting, or NULL.
 *
 * @return
 *   The database prefix of the last 'id' value seen, or '_global' when no
 *   id had a prefix.
 */
function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
  $in_header = 1;
  $stanza = array();
  $type = '';
  $default_db = '_global';
  $line_num = 0;
  $num_read = 0;
  $intv_read = 0;

  $filesize = filesize($obo_file);
  $interval = intval($filesize * 0.01);
  if ($interval < 1) {
    $interval = 1;
  }

  // Iterate through the lines in the OBO file and parse the stanzas.
  $fh = fopen($obo_file, 'r');
  if (!$fh) {
    tripal_cv_obo_quiterror("Cannot open the OBO file: $obo_file");
  }
  // Compare against FALSE so a line that evaluates to false (e.g. a final
  // "0" without a newline) does not end the loop early.
  while (($line = fgets($fh)) !== FALSE) {

    $line_num++;
    // Count bytes, because the total comes from filesize().
    $size = strlen($line);
    $num_read += $size;
    $intv_read += $size;

    // Update the job status every 1% of the file.
    if ($jobid and $intv_read >= $interval) {
      $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
      tripal_job_set_progress($jobid, intval(($num_read / $filesize) * 50));
      $intv_read = 0;
    }

    // Remove newlines and surrounding whitespace.
    $line = trim($line);

    // Remove any special characters that may be hiding.
    $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);

    // Skip empty lines.
    if (strcmp($line, '') == 0) {
      continue;
    }

    // Remove comments from end of lines.
    // TODO: handle an escaped exclamation mark.
    $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);

    // A line that held only a comment is now empty; skip it too.
    if (trim($line) === '') {
      continue;
    }

    // At the first stanza we're out of the header.
    if (preg_match('/^\s*\[/', $line)) {
      $in_header = 0;

      // Store the stanza we just finished reading.
      if (sizeof($stanza) > 0) {
        _tripal_cv_obo_parse_store_stanza($stanza, $type);
      }

      // Get the stanza type: Term, Typedef or Instance.
      $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);

      // Start fresh with a new array.
      $stanza = array();
      continue;
    }

    // Break the line apart into tag and value, ignoring escaped colons.
    list($tag, $value) = _tripal_cv_obo_parse_tag_value($line);

    // If this is the ID then look for the default DB: the text before the
    // first unescaped colon.
    $matches = array();
    if ($tag == 'id' and preg_match('/^(.+?)(?<!\\\\):/', $value, $matches)) {
      $default_db = $matches[1];
    }

    if ($in_header) {
      if (!array_key_exists($tag, $header)) {
        $header[$tag] = array();
      }
      $header[$tag][] = $value;
    }
    else {
      if (!array_key_exists($tag, $stanza)) {
        $stanza[$tag] = array();
      }
      $stanza[$tag][] = $value;
    }
  }
  fclose($fh);

  // Now add the last term in the file.
  if (sizeof($stanza) > 0) {
    _tripal_cv_obo_parse_store_stanza($stanza, $type);
  }
  return $default_db;
}

/**
 * Links a cvterm to an external database reference.
 *
 * The xref has the form 'DB:accession "description" [...]'. The database
 * and the dbxref are added when missing, and a cvterm_dbxref row is added
 * unless it already exists. An xref whose prefix is 'http' or 'https' is
 * stored whole as the accession in the 'URL' database.
 *
 * @param $cvterm
 *   The cvterm record.
 * @param $xref
 *   The raw xref value from the OBO file.
 *
 * @return
 *   TRUE on success, FALSE when the xref has no accession. Other failures
 *   terminate via tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {

  $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
  $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);

  // The description is the quoted text, if any. Use preg_match so an xref
  // without a quoted description yields '' rather than the whole xref.
  $description = '';
  $matches = array();
  if (preg_match('/^.+?\"(.+?)\".*?$/', $xref, $matches)) {
    $description = $matches[1];
  }

  // Log a warning and let the caller decide what to do. The stray
  // tripal_cv_obo_quiterror() call with no message exited before this point.
  if (!$accession) {
    watchdog("T_obo_loader", "Cannot add a dbxref without an accession: '$xref'", NULL, WATCHDOG_WARNING);
    return FALSE;
  }

  // If the xref is a database link, handle that specially.
  if (strcmp($dbname, 'http') == 0 or strcmp($dbname, 'https') == 0) {
    $accession = $xref;
    $dbname = 'URL';
  }

  // Add the database.
  $db = tripal_db_add_db($dbname);
  if (!$db) {
    tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
  }

  // Now add the dbxref.
  $dbxref = tripal_cv_obo_add_dbxref($db->db_id, $accession, '', $description);
  if (!$dbxref) {
    tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
  }

  // Finally add the cvterm_dbxref, but first check whether it exists.
  $values = array(
    'cvterm_id' => $cvterm->cvterm_id,
    'dbxref_id' => $dbxref->dbxref_id,
  );
  $options = array('statement_name' => 'sel_cvtermdbxref_cvdb');
  $result = tripal_core_chado_select('cvterm_dbxref', array('*'), $values, $options);
  if (count($result) == 0) {
    $ins_options = array(
      'statement_name' => 'ins_cvtermdbxref_cvdb',
      'return_record' => FALSE
    );
    $result = tripal_core_chado_insert('cvterm_dbxref', $values, $ins_options);
    if (!$result){
      tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
      return FALSE;
    }
  }

  return TRUE;
}

/**
 * Adds a property to a cvterm.
 *
 * The property type is looked up or added in the 'cvterm_property_type'
 * vocabulary. When $rank is 0 the rows in cvtermprop for the cvterm are
 * deleted first, so a ranked list of values is rebuilt from scratch.
 *
 * @param $cvterm
 *   The cvterm record.
 * @param $property
 *   Name of the property type (e.g. 'comment').
 * @param $value
 *   The property value.
 * @param $rank
 *   The rank of the value.
 *
 * @return
 *   TRUE on success. Failures terminate via tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {

  // Make sure the 'cvterm_property_type' CV exists.
  $cv = tripal_cv_add_cv('cvterm_property_type', '');
  if (!$cv) {
    tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
  }

  // Get the property type cvterm. If it doesn't exist then add it.
  $values = array(
    'name' => $property,
    'cv_id' => $cv->cv_id,
  );
  $options = array('statement_name' => 'sel_cvterm_nacv_na');
  $results = tripal_core_chado_select('cvterm', array('*'), $values, $options);
  if (count($results) == 0) {
    $term = array(
      'name' => $property,
      'id' => "internal:$property",
      'definition' => '',
      'is_obsolete' => 0,
    );
    $cvproptype = tripal_cv_add_cvterm($term, $cv->name, 0, 0);
    if (!$cvproptype) {
      tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
      return FALSE;
    }
  }
  else {
    $cvproptype = $results[0];
  }

  // Remove any properties that currently exist for this term. We'll reset
  // them.
  if ($rank == 0) {
    $values = array('cvterm_id' => $cvterm->cvterm_id);
    $options = array('statement_name' => 'del_cvtermprop_cv');
    $success = tripal_core_chado_delete('cvtermprop', $values, $options);
    if (!$success) {
      tripal_cv_obo_quiterror("Could not remove existing properties to update property $property for term\n");
      return FALSE;
    }
  }

  // Now add the property.
  $values = array(
    'cvterm_id' => $cvterm->cvterm_id,
    'type_id' => $cvproptype->cvterm_id,
    'value' => $value,
    'rank' => $rank,
  );
  $options = array(
    'statement_name' => 'ins_cvtermprop_cvtyvara',
    'return_record' => FALSE,
  );
  $result = tripal_core_chado_insert('cvtermprop', $values, $options);
  if (!$result) {
    tripal_cv_obo_quiterror("Could not add property $property for term\n");
    return FALSE;
  }
  return TRUE;
}

/**
 * Finds a dbxref by database and accession, adding it when missing.
 *
 * @param $db_id
 *   The db_id of the database.
 * @param $accession
 *   The accession.
 * @param $version
 *   Version stored when the dbxref is added.
 * @param $description
 *   Description stored when the dbxref is added.
 *
 * @return
 *   The dbxref record (only dbxref_id is selected). An insert failure
 *   terminates via tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_add_dbxref($db_id, $accession, $version='', $description='') {

  // Check to see if the dbxref exists; if not, add it.
  $values = array(
    'db_id' => $db_id,
    'accession' => $accession,
  );
  $options = array('statement_name' => 'sel_dbxref_idac');
  $result = tripal_core_chado_select('dbxref', array('dbxref_id'), $values, $options);
  if (count($result) == 0){
    $ins_values = array(
      'db_id' => $db_id,
      'accession' => $accession,
      'version' => $version,
      'description' => $description,
    );
    $ins_options = array(
      'statement_name' => 'ins_dbxref_idacvede',
      'return_record' => FALSE
    );
    $result = tripal_core_chado_insert('dbxref', $ins_values, $ins_options);
    if (!$result) {
      tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
      return FALSE;
    }
    // The insert does not return the record, so select it again.
    $result = tripal_core_chado_select('dbxref', array('dbxref_id'), $values, $options);
  }
  return $result[0];
}