Tripal v1.0 (6.x-1.0)
|
<?php

/**
 * @file
 * Synchronizes features stored in chado with Drupal nodes.
 *
 * This file serves two purposes:
 *  - It is included by the tripal_feature module and provides the sync form
 *    and the functions that create Drupal nodes for chado features.
 *  - It can be run as a stand-alone command-line script to sync features
 *    from chado to Drupal. It must be started from the Drupal root directory
 *    because it includes 'includes/bootstrap.inc' using a relative path.
 *
 * Command-line usage:
 *   -f <feature_id>  sync the single feature with the given feature_id
 *   -f 0             sync all features
 */

$arguments = getopt("f:");

if (isset($arguments['f'])) {
  // Fake the minimal web-server environment that the Drupal bootstrap expects
  // when it is started from the command line.
  $drupal_base_url = parse_url('http://www.example.com');
  $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
  $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
  $_SERVER['REMOTE_ADDR'] = NULL;
  $_SERVER['REQUEST_METHOD'] = NULL;

  require_once 'includes/bootstrap.inc';
  drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);

  $feature_id = $arguments['f'];

  if ($feature_id > 0 ) {
    tripal_feature_sync_feature($feature_id);
  }
  else{
    print "syncing all features...\n";
    tripal_feature_sync_features();
  }
}

/**
 * Builds the form used to start a feature sync job.
 *
 * @return
 *   A Drupal form array with a description, a textarea for the feature types
 *   to sync, an organism select box and a submit button.
 */
function tripal_feature_sync_form() {
  $form = array();

  $form['description'] = array(
    '#type' => 'item',
    '#value' => t("Add feature types, optionally select an organism and ".
       "click the 'Sync all Features' button to create Drupal ".
       "content for features in chado. Only features of the types listed ".
       "below in the Feature Types box will be synced. You may limit the ".
       "features to be synced by a specific organism. Depending on the ".
       "number of features in the chado database this may take a long ".
       "time to complete. "),
  );

  $form['feature_types'] = array(
    '#title' => t('Feature Types'),
    '#type' => 'textarea',
    // The pieces are joined with double quotes throughout; mixing quote styles
    // would leave the quote-dot-quote sequences in the rendered help text.
    '#description' => t("Enter the names of the sequence types that the ".
       "site will support with independent pages.  Pages for these data ".
       "types will be built automatically for features that exist in the ".
       "chado database.  The names listed here should be separated by ".
       "spaces or entered separately on new lines. The names must match ".
       "exactly (spelling and case) with terms in the sequence ontology"),
    '#required' => TRUE,
    '#default_value' => variable_get('chado_sync_feature_types', 'gene contig'),
  );

  // Build the organism options. The first (key 0) entry is blank so that the
  // user can choose not to restrict the sync to a single organism.
  $orgs = tripal_organism_get_synced();
  $organisms = array();
  $organisms[] = '';
  foreach ($orgs as $organism) {
    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  }
  $form['organism_id'] = array(
    '#title' => t('Organism'),
    // Form API type names are machine names and must never be translated.
    '#type' => 'select',
    '#description' => t("Choose the organism for which features set above will be synced. Only organisms which also have been synced will appear in this list."),
    '#options' => $organisms,
  );

  $form['button'] = array(
    '#type' => 'submit',
    '#value' => t('Sync all Features'),
    '#weight' => 3,
  );

  return $form;
}

/**
 * Validation handler for tripal_feature_sync_form().
 *
 * There is currently nothing to validate; the handler is kept so that the
 * form has the conventional validate/submit pair.
 */
function tripal_feature_sync_form_validate($form, &$form_state) {
  // nothing to do
}

/**
 * Submit handler for tripal_feature_sync_form().
 *
 * Remembers the submitted feature types in the 'chado_sync_feature_types'
 * variable and queues a job that calls tripal_feature_sync_features().
 */
function tripal_feature_sync_form_submit($form, &$form_state) {

  global $user;

  $organism_id   = $form_state['values']['organism_id'];
  $feature_types = $form_state['values']['feature_types'];

  // Arguments for tripal_feature_sync_features(): $max_sync (0 = no limit),
  // $organism_id and $feature_types.
  $job_args = array(0, $organism_id, $feature_types);

  if ($organism_id) {
    $organism = tripal_core_chado_select('organism', array('genus', 'species'), array('organism_id' => $organism_id));
    $title = "Sync all features for " .  $organism[0]->genus . " " . $organism[0]->species;
  }
  else {
    $title = t('Sync all features for all synced organisms');
  }

  variable_set('chado_sync_feature_types', $feature_types);

  tripal_add_job($title, 'tripal_feature',
    'tripal_feature_sync_features', $job_args, $user->uid);
}

/**
 * Resets the URL alias of every feature node that has been synced.
 *
 * @param $job_id
 *   Optional job identifier. It is accepted so that the function can be
 *   queued as a job but is not used by the function itself.
 */
function tripal_feature_set_urls($job_id = NULL) {
  // first get the list of features that have been synced
  $sql = "SELECT * FROM {chado_feature}";
  $nodes = db_query($sql);
  while ($node = db_fetch_object($nodes)) {
    // now get the feature details (including the organism's genus/species
    // which are needed to build some of the alias formats)
    $sql = "SELECT *
            FROM feature F
              INNER JOIN organism O on O.organism_id = F.organism_id
            WHERE F.feature_id = %d";
    $feature = db_fetch_object(chado_query($sql, $node->feature_id));
    if ($feature) {
      tripal_feature_set_feature_url($node, $feature);
    }
  }
}

/**
 * Sets the URL alias for a single feature node.
 *
 * The alias format is chosen by the 'chado_feature_url' variable. Any alias
 * previously stored for the node is removed first.
 *
 * @param $node
 *   An object with an 'nid' property identifying the feature's node.
 * @param $feature
 *   An object with the feature's feature_id, name, uniquename, genus and
 *   species properties.
 */
function tripal_feature_set_feature_url($node, $feature) {

  // determine which URL alias to use
  $alias_type = variable_get('chado_feature_url', 'internal ID');
  $aprefix = variable_get('chado_feature_accession_prefix', 'ID');
  $genus = preg_replace('/\s/', '_', strtolower($feature->genus));
  $species = preg_replace('/\s/', '_', strtolower($feature->species));
  switch ($alias_type) {
    case 'feature name':
      $url_alias = $feature->name;
      break;
    case 'feature unique name':
      $url_alias = $feature->uniquename;
      break;
    case 'genus_species_uqname':
      $url_alias = $genus . "/" . $genus . "_" . $species . "/" . $feature->uniquename;
      break;
    case 'genus species name':
      $url_alias = $genus . "/" . $genus . "_" . $species . "/" . $feature->name;
      break;
    default:
      // 'internal ID' (or any unknown setting): accession prefix + feature_id
      $url_alias = "$aprefix$feature->feature_id";
  }
  print "Setting URL alias for $feature->name: node/$node->nid => $url_alias\n";
  // remove any previous alias
  db_query("DELETE FROM {url_alias} WHERE src = '%s'", "node/$node->nid");
  // add the new alias
  path_set_alias("node/$node->nid", $url_alias);
}

/**
 * Syncs chado features of the allowed types to Drupal nodes.
 *
 * Each feature that does not yet have a chado_feature record is synced by
 * running this file as a separate PHP process (see the comment in the loop).
 *
 * @param $max_sync
 *   Stop after this many candidate features have been examined (features
 *   that are already synced are counted too). 0 means no limit.
 * @param $organism_id
 *   Optional. Limit the sync to features of this organism. Only organisms
 *   returned by tripal_organism_get_synced() are ever considered.
 * @param $feature_types
 *   Optional. Whitespace separated list of sequence ontology term names.
 *   Defaults to the 'chado_sync_feature_types' variable.
 * @param $job_id
 *   Optional. The job whose progress should be updated while syncing.
 *
 * @return
 *   Always an empty string.
 */
function tripal_feature_sync_features($max_sync = 0, $organism_id = NULL,
  $feature_types = NULL, $job_id = NULL) {

  // get the list of available sequence ontology terms for which
  // we will build drupal pages from features in chado.  If a feature
  // is not one of the specified types we won't build a node for it.
  if (!$feature_types) {
    $allowed_types = variable_get('chado_sync_feature_types', 'gene contig');
  }
  else {
    $allowed_types = $feature_types;
  }
  // Collapse every run of whitespace into one space and trim the ends so
  // that splitting on ' ' can never yield an empty term.
  $allowed_types = trim(preg_replace("/[\s\n\r]+/", " ", $allowed_types));

  print "Looking for features of type: $allowed_types\n";

  $so_terms = ($allowed_types === '') ? array() : explode(' ', $allowed_types);
  if (!$so_terms) {
    // An empty list would produce an invalid "WHERE ()" clause.
    print "No feature types specified. Nothing to sync.\n";
    return '';
  }

  // get the list of organisms that are synced and only include features from
  // those organisms (optionally narrowed down to the requested organism)
  $org_ids = array();
  foreach (tripal_organism_get_synced() as $org) {
    if (!$org->organism_id) {
      continue;
    }
    if ($organism_id and $org->organism_id != $organism_id) {
      continue;
    }
    $org_ids[] = intval($org->organism_id);
  }
  if (!$org_ids) {
    // Without at least one organism the query would contain an empty "()".
    print "No matching synced organisms found. Nothing to sync.\n";
    return '';
  }

  // use this SQL statement to get the features that we're going to upload.
  // The term names come from user input so they are passed as query
  // arguments instead of being interpolated into the statement.
  $sql = "SELECT feature_id ".
         "FROM {FEATURE} F ".
         "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
         "  INNER JOIN CV on CV.cv_id = CVT.cv_id ".
         "WHERE CVT.name IN (" . db_placeholders($so_terms, 'varchar') . ") ".
         "  AND F.organism_id IN (" . db_placeholders($org_ids, 'int') . ") ".
         "  AND CV.name = 'sequence' ".
         "ORDER BY feature_id";

  // get the list of features. chado_query() is called with one argument per
  // placeholder, in the order the placeholders appear in the statement.
  $query_args = array_merge(array($sql), $so_terms, $org_ids);
  $results = call_user_func_array('chado_query', $query_args);

  // load into ids array
  $ids = array();
  while ($id = db_fetch_object($results)) {
    $ids[] = $id->feature_id;
  }
  $count = count($ids);

  // make sure our vocabularies are set before proceeding
  tripal_feature_set_vocabulary();

  // pre-create the SQL statement that will be used to check
  // if a feature has already been synced.  We skip features
  // that have been synced
  $sql = "SELECT * FROM {chado_feature} WHERE feature_id = %d";

  // The script that is launched for every feature (this file).
  $script = drupal_get_path('module', 'tripal_feature') . "/includes/syncFeatures.inc";

  // Iterate through features that need to be synced
  $interval = intval($count * 0.01);
  if ($interval < 1) {
    $interval = 1;
  }
  $i = 0;
  foreach ($ids as $feature_id) {
    // update the job status every 1% features
    if ($job_id and $i % $interval == 0) {
      tripal_job_set_progress($job_id, intval(($i/$count)*100));
    }
    // if we have a maximum number to sync then stop when we get there
    // if not then just continue on
    if ($max_sync and $i == $max_sync) {
      return '';
    }
    if (!db_fetch_object(db_query($sql, $feature_id))) {

      // parsing all the features can cause memory overruns
      // we are not sure why PHP does not clean up the memory as it goes
      // to avoid this problem we will call this script through an
      // independent system call
      print "$i of $count Syncing feature id: $feature_id\n";
      $cmd = "php " . escapeshellarg($script) . " -f " . intval($feature_id);
      system($cmd);

    }
    $i++;
  }

  return '';
}

/**
 * Syncs a single chado feature to a Drupal node.
 *
 * Cleans up inconsistent node/chado_feature records for the feature, creates
 * a 'chado_feature' node when none exists yet and then sets the node's
 * taxonomy and URL alias.
 *
 * Note: when validation of a newly built node fails the errors are logged
 * and the PHP process is terminated with exit. The function is intended to
 * be run in its own process (see tripal_feature_sync_features()).
 *
 * @param $feature_id
 *   The chado feature_id of the feature to sync.
 *
 * @return
 *   Always an empty string.
 */
function tripal_feature_sync_feature($feature_id) {

  global $user;
  $create_node = 1;   // set to 0 if the node exists and we just sync and not create

  // if we don't have a feature_id then return
  if (!$feature_id) {
    drupal_set_message(t("Please provide a feature_id to sync"));
    return '';
  }

  // get information about this feature
  $fsql = "SELECT F.feature_id, F.name, F.uniquename,O.genus, ".
          "  O.species,CVT.name as cvname,F.residues,F.organism_id ".
          "FROM {FEATURE} F ".
          "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
          "  INNER JOIN Organism O ON F.organism_id = O.organism_ID ".
          "WHERE F.feature_id = %d";
  $feature = db_fetch_object(chado_query($fsql, $feature_id));

  // Without a feature record there is nothing to build a node from.
  if (!$feature) {
    drupal_set_message(t("%feature_id: The feature could not be found in chado. Cannot sync", array('%feature_id' => $feature_id)));
    return '';
  }

  // get the synonyms for this feature
  $synsql = "SELECT S.name ".
            "FROM {feature_synonym} FS ".
            "  INNER JOIN {synonym} S on FS.synonym_id = S.synonym_id ".
            "WHERE FS.feature_id = %d";
  $synonyms = chado_query($synsql, $feature_id);

  // now add these synonyms to the feature object as a single string
  // (one synonym per line)
  $synstring = '';
  while ($synonym = db_fetch_object($synonyms)) {
    $synstring .= "$synonym->name\n";
  }
  $feature->synonyms = $synstring;

  // check to make sure that we don't have any feature nodes with this feature
  // name as a title but without a corresponding entry in the chado_feature
  // table. If so then we want to clean up that node. The check is limited to
  // nodes of type 'chado_feature' so that unrelated content which happens to
  // share the title is never deleted.
  $tsql =  "SELECT * FROM {node} N ".
           "WHERE title = '%s' AND type = 'chado_feature'";
  $cnsql = "SELECT * FROM {chado_feature} ".
           "WHERE nid = %d";
  $nodes = db_query($tsql, $feature->name);
  // cycle through all nodes that may have this title
  while ($node = db_fetch_object($nodes)) {
    $feature_nid = db_fetch_object(db_query($cnsql, $node->nid));
    if (!$feature_nid) {
      drupal_set_message(t("%feature_id: A node is present but the chado_feature entry is missing... correcting", array('%feature_id' => $feature_id)));
      node_delete($node->nid);
    }
  }

  // check if this feature already exists in the chado_feature table.
  // if we have a chado feature, we want to check to see if we have a node
  $cfsql = "SELECT * FROM {chado_feature} ".
           "WHERE feature_id = %d";
  // @coder-ignore: don't need to use db_rewrite_sql() since need all nodes regardless of access control
  $nsql =  "SELECT * FROM {node} N ".
           "WHERE nid = %d";
  $chado_feature = db_fetch_object(db_query($cfsql, $feature->feature_id));
  if ($chado_feature) {
    drupal_set_message(t("%feature_id: A chado_feature entry exists", array('%feature_id' => $feature_id)));
    $node = db_fetch_object(db_query($nsql, $chado_feature->nid));
    if (!$node) {
      // if we have a chado_feature but not a node then we have a problem and
      // need to cleanup
      drupal_set_message(t("%feature_id: The node is missing, but has a chado_feature entry... correcting", array('%feature_id' => $feature_id)));
      $df_sql = "DELETE FROM {chado_feature} WHERE feature_id = %d";
      db_query($df_sql, $feature_id);
    }
    else {
      drupal_set_message(t("%feature_id: A corresponding node exists", array('%feature_id' => $feature_id)));
      $create_node = 0;
    }
  }

  // if we've encountered an error then just return.
  if ($error_msg = db_error()) {
    return '';
  }

  // if a drupal node does not exist for this feature then we want to
  // create one.
  if ($create_node) {
    // get the organism for this feature
    $sql = "SELECT * FROM {organism} WHERE organism_id = %d";
    $organism = db_fetch_object(chado_query($sql, $feature->organism_id));

    // Variables are passed as t() placeholders rather than interpolated so
    // the message stays translatable and the values are escaped.
    drupal_set_message(t("%feature_id: Creating node @name", array('%feature_id' => $feature_id, '@name' => $feature->name)));
    $new_node = new stdClass();
    $new_node->type = 'chado_feature';
    $new_node->uid = $user->uid;
    $new_node->title = "$feature->name, $feature->uniquename ($feature->cvname) $organism->genus $organism->species";
    $new_node->fname = "$feature->name";
    $new_node->uniquename = "$feature->uniquename";
    $new_node->feature_id = $feature->feature_id;
    $new_node->residues = $feature->residues;
    $new_node->organism_id = $feature->organism_id;
    $new_node->feature_type = $feature->cvname;
    $new_node->synonyms = $feature->synonyms;

    // validate the node and if okay then submit
    node_validate($new_node);
    if ($errors = form_get_errors()) {
      print "Error encountered validating new node. Cannot sync\n";
      foreach ($errors as $key => $msg) {
        // The severity must be one of the WATCHDOG_* constants.
        watchdog('trp-fsync', "%msg", array('%msg' => $msg), WATCHDOG_ERROR);
      }
      exit;
    }
    else {
      $node = node_submit($new_node);
      node_save($node);
    }
  }
  else {
    // Only the nid is needed below and the chado_feature record carries it.
    $node = $chado_feature;
  }

  // set the taxonomy for this node
  drupal_set_message(t("%feature_id (@nid): setting taxonomy", array('%feature_id' => $feature_id, '@nid' => $node->nid)));
  tripal_feature_set_taxonomy($node, $feature_id);

  // set the URL alias for this node
  tripal_feature_set_feature_url($node, $feature);

  return '';
}