{
"translatorID": "57a00950-f0d1-4b41-b6ba-44ff0fc30289",
"label": "Google Scholar",
"creator": "Simon Kornblith, Frank Bennett, Aurimas Vinckevicius",
"target": "^https?://scholar\\.google\\.(?:com|cat|(?:com?\\.)?[a-z]{2})/(?:scholar(?:_case)?\\?|citations\\?)",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2014-07-25 21:01:15"
}
/*
* Test pages
*
* Searches of Google Scholar with the following terms should yield a folder
* icon that works. Check that unlinked ([CITATION]) items that provide
* no BibTeX data (there is currently one under "Marbury v. Madison",
* and "clifford" seems to be a good source of garbage) are
* dropped from the listings:
*
* marbury v madison
* kelo
* smith
* view of the cathedral
* clifford
*
* "How cited" pages should NOT yield a page or folder icon. The
* URLs to these currently look like this:
*
* http://scholar.google.co.jp/scholar_case?about=1101424605047973909&q=kelo&hl=en&as_sdt=2002
*
* Case pages should present a document icon that works:
*
* http://scholar.google.co.jp/scholar_case?case=18273389148555376997&hl=en&as_sdt=2002&kqfp=13204897074208725174&kql=186&kqpfp=16170611681001262513#kq
*/
// Next provisional itemID handed out to case items that need cross-references.
var bogusItemID = 1;
// CF value found in the GSP cookie before it was overridden; undefined when
// the cookie was left alone.
var __old_CF;
// Counter consulted by decrementCounter() to decide when to restore the cookie.
var __result_counter = 0;
/**
 * Decide which item type (if any) the current page offers.
 *
 * @param {Document} doc - page document
 * @param {String} url - page URL
 * @return {String|undefined} "case" for a case page that is not a "How cited"
 *   page, 'patent' / 'case' / 'journalArticle' for an individual saved
 *   citation, "multiple" when the page holds viable search results, and
 *   undefined otherwise.
 */
var detectWeb = function (doc, url) {
	// Case pages carry "/scholar_case?"; their "How cited" variant adds "about="
	var isCasePage = url.indexOf('/scholar_case?') != -1;
	if (isCasePage && url.indexOf('about=') == -1) {
		return "case";
	}

	if (url.indexOf('/citations?') == -1) {
		// Neither a case nor a saved citation: only search results remain
		if (getViableResults(doc).length) {
			return "multiple";
		}
		return;
	}

	// Individual saved citation: the title link tells us what it points to
	var link = ZU.xpathText(doc, '//a[@class="gsc_title_link"]/@href');
	if (!link) {
		return;
	}
	if (link.indexOf('/patents?') != -1) {
		return 'patent';
	}
	if (link.indexOf('/scholar_case?') != -1) {
		return 'case';
	}
	// Can't distinguish book from journalArticle: both have "Journal" fields
	return 'journalArticle';
};
/*********************************
* Cookie manipulation functions *
*********************************/
/**
 * Rewrite the CF setting inside the Google Scholar Preference (GSP) cookie.
 *
 * Does nothing when the document has no GSP cookie. An existing CF entry is
 * replaced by `cf`, or removed when `cf` is empty; when there is no CF entry,
 * ":CF=" + cf is appended.
 *
 * @param {Document} doc - document whose cookie is read and written
 * @param {String} cf - new CF value
 */
function setGSPCookie(doc, cf) {
	var found = doc.cookie.match(/\bGSP=[^;]+/);
	if (!found || !found[0]) {
		return;
	}
	var cookie = found[0];

	Z.debug('Changing cookie: ' + cookie);

	if (/\bCF=/.test(cookie)) {
		// "$1" keeps the ":" separator that may follow the old value
		var replacement = cf ? 'CF=' + cf + '$1' : '';
		cookie = cookie.replace(/\s*\bCF=\d*(:?)/, replacement);
	} else {
		cookie = cookie + ':CF=' + cf;
	}

	// Make sure we capture "0-" in
	// http://0-scholar.google.co.za.innopac.up.ac.za/...
	var hostMatch = doc.location.href
		.match(/https?:\/\/[^\/]*?([^.\/]*scholar\.google\.[^:\/]+)/i);
	var domain = hostMatch[1];

	// The expiry date is the one Google Scholar itself uses
	cookie = cookie + '; domain=.' + domain
		+ '; expires=Sun, 17 Jan 2038 19:14:09 UTC';

	doc.cookie = cookie;
	Z.debug('Cookie set to: ' + cookie);
}
/**
 * Set the citation-format preference through Google Scholar's settings pages.
 *
 * Fetches the settings page derived from the current URL, extracts the hidden
 * "scisig" token from it, then requests the "scholar_setprefs" URL with that
 * token. `callback(doc)` is invoked once the second request has returned.
 *
 * NOTE(review): the original text of the two regular expressions below (and
 * the `if(!scisig)` guard around the diagnostics) was lost, leaving a
 * statement that did not parse. They are reconstructed here from the
 * surviving fragments; verify the patterns against an intact copy of the
 * translator and against the current settings page markup.
 *
 * @param {Document} doc - document of the current Google Scholar page
 * @param {Function} callback - called with `doc` after the preferences request
 * @throws {Error} (inside the response handler) when no scisig token is found
 */
function setCookieThroughPrefs(doc, callback) {
	// Declared locally: this used to be assigned without `var`, creating a global
	var url = doc.location.href.replace(/hl\=[^&]*&?/, "")
		.replace("scholar?",
			"scholar_settings?");
	ZU.doGet(url, function(scisigDoc) {
		// Hidden form field carrying the signature required by scholar_setprefs
		var scisig = /<input\b[^>]*\bname=["']?scisig["']?[^>]*\bvalue=["']?([^"'\s>]+)/i
			.exec(scisigDoc);
		if(!scisig) {
			// Dump whatever forms the page has to help diagnose the failure
			Z.debug('Could not locate scisig');
			var form = scisigDoc.match(/<form[\s\S]+?<\/form>/ig);
			if(!form) {
				Z.debug('No forms found on page.');
				Z.debug(scisigDoc);
			} else {
				Z.debug(form.join('\n\n'));
			}
			// Fail with a clear message instead of a TypeError on scisig[1]
			throw new Error('Google Scholar: could not locate scisig on the settings page');
		}
		url = url.replace("scholar_settings?", "scholar_setprefs?")
			+ "&scis=yes&scisf=4&submit=&scisig="+scisig[1];
		//set preferences
		Z.debug('Submitting settings to Google Scholar: ' + url);
		ZU.doGet(url, function(response) { callback(doc); });
	});
}
/**
 * Make sure the GSP cookie carries CF=4 before `callback(doc)` runs.
 *
 * When a GSP cookie exists, its previous CF value is remembered in __old_CF
 * (left undefined if it was already '4') and the callback is invoked
 * immediately. Without a GSP cookie the work is delegated to
 * setCookieThroughPrefs(), which receives the callback.
 *
 * @param {Document} doc - document whose cookie is inspected
 * @param {Function} callback - receives `doc` once the cookie is prepared
 */
function prepareCookie(doc, callback) {
	// Google Scholar always sets GSP
	var hasGSP = /\bGSP=/.test(doc.cookie);
	if (!hasGSP) {
		Z.debug("Attempting to set cookie through GS Settings page");
		//some proxies do not pass cookies through, so we need to set this by
		//going to the preferences page
		setCookieThroughPrefs(doc, callback);
		return;
	}

	//check if we need to change cookie
	var cfMatch = doc.cookie.match(/\bGSP=[^;]*?\bCF=(\d+)/);
	var currentCF = cfMatch ? cfMatch[1] : '';
	__old_CF = undefined;
	if (currentCF != '4') {
		__old_CF = currentCF;
		setGSPCookie(doc, '4');
	}
	callback(doc);
}
/**
 * Put back the CF value remembered in __old_CF, if one was remembered.
 *
 * @param {Document} doc - document whose cookie is restored
 */
function restoreCookie(doc) {
	// Loose comparison on purpose: skips both undefined and null
	if (__old_CF == undefined) {
		return;
	}
	setGSPCookie(doc, __old_CF);
}
/**
 * Count one result as finished and restore the cookie once the counter
 * drops below 1.
 *
 * @param {Document} doc - document passed on to restoreCookie()
 */
function decrementCounter(doc) {
	/**Possible race condition!! But there should never be any
	 * lock-ups or detrimental effects as long as only one
	 * instance of the translator can be run at a time and
	 * we do not change __old_CF after setting it initially
	 */
	if (--__result_counter < 1) {
		restoreCookie(doc);
	}
}
/*****************************
* Other accessory functions *
*****************************/
/**
 * Determine the item type of a single search-result node.
 *
 * @param {Node} result - result node to inspect
 * @return {String} 'case', 'patent', 'book' or 'article'
 */
function determineType(result) {
	var titleHref = ZU.xpathText(result, './/h3[@class="gs_rt"]/a[1]/@href');

	if (titleHref) {
		// URL fragments that identify a type outright, checked in this order
		var linkTypes = [
			['/scholar_case?', 'case'],
			['/patents?', 'patent'],
			['/books?', 'book']
		];
		for (var k = 0; k < linkTypes.length; k++) {
			if (titleHref.indexOf(linkTypes[k][0]) != -1) {
				return linkTypes[k][1];
			}
		}
		if (titleHref.indexOf('/citations?') == -1) {
			//not a saved citation
			return 'article';
		}
	}

	/**if there is no link (i.e. [CITATION]), or we're looking at saved citations
	 * we can determine this by the second line.
	 *   Patents have the word Patent here
	 *   Cases seem to always start with a number
	 *   Books just have year after last dash
	 *   Articles are assumed to be everything else
	 *
	 * This is probably not going to work with google scholar in other languages
	 */
	var subTitle = ZU.xpathText(result, './/div[@class="gs_a"]');
	if (!subTitle) {
		return 'article';
	}
	subTitle = subTitle.trim();

	if (/\bpatent\s+\d/i.test(subTitle)) {
		return 'patent';
	}
	if (/^\d/.test(subTitle)) {
		return 'case';
	}
	if (/-\s*\d+$/.test(subTitle)) {
		return 'book';
	}
	return 'article';
}
/**
 * Build an attachment descriptor for a link whose title starts with a
 * bracketed file-type tag such as "[PDF]".
 *
 * @param {String} url - attachment URL
 * @param {String} title - link title; the tag is matched case-insensitively
 * @return {Object|undefined} {title, url, mimeType}, or undefined when the
 *   title has no leading tag or the tag is not in getAttachment.mimeTypes
 */
function getAttachment(url, title) {
	//try to determine mimeType from title
	var tag = title.match(/^\s*\[([^\]]+)\]/);
	if (tag) {
		var mimeType = getAttachment.mimeTypes[tag[1].toUpperCase()];
		if (mimeType) {
			return {title: title, url: url, mimeType: mimeType};
		}
	}
}

// Upper-cased title tag -> MIME type
getAttachment.mimeTypes = {
	'PDF': 'application/pdf',
	'DOC': 'application/msword',
	'HTML': 'text/html'
};
/*********************
* Scraper functions *
*********************/
/**
 * Collect the result nodes that offer at least one of the footer links the
 * XPath below tests for ("q=info:", "q=related:" or a gs_ocit onclick).
 *
 * @param {Document} doc - search results document
 * @return {Node[]} whatever ZU.xpath returns for the expression
 */
function getViableResults(doc) {
	// Link tests joined exactly as in the expression this replaces
	var footerLinkTests = [
		'contains(@href,"q=info:")',
		'contains(@href,"q=related:")',
		'contains(@onclick, "gs_ocit(event")'
	];
	var viableResultXPath = '//div[@class="gs_r"]'
		+ '[.//div[@class="gs_fl"]/a[' + footerLinkTests.join('or ') + ']]';
	return ZU.xpath(doc, viableResultXPath);
}
/**
 * NOTE(review): the text of this definition is damaged and does not parse.
 * The loop header below ("i 0") has no comparison operator and no update
 * clause, and the remaining lines only return booleans without using `doc`
 * or `articles`. It looks as if source text was lost between "i" and "0"
 * (presumably everything from a "<" up to a later ">"), and the closing "};"
 * may belong to a different definition. Restore this region from an intact
 * copy of the translator before relying on it.
 */
function scrapeArticleResults(doc, articles) {
for(var i=0, n=articles.length; i 0) {
return true;
}
return false;
};
/**
 * Citelet parsing, step (1): find the year.
 *
 * On first use the citelet is split on " - " into this.hyphenSplit, and a copy
 * of the last segment is kept in this.trailingInfo. Segments are then scanned
 * from last to first for one that ends in four digits. That year is stored in
 * this.v.date and stripped from its segment, and this.hyphenSplit is truncated
 * after that segment. The outcome is cached: this.v.date is `false` when no
 * year was found.
 *
 * @return {String|Boolean} the four-digit year, or false
 */
ItemFactory.prototype.getDate = function () {
	var i, m;
	// Citelet parsing, step (1)
	if (!this.hyphenSplit) {
		this.hyphenSplit = this.citelet.split(/\s+-\s+/);
		this.trailingInfo = this.hyphenSplit.slice(-1);
	}
	if (!this.v.date && this.v.date !== false) {
		this.v.date = false;
		for (i = this.hyphenSplit.length - 1; i > -1; i -= 1) {
			// The prefix group is optional ("?") rather than repeated ("*"):
			// a repeated "(.*)\s+" backtracks exponentially on a multi-word
			// segment with no trailing year, while for single-line text both
			// forms capture the same prefix and year.
			m = this.hyphenSplit[i].match(/(?:(.*)\s+)?([0-9]{4})$/);
			if (m) {
				this.v.date = m[2];
				// Keep whatever preceded the year, or blank the segment
				this.hyphenSplit[i] = m[1] ? m[1] : "";
				this.hyphenSplit = this.hyphenSplit.slice(0, i + 1);
				break;
			}
		}
	}
	return this.v.date;
};
/**
 * Citelet parsing, step (2): take the court from the last remaining segment.
 *
 * Pops the last entry of this.hyphenSplit, drops a trailing comma, and turns a
 * trailing ellipsis into "Court". A leading "xx:" prefix is recorded in
 * this.v.extra as a jurisdiction; underscores in the court name become spaces.
 *
 * @return {String} the value stored in this.v.court
 */
ItemFactory.prototype.getCourt = function () {
	// Citelet parsing, step (2)
	var segment = this.hyphenSplit.pop();
	segment = segment.replace(/,\s*$/, "");
	segment = segment.replace(/\u2026\s*$/, "Court");

	var parsed = segment.match(/(?:([a-zA-Z]+):\s*)*(.*)/);
	if (parsed) {
		var jurisdiction = parsed[1];
		this.v.court = parsed[2].replace(/_/g, " ");
		if (jurisdiction) {
			this.v.extra = "{:jurisdiction: " + jurisdiction + "}";
		}
	}
	return this.v.court;
};
/**
 * Citelet parsing, step (3): peel "volume reporter pages" citations off the
 * end of the last remaining segment.
 *
 * The segment is split on commas into this.commaSplit. Working from the last
 * piece backwards, every piece shaped like "<digits> <reporter> <pages>" is
 * removed from this.commaSplit; it is appended to this.vv.volRepPag unless its
 * pages do not end in a digit and it is either not the first piece or a
 * citation has already been kept. Scanning stops at the first piece that does
 * not have that shape.
 */
ItemFactory.prototype.getVolRepPag = function () {
	// Citelet parsing, step (3)
	if (!this.hyphenSplit.length) {
		return;
	}
	this.commaSplit = this.hyphenSplit.slice(-1)[0].split(/\s*,\s+/);

	var keptOne = false;
	var idx = this.commaSplit.length;
	while (idx-- > 0) {
		var parts = this.commaSplit[idx].match(/^([0-9]+)\s+(.*)\s+(.*)/);
		if (!parts) {
			break;
		}
		var citation = {
			volume: parts[1],
			reporter: parts[2],
			pages: parts[3].replace(/\s*$/, "")
		};
		// The piece is consumed whether or not the citation is kept
		this.commaSplit.pop();
		var endsInDigit = /[0-9]$/.test(citation.pages);
		if (!endsInDigit && (idx > 0 || keptOne)) {
			continue;
		}
		keptOne = true;
		this.vv.volRepPag.push(citation);
	}
};
/**
 * Citelet parsing, step (4) [optional]: whatever is left in this.commaSplit
 * is joined back together as the title. Does nothing when this.commaSplit
 * has not been set.
 */
ItemFactory.prototype.getTitle = function () {
	// Citelet parsing, step (4) [optional]
	if (!this.commaSplit) {
		return;
	}
	this.v.title = this.commaSplit.join(", ");
};
/**
 * Read the docket number from a case page or a saved-citation page.
 *
 * Takes the first node matched by the XPath below and stores its text in
 * this.v.docketNumber, minus a leading "No" abbreviation and a trailing
 * period. Leaves this.v.docketNumber untouched when no node matches.
 *
 * @param {Document} doc - page document
 */
ItemFactory.prototype.getDocketNumber = function (doc) {
	var docNumFrag = doc.evaluate(
		'//center[preceding-sibling::center//h3[@id="gsl_case_name"]]'
			+ '| //div[@class="gsc_value" and preceding-sibling::div[text()="Docket id"]]',
		doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	if (docNumFrag) {
		this.v.docketNumber = docNumFrag.textContent
			// "No" must be followed by a punctuation mark or whitespace. The
			// previous pattern used a bare "." here, which matched ANY character
			// and so mangled text such as "Northern ..." or "No123".
			.replace(/^\s*[Nn][Oo](?:[^\w\s]|\s+)\s*/, "")
			.replace(/\.\s*$/, "");
	}
};
/**
 * Build one text/html attachment descriptor per entry of this.attachmentLinks.
 *
 * @param {String} doctype - label appended to "Google Scholar Linked "
 * @return {Object[]} descriptors with title, type and url, in link order
 */
ItemFactory.prototype.getAttachments = function (doctype) {
	var links = this.attachmentLinks;
	var total = links.length;
	var attachments = [];
	var pos = 0;
	while (pos < total) {
		attachments.push({
			title: "Google Scholar Linked " + doctype,
			type: "text/html",
			url: links[pos]
		});
		pos += 1;
	}
	return attachments;
};
/**
 * Replace the current item's attachments with the list built by
 * getAttachments().
 *
 * @param {String} doctype - label forwarded to getAttachments()
 */
ItemFactory.prototype.pushAttachments = function (doctype) {
	var linked = this.getAttachments(doctype);
	this.item.attachments = linked;
};
/*
ItemFactory.prototype.getBibtexData = function (callback) {
if (!this.bibtexData) {
if (this.bibtexData !== false) {
Zotero.Utilities.doGet(this.bibtexLink, function(bibtexData) {
if (!bibtexData.match(/title={{}}/)) {
this.bibtexData = bibtexData;
} else {
this.bibtexData = false;
}
callback(this.bibtexData);
});
return;
}
}
callback(this.bibtexData);
};
*/
/**
 * Create and complete the Zotero "case" item(s) for the parsed data.
 *
 * Nothing is saved unless this.v.title is set. With no parsed citations a
 * single item is completed. Otherwise one item is created per entry of
 * this.vv.volRepPag; each gets a provisional itemID taken from bogusItemID,
 * only the last one receives the attachments, and every item lists all the
 * others in seeAlso before being completed.
 */
ItemFactory.prototype.saveItem = function () {
	// j and jlen are declared here; they used to be assigned without `var`,
	// which leaks globals and throws in strict mode.
	var i, ilen, j, jlen, key;
	if (this.v.title) {
		this.repairTitle();
		if (this.vv.volRepPag.length) {
			var completed_items = [];
			for (i = 0, ilen = this.vv.volRepPag.length; i < ilen; i += 1) {
				this.item = new Zotero.Item("case");
				// Copy the non-empty volume/reporter/pages fields of this citation
				for (key in this.vv.volRepPag[i]) {
					if (this.vv.volRepPag[i][key]) {
						this.item[key] = this.vv.volRepPag[i][key];
					}
				}
				this.saveItemCommonVars();
				// Attach the linked judgement to the last item only
				if (i === (this.vv.volRepPag.length - 1)) {
					this.pushAttachments("Judgement");
				}
				this.item.itemID = "" + bogusItemID;
				bogusItemID += 1;
				completed_items.push(this.item);
			}
			// Cross-reference every item with all of its siblings
			for (i = 0, ilen = completed_items.length; i < ilen; i += 1) {
				for (j = 0, jlen = completed_items.length; j < jlen; j += 1) {
					if (i === j) {
						continue;
					}
					completed_items[i].seeAlso.push(completed_items[j].itemID);
				}
				completed_items[i].complete();
			}
		} else {
			this.item = new Zotero.Item("case");
			this.saveItemCommonVars();
			this.pushAttachments("Judgement");
			this.item.complete();
		}
	}
};
/**
 * Copy every truthy field of this.v onto the item currently being built.
 */
ItemFactory.prototype.saveItemCommonVars = function () {
	// Declared locally; `key` used to be assigned without `var`, which leaks
	// a global and throws in strict mode.
	var key;
	for (key in this.v) {
		if (this.v[key]) {
			this.item[key] = this.v[key];
		}
	}
};
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://scholar.google.com/scholar?q=marbury&hl=en&btnG=Search&as_sdt=1%2C22&as_sdtp=on",
"items": "multiple"
},
{
"type": "web",
"url": "http://scholar.google.com/scholar?hl=en&q=kelo&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
"items": "multiple"
},
{
"type": "web",
"url": "http://scholar.google.com/scholar?hl=en&q=smith&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
"items": "multiple"
},
{
"type": "web",
"url": "http://scholar.google.com/scholar?hl=en&q=view+of+the+cathedral&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
"items": "multiple"
},
{
"type": "web",
"url": "http://scholar.google.com/scholar?hl=en&q=clifford&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
"items": "multiple"
},
{
"type": "web",
"url": "http://scholar.google.com/scholar_case?case=9834052745083343188&q=marbury+v+madison&hl=en&as_sdt=2,5",
"items": [
{
"itemType": "case",
"creators": [],
"notes": [],
"tags": [],
"seeAlso": [],
"attachments": [
{
"title": "Google Scholar Linked Judgement",
"type": "text/html",
"url": false
}
],
"volume": "5",
"reporter": "US",
"pages": "137",
"title": "Marbury v. Madison",
"court": "Supreme Court",
"date": "1803",
"itemID": "1",
"libraryCatalog": "Google Scholar"
}
]
}
]
/** END TEST CASES **/