You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tellico/src/fetch/animenfofetcher.cpp

379 lines
12 KiB

/***************************************************************************
copyright : (C) 2006 by Robby Stephenson
email : robby@periapsis.org
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of version 2 of the GNU General Public License as *
* published by the Free Software Foundation; *
* *
***************************************************************************/
#include "animenfofetcher.h"
#include "messagehandler.h"
#include "../tellico_kernel.h"
#include "../tellico_utils.h"
#include "../collections/videocollection.h"
#include "../entry.h"
#include "../filehandler.h"
#include "../latin1literal.h"
#include "../imagefactory.h"
#include "../tellico_debug.h"
#include <tdelocale.h>
#include <tdeconfig.h>
#include <tdeio/job.h>
#include <tqregexp.h>
#include <tqlayout.h>
#include <tqlabel.h>
#include <tqfile.h>
// Uncomment to make the ANIMENFO_TEST branches in this file read saved HTML
// pages from local paths instead of querying the web site.
//#define ANIMENFO_TEST
namespace {
// URL of the search page; it is also used as the base URL when resolving the
// links and image paths found in the downloaded pages.
static const char* ANIMENFO_BASE_URL = "http://www.animenfo.com/search.php";
}
// lets the member definitions below be written without the namespace prefix
using Tellico::Fetch::AnimeNfoFetcher;
// Creates a fetcher on which no search has been started yet.
//
// parent_ and name_ are handed to the Fetcher base class unchanged.
AnimeNfoFetcher::AnimeNfoFetcher(TQObject* parent_, const char* name_ /*=0*/)
    : Fetcher(parent_, name_)
    , m_started(false) {
}
// Returns the built-in name of this data source.
TQString AnimeNfoFetcher::defaultName() {
  const TQString name = TQString::fromLatin1("AnimeNfo.com");
  return name;
}
// Returns the name of this source: m_name when it is not empty, otherwise the
// built-in default name.
TQString AnimeNfoFetcher::source() const {
  if(m_name.isEmpty()) {
    return defaultName();
  }
  return m_name;
}
// Returns true only for video collections, the single collection type this
// fetcher accepts.
bool AnimeNfoFetcher::canFetch(int type) const {
  const bool isVideo = (Data::Collection::Video == type);
  return isVideo;
}
// Configuration hook; this fetcher reads nothing from the config group.
void AnimeNfoFetcher::readConfigHook(const TDEConfigGroup& config_) {
  // the parameter is intentionally ignored
  Q_UNUSED(config_);
}
// Starts a search and returns immediately; the download runs as a TDEIO job.
//
// key_   - kind of search; only Keyword is handled, any other key logs a
//          warning and stops the fetcher
// value_ - the text to search for
//
// Incoming data is delivered to slotData() and slotComplete() is called when
// the job has finished.
void AnimeNfoFetcher::search(FetchKey key_, const TQString& value_) {
  m_started = true;
  m_matches.clear();

#ifdef ANIMENFO_TEST
  KURL u = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/animenfo.html"));
#else
  // bail out before building the request if the current collection is not supported
  if(!canFetch(Kernel::self()->collectionType())) {
    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
    stop();
    return;
  }

  // only keyword searches are available
  if(key_ != Keyword) {
    kdWarning() << "AnimeNfoFetcher::search() - key not recognized: " << key_ << endl;
    stop();
    return;
  }

  KURL u(TQString::fromLatin1(ANIMENFO_BASE_URL));
  u.addQueryItem(TQString::fromLatin1("action"), TQString::fromLatin1("Go"));
  u.addQueryItem(TQString::fromLatin1("option"), TQString::fromLatin1("keywords"));
  u.addQueryItem(TQString::fromLatin1("queryin"), TQString::fromLatin1("anime_titles"));
  u.addQueryItem(TQString::fromLatin1("query"), value_);
#endif

//  myDebug() << "AnimeNfoFetcher::search() - url: " << u.url() << endl;

  m_job = TDEIO::get(u, false, false);
  connect(m_job, TQT_SIGNAL(data(TDEIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(TDEIO::Job*, const TQByteArray&)));
  connect(m_job, TQT_SIGNAL(result(TDEIO::Job*)),
          TQT_SLOT(slotComplete(TDEIO::Job*)));
}
// Ends the current search, if any: kills a job that is still held, discards
// the downloaded data and emits signalDone(). Does nothing when no search has
// been started.
void AnimeNfoFetcher::stop() {
  if(m_started) {
    if(m_job) {
      m_job->kill();
      m_job = 0;
    }
    m_data.truncate(0);
    m_started = false;
    emit signalDone(this);
  }
}
// Appends a chunk of downloaded bytes to m_data.
//
// data_ - the bytes delivered by the job's data() signal
void AnimeNfoFetcher::slotData(TDEIO::Job*, const TQByteArray& data_) {
  // open the buffer in append mode so earlier chunks are kept
  TQDataStream out(m_data, IO_WriteOnly | IO_Append);
  out.writeRawBytes(data_.data(), data_.size());
}
// Called when the search job has finished. Parses the downloaded search page,
// emits signalResultFound() for every result found, remembers the URL of each
// result in m_matches under the result's uid, and finally calls stop().
//
// job_ - the finished job; if it reports an error, an error dialog is shown
//        and the search is stopped without parsing anything
void AnimeNfoFetcher::slotComplete(TDEIO::Job* job_) {
// myDebug() << "AnimeNfoFetcher::slotComplete()" << endl;
// since the fetch is done, don't worry about holding the job pointer
m_job = 0;
if(job_->error()) {
job_->showErrorDialog(Kernel::self()->widget());
stop();
return;
}
if(m_data.isEmpty()) {
myDebug() << "AnimeNfoFetcher::slotComplete() - no data" << endl;
stop();
return;
}
TQString s = Tellico::decodeHTML(TQString(m_data));
// matches the content of one <td class="anime_info"> cell (non-greedy)
TQRegExp infoRx(TQString::fromLatin1("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[\"'][^>]*>(.*)</td>"), false);
infoRx.setMinimal(true);
// captures the href (1) and the link text (2) of an anchor
TQRegExp anchorRx(TQString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), false);
anchorRx.setMinimal(true);
TQRegExp yearRx(TQString::fromLatin1("\\d{4}"), false);
// search page comes in groups of threes
// n is the index of the current cell within its group; u, t and y hold the
// url, title and year collected for the group being read
int n = 0;
TQString u, t, y;
// the loop also ends early if the search gets stopped (m_started becomes false)
for(int pos = infoRx.search(s); m_started && pos > -1; pos = infoRx.search(s, pos+1)) {
// at the first cell of a new group, publish the previous group if it had a url
if(n == 0 && !u.isEmpty()) {
SearchResult* r = new SearchResult(this, t, y, TQString());
emit signalResultFound(r);
#ifdef ANIMENFO_TEST
KURL url = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/animetitle.html"));
#else
// resolve the link against the search URL and drop the query part
KURL url(TQString::fromLatin1(ANIMENFO_BASE_URL), u);
url.setQuery(TQString());
#endif
m_matches.insert(r->uid, url);
u.truncate(0);
t.truncate(0);
y.truncate(0);
}
switch(n) {
case 0: // title and url
{
int pos2 = anchorRx.search(infoRx.cap(1));
if(pos2 > -1) {
u = anchorRx.cap(1);
t = anchorRx.cap(2);
}
}
break;
case 1: // don't care
break;
case 2:
// the third cell is used as the year only when it is exactly four digits
if(yearRx.exactMatch(infoRx.cap(1))) {
y = infoRx.cap(1);
}
break;
}
n = (n+1)%3;
}
// grab last response
// (results are published at the start of the following group, so the final
// group is still pending when the loop ends)
#ifndef ANIMENFO_TEST
if(!u.isEmpty()) {
SearchResult* r = new SearchResult(this, t, y, TQString());
emit signalResultFound(r);
KURL url(TQString::fromLatin1(ANIMENFO_BASE_URL), u);
url.setQuery(TQString());
m_matches.insert(r->uid, url);
}
#endif
stop();
}
// Returns the entry belonging to a search result.
//
// uid_ - the uid of a search result reported earlier
//
// An entry that was already fetched is returned from m_entries. Otherwise the
// page stored in m_matches for this uid is downloaded and parsed, and the new
// entry is remembered. Returns a null pointer if no URL is known for the uid,
// if the page has no text, or if parsing yields no entry.
Tellico::Data::EntryPtr AnimeNfoFetcher::fetchEntry(uint uid_) {
  // reuse an entry that was grabbed before
  Data::EntryPtr cached = m_entries[uid_];
  if(cached) {
    return cached;
  }

  KURL entryUrl = m_matches[uid_];
  if(entryUrl.isEmpty()) {
    kdWarning() << "AnimeNfoFetcher::fetchEntry() - no url in map" << endl;
    return 0;
  }

  TQString html = Tellico::decodeHTML(FileHandler::readTextFile(entryUrl, true));
  if(html.isEmpty()) {
    myDebug() << "AnimeNfoFetcher::fetchEntry() - no text results" << endl;
    return 0;
  }

#if 0
  kdWarning() << "Remove debug from animenfofetcher.cpp" << endl;
  TQFile f(TQString::fromLatin1("/tmp/test.html"));
  if(f.open(IO_WriteOnly)) {
    TQTextStream t(&f);
    t.setEncoding(TQTextStream::UnicodeUTF8);
    t << html;
  }
  f.close();
#endif

  Data::EntryPtr parsed = parseEntry(html);
  if(!parsed) {
    myDebug() << "AnimeNfoFetcher::fetchEntry() - error in processing entry" << endl;
    return 0;
  }

  // keep for later requests with the same uid
  m_entries.insert(uid_, parsed);
  return parsed;
}
// Builds a video entry from the HTML of an AnimeNfo title page.
//
// str_ - the HTML text of the title page
//
// Returns an entry that lives in a newly created video collection to which
// the fields "origtitle", "alttitle", "distributor" and "episodes" have been
// added. Only the captions listed in fieldMap are copied into the entry;
// the cover, the alternative titles and the plot are filled in when they can
// be located in the page.
Tellico::Data::EntryPtr AnimeNfoFetcher::parseEntry(const TQString& str_) {
//  myDebug() << "AnimeNfoFetcher::parseEntry()" << endl;
  // class might be anime_info_top, so the closing quote is not part of the pattern
  TQRegExp infoRx(TQString::fromLatin1("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[^>]*>(.*)</td>"), false);
  infoRx.setMinimal(true);
  // any single HTML tag, used to strip markup from cell contents
  TQRegExp tagRx(TQString::fromLatin1("<.*>"));
  tagRx.setMinimal(true);
  TQRegExp anchorRx(TQString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), false);
  anchorRx.setMinimal(true);
  TQRegExp jsRx(TQString::fromLatin1("<script.*</script>"), false);
  jsRx.setMinimal(true);

  // work on a copy with all script blocks removed
  TQString s = str_;
  s.remove(jsRx);

  Data::CollPtr coll = new Data::VideoCollection(true);

  // add new fields
  Data::FieldPtr f = new Data::Field(TQString::fromLatin1("origtitle"), i18n("Original Title"));
  coll->addField(f);

  f = new Data::Field(TQString::fromLatin1("alttitle"), i18n("Alternative Titles"), Data::Field::Table);
  f->setFormatFlag(Data::Field::FormatTitle);
  coll->addField(f);

  f = new Data::Field(TQString::fromLatin1("distributor"), i18n("Distributor"));
  f->setCategory(i18n("Other People"));
  f->setFlags(Data::Field::AllowCompletion | Data::Field::AllowMultiple | Data::Field::AllowGrouped);
  f->setFormatFlag(Data::Field::FormatPlain);
  coll->addField(f);

  f = new Data::Field(TQString::fromLatin1("episodes"), i18n("Episodes"), Data::Field::Number);
  f->setCategory(i18n("Features"));
  coll->addField(f);

  // map captions in HTML to field names
  TQMap<TQString, TQString> fieldMap;
  fieldMap.insert(TQString::fromLatin1("Title"), TQString::fromLatin1("title"));
  fieldMap.insert(TQString::fromLatin1("Japanese Title"), TQString::fromLatin1("origtitle"));
  fieldMap.insert(TQString::fromLatin1("Total Episodes"), TQString::fromLatin1("episodes"));
  fieldMap.insert(TQString::fromLatin1("Genres"), TQString::fromLatin1("genre"));
  fieldMap.insert(TQString::fromLatin1("Year Published"), TQString::fromLatin1("year"));
  fieldMap.insert(TQString::fromLatin1("Studio"), TQString::fromLatin1("studio"));
  fieldMap.insert(TQString::fromLatin1("US Distribution"), TQString::fromLatin1("distributor"));

  Data::EntryPtr entry = new Data::Entry(coll);

  const TQRegExp genreSepRx(TQString::fromLatin1("\\s*,\\s*"));

  // the info cells alternate between a caption and its value
  bool isCaption = true;
  TQString key, value;
  int oldpos = -1; // position of the last info cell that was seen
  for(int pos = infoRx.search(s); pos > -1; pos = infoRx.search(s, pos+1)) {
    if(isCaption) {
      key = infoRx.cap(1).remove(tagRx);
    } else {
      value = infoRx.cap(1).remove(tagRx).simplifyWhiteSpace();

      // a value might be just "-", meaning there is no data; short real values
      // such as an episode count of "12" must still be accepted
      TQString withoutDashes = value;
      withoutDashes.remove(TQChar('-'));
      const bool isPlaceholder = withoutDashes.stripWhiteSpace().isEmpty();

      // store the pair as soon as it is complete, so that the last pair on
      // the page is handled like all the others
      if(!isPlaceholder && fieldMap.contains(key)) {
        if(key == Latin1Literal("Genres")) {
          // the page separates genres with commas, the entry uses "; "
          entry->setField(fieldMap[key],
                          TQStringList::split(genreSepRx, value).join(TQString::fromLatin1("; ")));
        } else {
          entry->setField(fieldMap[key], value);
        }
      }
    }
    isCaption = !isCaption;
    oldpos = pos;
  }

  // image
  // The cover is the image whose alt text equals the title. The title is
  // escaped because it is inserted into a regular expression and may contain
  // characters with a special meaning there.
  const TQString title = entry->field(TQString::fromLatin1("title"));
  TQRegExp imgRx(TQString::fromLatin1("<img\\s+[^>]*src\\s*=\\s*[\"']([^>]*)[\"']\\s+[^>]*alt\\s*=\\s*[\"']%1[\"']")
                 .arg(TQRegExp::escape(title)), false);
  imgRx.setMinimal(true);
  int pos = imgRx.search(s);
  if(pos > -1) {
    KURL imgURL(TQString::fromLatin1(ANIMENFO_BASE_URL), imgRx.cap(1));
    TQString id = ImageFactory::addImage(imgURL, true);
    if(!id.isEmpty()) {
      entry->setField(TQString::fromLatin1("cover"), id);
    }
  }

  // now look for alternative titles and plot
  const TQString a = TQString::fromLatin1("Alternative titles");
  pos = s.find(a, oldpos+1, false);
  if(pos > -1) {
    pos += a.length();
  }
  int pos2 = -1;
  if(pos > -1) {
    // everything between the two headings is taken as the alternative titles
    pos2 = s.find(TQString::fromLatin1("Description"), pos+1, true);
    if(pos2 > -1) {
      value = s.mid(pos, pos2-pos).remove(tagRx).simplifyWhiteSpace();
      entry->setField(TQString::fromLatin1("alttitle"), value);
    }
  }

  TQRegExp descRx(TQString::fromLatin1("class\\s*=\\s*[\"']description[\"'][^>]*>(.*)<"), false);
  descRx.setMinimal(true);
  // TQRegExp::search() counts a negative offset from the end of the string, so
  // start at 0 when the alternative titles heading was not found
  pos = descRx.search(s, TQMAX(0, TQMAX(pos, pos2)));
  if(pos > -1) {
    entry->setField(TQString::fromLatin1("plot"), descRx.cap(1).simplifyWhiteSpace());
  }

  return entry;
}
// Looks for updated data for an existing entry by running a keyword search
// on its title.
//
// entry_ - the entry to update; if its title is empty no search is started
//          and signalDone() is emitted right away
void AnimeNfoFetcher::updateEntry(Data::EntryPtr entry_) {
  const TQString title = entry_->field(TQString::fromLatin1("title"));
  if(title.isEmpty()) {
    // always need to emit this if not continuing with the search
    emit signalDone(this);
    return;
  }
  search(Fetch::Keyword, title);
}
// Creates the configuration widget of this fetcher as a child of parent_.
Tellico::Fetch::ConfigWidget* AnimeNfoFetcher::configWidget(TQWidget* parent_) const {
  AnimeNfoFetcher::ConfigWidget* widget = new AnimeNfoFetcher::ConfigWidget(parent_);
  return widget;
}
// Builds the configuration widget, which only shows a note that there is
// nothing to configure.
AnimeNfoFetcher::ConfigWidget::ConfigWidget(TQWidget* parent_)
    : Fetch::ConfigWidget(parent_) {
  TQVBoxLayout* layout = new TQVBoxLayout(optionsWidget());
  TQLabel* note = new TQLabel(i18n("This source has no options."), optionsWidget());
  layout->addWidget(note);
  // keep the note at the top of the options area
  layout->addStretch();
}
// Returns the name suggested for a source of this type, which is the
// fetcher's default name.
TQString AnimeNfoFetcher::ConfigWidget::preferredName() const {
  const TQString name = AnimeNfoFetcher::defaultName();
  return name;
}
#include "animenfofetcher.moc"