mirror of
https://github.com/tuxbox-neutrino/neutrino.git
synced 2025-08-29 16:31:11 +02:00
yt: speed up retrieving feed data by starting multiple (up to 8, default is 4) http transfers in parallel
This commit is contained in:
@@ -22,6 +22,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
@@ -29,6 +30,9 @@
|
||||
#include <bitset>
|
||||
#include <string>
|
||||
|
||||
#include <OpenThreads/ScopedLock>
|
||||
#include "set_threadname.h"
|
||||
|
||||
#include "ytparser.h"
|
||||
|
||||
#if LIBCURL_VERSION_NUM < 0x071507
|
||||
@@ -90,6 +94,7 @@ cYTFeedParser::cYTFeedParser()
|
||||
feedmode = -1;
|
||||
tquality = "mqdefault";
|
||||
max_results = 25;
|
||||
concurrent_downloads = 2;
|
||||
curl_handle = curl_easy_init();
|
||||
}
|
||||
|
||||
@@ -107,20 +112,23 @@ size_t cYTFeedParser::CurlWriteToString(void *ptr, size_t size, size_t nmemb, vo
|
||||
return size*nmemb;
|
||||
}
|
||||
|
||||
bool cYTFeedParser::getUrl(std::string &url, std::string &answer)
|
||||
bool cYTFeedParser::getUrl(std::string &url, std::string &answer, CURL *_curl_handle)
|
||||
{
|
||||
curl_easy_setopt(curl_handle, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, &cYTFeedParser::CurlWriteToString);
|
||||
curl_easy_setopt(curl_handle, CURLOPT_FILE, (void *)&answer);
|
||||
curl_easy_setopt(curl_handle, CURLOPT_FAILONERROR, 1);
|
||||
curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, URL_TIMEOUT);
|
||||
curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, (long)1);
|
||||
if (!_curl_handle)
|
||||
_curl_handle = curl_handle;
|
||||
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_WRITEFUNCTION, &cYTFeedParser::CurlWriteToString);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_FILE, (void *)&answer);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_FAILONERROR, 1);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_TIMEOUT, URL_TIMEOUT);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_NOSIGNAL, (long)1);
|
||||
|
||||
char cerror[CURL_ERROR_SIZE];
|
||||
curl_easy_setopt(curl_handle, CURLOPT_ERRORBUFFER, cerror);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_ERRORBUFFER, cerror);
|
||||
|
||||
printf("try to get [%s] ...\n", url.c_str());
|
||||
CURLcode httpres = curl_easy_perform(curl_handle);
|
||||
CURLcode httpres = curl_easy_perform(_curl_handle);
|
||||
|
||||
printf("http: res %d size %d\n", httpres, answer.size());
|
||||
|
||||
@@ -131,31 +139,34 @@ bool cYTFeedParser::getUrl(std::string &url, std::string &answer)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool cYTFeedParser::DownloadUrl(std::string &url, std::string &file)
|
||||
bool cYTFeedParser::DownloadUrl(std::string &url, std::string &file, CURL *_curl_handle)
|
||||
{
|
||||
if (!_curl_handle)
|
||||
_curl_handle = curl_handle;
|
||||
|
||||
FILE * fp = fopen(file.c_str(), "wb");
|
||||
if (fp == NULL) {
|
||||
perror(file.c_str());
|
||||
return false;
|
||||
}
|
||||
curl_easy_setopt(curl_handle, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, NULL);
|
||||
curl_easy_setopt(curl_handle, CURLOPT_FILE, fp);
|
||||
curl_easy_setopt(curl_handle, CURLOPT_FAILONERROR, 1);
|
||||
curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, URL_TIMEOUT);
|
||||
curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, (long)1);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_WRITEFUNCTION, NULL);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_FILE, fp);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_FAILONERROR, 1);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_TIMEOUT, URL_TIMEOUT);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_NOSIGNAL, (long)1);
|
||||
|
||||
char cerror[CURL_ERROR_SIZE];
|
||||
curl_easy_setopt(curl_handle, CURLOPT_ERRORBUFFER, cerror);
|
||||
curl_easy_setopt(_curl_handle, CURLOPT_ERRORBUFFER, cerror);
|
||||
|
||||
printf("try to get [%s] ...\n", url.c_str());
|
||||
CURLcode httpres = curl_easy_perform(curl_handle);
|
||||
CURLcode httpres = curl_easy_perform(_curl_handle);
|
||||
|
||||
double dsize;
|
||||
curl_easy_getinfo(curl_handle, CURLINFO_SIZE_DOWNLOAD, &dsize);
|
||||
curl_easy_getinfo(_curl_handle, CURLINFO_SIZE_DOWNLOAD, &dsize);
|
||||
fclose(fp);
|
||||
|
||||
printf("http: res %d size %f.\n", httpres, dsize);
|
||||
printf("http: res %d size %g.\n", httpres, dsize);
|
||||
|
||||
if (httpres != 0) {
|
||||
printf("curl error: %s\n", cerror);
|
||||
@@ -360,12 +371,22 @@ bool cYTFeedParser::parseFeedXml(std::string &answer)
|
||||
/* save first one, if wanted not found */
|
||||
if (vinfo.thumbnail.empty())
|
||||
vinfo.thumbnail = thumbnail;
|
||||
if (ParseVideoInfo(vinfo))
|
||||
videos.push_back(vinfo);
|
||||
vinfo.ret = false;
|
||||
videos.push_back(vinfo);
|
||||
}
|
||||
entry = entry->xmlNextNode;
|
||||
}
|
||||
xmlFreeDoc(answer_parser);
|
||||
|
||||
GetVideoUrls();
|
||||
|
||||
std::vector<cYTVideoInfo>::iterator pos = videos.begin();
|
||||
while (pos != videos.end())
|
||||
if ((*pos).ret)
|
||||
++pos;
|
||||
else
|
||||
pos = videos.erase(pos);
|
||||
|
||||
parsed = !videos.empty();
|
||||
return parsed;
|
||||
}
|
||||
@@ -532,7 +553,7 @@ bool cYTFeedParser::ParseFeed(yt_feed_mode_t mode, std::string search, std::stri
|
||||
return ParseFeed(url);
|
||||
}
|
||||
|
||||
bool cYTFeedParser::ParseVideoInfo(cYTVideoInfo &vinfo)
|
||||
bool cYTFeedParser::ParseVideoInfo(cYTVideoInfo &vinfo, CURL *_curl_handle)
|
||||
{
|
||||
bool ret = false;
|
||||
std::vector<std::string> estr;
|
||||
@@ -547,44 +568,98 @@ bool cYTFeedParser::ParseVideoInfo(cYTVideoInfo &vinfo)
|
||||
vurl += "&ps=default&eurl=&gl=US&hl=en";
|
||||
printf("cYTFeedParser::ParseVideoInfo: get [%s]\n", vurl.c_str());
|
||||
std::string answer;
|
||||
if (!getUrl(vurl, answer))
|
||||
if (!getUrl(vurl, answer, _curl_handle))
|
||||
continue;
|
||||
ret = decodeVideoInfo(answer, vinfo);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
vinfo.ret = ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *cYTFeedParser::DownloadThumbnailsThread(void *arg)
|
||||
{
|
||||
set_threadname("YT::DownloadThumbnails");
|
||||
bool ret = true;
|
||||
cYTFeedParser *caller = (cYTFeedParser *)arg;
|
||||
CURL *c = curl_easy_init();
|
||||
unsigned int i;
|
||||
do {
|
||||
OpenThreads::ScopedLock<OpenThreads::Mutex> m_lock(caller->mutex);
|
||||
i = caller->worker_index++;
|
||||
} while (i < caller->videos.size() && ((ret &= caller->DownloadThumbnail(caller->videos[i], c)) || true));
|
||||
curl_easy_cleanup(c);
|
||||
pthread_exit(&ret);
|
||||
}
|
||||
|
||||
bool cYTFeedParser::DownloadThumbnail(cYTVideoInfo &vinfo, CURL *_curl_handle)
|
||||
{
|
||||
if (!_curl_handle)
|
||||
_curl_handle = curl_handle;
|
||||
bool found = false;
|
||||
if (!vinfo.thumbnail.empty()) {
|
||||
std::string fname = thumbnail_dir + "/" + vinfo.id + ".jpg";
|
||||
found = !access(fname.c_str(), F_OK);
|
||||
if (!found)
|
||||
found = DownloadUrl(vinfo.thumbnail, fname, _curl_handle);
|
||||
if (found)
|
||||
vinfo.tfile = fname;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
bool cYTFeedParser::DownloadThumbnails()
|
||||
{
|
||||
bool ret = false;
|
||||
if (mkdir(thumbnail_dir.c_str(), 0755)) {
|
||||
bool ret = true;
|
||||
if (mkdir(thumbnail_dir.c_str(), 0755) && errno != EEXIST) {
|
||||
perror(thumbnail_dir.c_str());
|
||||
//return ret;
|
||||
return false;
|
||||
}
|
||||
for (unsigned i = 0; i < videos.size(); i++) {
|
||||
if (!videos[i].thumbnail.empty()) {
|
||||
std::string fname = thumbnail_dir;
|
||||
fname += "/";
|
||||
fname += videos[i].id;
|
||||
fname += ".jpg";
|
||||
bool found = !access(fname.c_str(), F_OK);
|
||||
if (!found)
|
||||
found = DownloadUrl(videos[i].thumbnail, fname);
|
||||
if (found)
|
||||
videos[i].tfile = fname;
|
||||
ret |= found;
|
||||
}
|
||||
unsigned int max_threads = concurrent_downloads;
|
||||
if (videos.size() < max_threads)
|
||||
max_threads = videos.size();
|
||||
pthread_t ta[max_threads];
|
||||
worker_index = 0;
|
||||
for (unsigned i = 0; i < max_threads; i++)
|
||||
pthread_create(&ta[i], NULL, cYTFeedParser::DownloadThumbnailsThread, this);
|
||||
for (unsigned i = 0; i < max_threads; i++) {
|
||||
void *r;
|
||||
pthread_join(ta[i], &r);
|
||||
ret &= *((bool *)r);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *cYTFeedParser::GetVideoUrlsThread(void *arg)
|
||||
{
|
||||
set_threadname("YT::GetVideoUrls");
|
||||
int ret = 0;
|
||||
cYTFeedParser *caller = (cYTFeedParser *)arg;
|
||||
CURL *c = curl_easy_init();
|
||||
unsigned int i;
|
||||
do {
|
||||
OpenThreads::ScopedLock<OpenThreads::Mutex> m_lock(caller->mutex);
|
||||
i = caller->worker_index++;
|
||||
} while (i < caller->videos.size() && ((ret |= caller->ParseVideoInfo(caller->videos[i], c)) || true));
|
||||
curl_easy_cleanup(c);
|
||||
pthread_exit(&ret);
|
||||
}
|
||||
|
||||
bool cYTFeedParser::GetVideoUrls()
|
||||
{
|
||||
bool ret = false;
|
||||
for (unsigned i = 0; i < videos.size(); i++) {
|
||||
ret |= ParseVideoInfo(videos[i]);
|
||||
int ret = 0;
|
||||
unsigned int max_threads = concurrent_downloads;
|
||||
if (videos.size() < max_threads)
|
||||
max_threads = videos.size();
|
||||
pthread_t ta[max_threads];
|
||||
worker_index = 0;
|
||||
for (unsigned i = 0; i < max_threads; i++)
|
||||
pthread_create(&ta[i], NULL, cYTFeedParser::GetVideoUrlsThread, this);
|
||||
for (unsigned i = 0; i < max_threads; i++) {
|
||||
void *r;
|
||||
pthread_join(ta[i], &r);
|
||||
ret |= *((int *)r);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
Reference in New Issue
Block a user