Files
recycled-ni-neutrino/src/system/ytparser.cpp
Jacek Jendrzej d58f68cf69 some inits
Origin commit data
------------------
Branch: ni/coolstream
Commit: f4f351e9ba
Author: Jacek Jendrzej <overx300@gmail.com>
Date: 2017-01-30 (Mon, 30 Jan 2017)


------------------
No further description and justification available within origin commit message!

------------------
This commit was generated by Migit
2017-01-30 17:22:41 +01:00

717 lines
19 KiB
C++

/*
Copyright (C) 2013 CoolStream International Ltd
License: GPLv2
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation;
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include <fstream>
#include <set>
#include <map>
#include <vector>
#include <bitset>
#include <string>
#include <OpenThreads/ScopedLock>
#include "settings.h"
#include "helpers.h"
#include "set_threadname.h"
#include <global.h>
#include <json/json.h>
#include "ytparser.h"
#include "ytcache.h"
#if LIBCURL_VERSION_NUM < 0x071507
#include <curl/types.h>
#endif
/* Value handed to CURLOPT_TIMEOUT for every transfer started in this file. */
#define URL_TIMEOUT 60
/* itag numbers of the stream formats this parser accepts, in the order they
 * are tried when a stream is picked; the list is terminated by 0. */
static int itags[] = { 37 /* 1080p MP4 */, 22 /* 720p MP4 */, 18 /* 270p/360p MP4 */, 0 };
/*
 * Return the URL of this format entry.
 * The stored url is handed out unchanged; the code that would append a
 * separate "&signature=" parameter is commented out.
 */
std::string cYTVideoUrl::GetUrl()
{
	//fullurl += "&signature=";
	//fullurl += sig;
	return std::string(url);
}
void cYTVideoInfo::Dump()
{
printf("id: %s\n", id.c_str());
printf("author: %s\n", author.c_str());
printf("title: %s\n", title.c_str());
printf("duration: %d\n", duration);
//printf("description: %s\n", description.c_str());
printf("urls: %d\n", (int)formats.size());
for (yt_urlmap_iterator_t it = formats.begin(); it != formats.end(); ++it) {
printf("format %d type [%s] url %s\n", it->first, it->second.type.c_str(), it->second.GetUrl().c_str());
}
printf("===================================================================\n");
}
/*
 * Pick a stream URL for this video.
 *
 * fmt        in/out itag. A non-zero *fmt is looked up first. If it is not
 *            available and a fallback is chosen, the chosen itag is written
 *            back to *fmt. NULL or *fmt == 0 means "no preference"; in that
 *            case nothing is written back to the caller.
 * mandatory  when true, only the requested itag is acceptable: on a miss
 *            *fmt is set to 0 and "" is returned.
 *
 * Returns the URL of the selected format, or "" if none is available.
 */
std::string cYTVideoInfo::GetUrl(int *fmt, bool mandatory)
{
	int default_fmt = 0;
	// Check the pointer itself before reading through it: previously a NULL
	// fmt was dereferenced here. A zero request keeps using the local
	// scratch value, exactly as before.
	if (!fmt || !*fmt)
		fmt = &default_fmt;

	yt_urlmap_iterator_t it = formats.find(*fmt);
	if (it != formats.end())
		return it->second.GetUrl();

	if (mandatory) {
		*fmt = 0;
		return "";
	}

	// Fall back to the first supported itag that this video offers.
	for (int *fmtp = itags; *fmtp; fmtp++) {
		it = formats.find(*fmtp);
		if (it != formats.end()) {
			*fmt = *fmtp;
			return it->second.GetUrl();
		}
	}
	return "";
}
/*
 * Set up a parser with its defaults: no feed parsed yet, thumbnails stored
 * below /tmp/ytparser, 25 results per request and two worker threads.
 */
cYTFeedParser::cYTFeedParser()
{
	// handle used by all calls that do not bring their own one
	curl_handle = curl_easy_init();

	// a developer key fixed at build time replaces the configured one
#ifdef YOUTUBE_DEV_ID
	key = YOUTUBE_DEV_ID;
#else
	key = g_settings.youtube_dev_id;
#endif

	// parser state
	feedmode = -1;
	parsed = false;

	// request / download defaults
	concurrent_downloads = 2;
	max_results = 25;
	tquality = "mqdefault";
	thumbnail_dir = "/tmp/ytparser";
}
/* Release the curl handle that was created in the constructor. */
cYTFeedParser::~cYTFeedParser()
{
curl_easy_cleanup(curl_handle);
}
/*
 * libcurl write callback that collects the received data in a std::string.
 *
 * ptr    start of the received chunk
 * size   size of one element
 * nmemb  number of elements; the chunk is size * nmemb bytes long
 * data   the std::string* that was registered as write target
 *
 * Returns the number of bytes consumed, which is always the whole chunk.
 */
size_t cYTFeedParser::CurlWriteToString(void *ptr, size_t size, size_t nmemb, void *data)
{
	const size_t total = size * nmemb;
	if (total > 0) {
		std::string *out = static_cast<std::string *>(data);
		// Append the full chunk. Using only nmemb here would store fewer
		// bytes than are reported as consumed whenever size != 1.
		out->append(static_cast<const char *>(ptr), total);
	}
	return total;
}
/*
 * Fetch url and append the response body to answer.
 *
 * url           address to request
 * answer        receives the body; data is appended, not replaced
 * _curl_handle  handle to use; NULL selects the parser's own handle
 *
 * Returns true if the transfer succeeded and answer is not empty.
 */
bool cYTFeedParser::getUrl(std::string &url, std::string &answer, CURL *_curl_handle)
{
	if (!_curl_handle)
		_curl_handle = curl_handle;

	// curl_easy_setopt() is variadic: numeric options must be passed as
	// long, an int or bool argument is not what libcurl reads back.
	curl_easy_setopt(_curl_handle, CURLOPT_URL, url.c_str());
	curl_easy_setopt(_curl_handle, CURLOPT_WRITEFUNCTION, &cYTFeedParser::CurlWriteToString);
	curl_easy_setopt(_curl_handle, CURLOPT_FILE, (void *)&answer);
	curl_easy_setopt(_curl_handle, CURLOPT_FAILONERROR, 1L);
	curl_easy_setopt(_curl_handle, CURLOPT_TIMEOUT, (long)URL_TIMEOUT);
	curl_easy_setopt(_curl_handle, CURLOPT_NOSIGNAL, 1L);
	// NOTE(review): certificate verification is switched off, so HTTPS
	// peers are not authenticated. Kept as is to not change behavior.
	curl_easy_setopt(_curl_handle, CURLOPT_SSL_VERIFYPEER, 0L);

	if(!g_settings.softupdate_proxyserver.empty()) {
		curl_easy_setopt(_curl_handle, CURLOPT_PROXY, g_settings.softupdate_proxyserver.c_str());
		if(!g_settings.softupdate_proxyusername.empty()) {
			std::string tmp = g_settings.softupdate_proxyusername + ":" + g_settings.softupdate_proxypassword;
			curl_easy_setopt(_curl_handle, CURLOPT_PROXYUSERPWD, tmp.c_str());
		}
	}

	char cerror[CURL_ERROR_SIZE] = {0};
	curl_easy_setopt(_curl_handle, CURLOPT_ERRORBUFFER, cerror);

	printf("try to get [%s] ...\n", url.c_str());
	CURLcode httpres = curl_easy_perform(_curl_handle);
	// cerror lives on this stack frame but the handle outlives the call:
	// unregister the buffer so the handle never points at dead memory.
	curl_easy_setopt(_curl_handle, CURLOPT_ERRORBUFFER, (char *)NULL);

	printf("http: res %d size %d\n", httpres, (int)answer.size());
	if (httpres != 0 || answer.empty()) {
		printf("error: %s\n", cerror);
		return false;
	}
	return true;
}
/*
 * Download url into the file named file.
 *
 * url           address to request
 * file          destination path; created or truncated, removed again if
 *               the transfer fails
 * _curl_handle  handle to use; NULL selects the parser's own handle
 *
 * Returns true if the transfer succeeded.
 */
bool cYTFeedParser::DownloadUrl(std::string &url, std::string &file, CURL *_curl_handle)
{
	if (!_curl_handle)
		_curl_handle = curl_handle;

	FILE * fp = fopen(file.c_str(), "wb");
	if (fp == NULL) {
		perror(file.c_str());
		return false;
	}

	// curl_easy_setopt() is variadic: numeric options must be passed as
	// long, an int or bool argument is not what libcurl reads back.
	curl_easy_setopt(_curl_handle, CURLOPT_URL, url.c_str());
	// no callback: libcurl writes to the FILE* given as CURLOPT_FILE
	curl_easy_setopt(_curl_handle, CURLOPT_WRITEFUNCTION, NULL);
	curl_easy_setopt(_curl_handle, CURLOPT_FILE, fp);
	curl_easy_setopt(_curl_handle, CURLOPT_FAILONERROR, 1L);
	curl_easy_setopt(_curl_handle, CURLOPT_TIMEOUT, (long)URL_TIMEOUT);
	curl_easy_setopt(_curl_handle, CURLOPT_NOSIGNAL, 1L);
	// NOTE(review): certificate verification is switched off, so HTTPS
	// peers are not authenticated. Kept as is to not change behavior.
	curl_easy_setopt(_curl_handle, CURLOPT_SSL_VERIFYPEER, 0L);

	if(!g_settings.softupdate_proxyserver.empty()) {
		curl_easy_setopt(_curl_handle, CURLOPT_PROXY, g_settings.softupdate_proxyserver.c_str());
		if(!g_settings.softupdate_proxyusername.empty()) {
			std::string tmp = g_settings.softupdate_proxyusername + ":" + g_settings.softupdate_proxypassword;
			curl_easy_setopt(_curl_handle, CURLOPT_PROXYUSERPWD, tmp.c_str());
		}
	}

	char cerror[CURL_ERROR_SIZE] = {0};
	curl_easy_setopt(_curl_handle, CURLOPT_ERRORBUFFER, cerror);

	printf("try to get [%s] ...\n", url.c_str());
	CURLcode httpres = curl_easy_perform(_curl_handle);

	// initialised so the log line below is defined even if getinfo fails
	double dsize = 0;
	curl_easy_getinfo(_curl_handle, CURLINFO_SIZE_DOWNLOAD, &dsize);
	// cerror lives on this stack frame but the handle outlives the call:
	// unregister the buffer so the handle never points at dead memory.
	curl_easy_setopt(_curl_handle, CURLOPT_ERRORBUFFER, (char *)NULL);
	fclose(fp);

	printf("http: res %d size %g.\n", httpres, dsize);
	if (httpres != 0) {
		printf("curl error: %s\n", cerror);
		// do not leave a partial file behind
		unlink(file.c_str());
		return false;
	}
	return true;
}
/*
 * URL-decode url in place. If curl cannot decode it, url stays untouched.
 */
void cYTFeedParser::decodeUrl(std::string &url)
{
	char *decoded = curl_easy_unescape(curl_handle, url.c_str(), 0, NULL);
	if (decoded != NULL)
		url.assign(decoded);
	curl_free(decoded);
}
/*
 * URL-encode txt in place. If curl cannot encode it, txt stays untouched.
 */
void cYTFeedParser::encodeUrl(std::string &txt)
{
	char *encoded = curl_easy_escape(curl_handle, txt.c_str(), txt.length());
	if (encoded != NULL)
		txt.assign(encoded);
	curl_free(encoded);
}
/*
 * Split str at every occurrence of delim, beginning at offset start.
 *
 * str      text to split
 * delim    separator; an empty separator yields one piece holding
 *          everything from start on
 * strlist  receives the pieces; previous content is discarded. The part
 *          behind the last separator is always added, so the list is
 *          never empty.
 * start    offset in str where splitting begins
 */
void cYTFeedParser::splitString(std::string &str, std::string delim, std::vector<std::string> &strlist, int start)
{
	strlist.clear();

	// keep the position in the string's own size type instead of
	// narrowing it back to int after every match
	std::string::size_type pos = start;

	// An empty delimiter matches at every position without advancing,
	// which used to loop forever; treat it as "nothing to split".
	if (!delim.empty()) {
		std::string::size_type end;
		while ((end = str.find(delim, pos)) != std::string::npos) {
			strlist.push_back(str.substr(pos, end - pos));
			pos = end + delim.size();
		}
	}
	strlist.push_back(str.substr(pos));
}
/*
 * Split str once at the first delim found at or behind offset start and
 * store the result as strmap[text before delim] = text behind delim.
 * strmap is left unchanged if delim does not occur.
 */
void cYTFeedParser::splitString(std::string &str, std::string delim, std::map<std::string,std::string> &strmap, int start)
{
	const std::string::size_type end = str.find(delim, start);
	if (end == std::string::npos)
		return;

	// The value begins right behind the delimiter, which is an absolute
	// position in str. The former "end - start + delim.size()" was only
	// correct for start == 0.
	strmap[str.substr(start, end - start)] = str.substr(end + delim.size());
}
/*
 * Write str to the file called name, replacing any previous content.
 *
 * Returns true if the file could be opened, written and closed, false
 * otherwise.
 */
bool cYTFeedParser::saveToFile(const char * name, std::string str)
{
	FILE * fp = fopen(name, "w+");
	if (!fp) {
		printf("cYTFeedParser::saveToFile: failed to open %s\n", name);
		return false;
	}

	// Report what really happened: success used to be returned as false
	// and write errors were not noticed at all.
	bool ok = fputs(str.c_str(), fp) != EOF;
	if (fclose(fp) != 0)
		ok = false;
	return ok;
}
/* Return the name of node as a string, or "" if xmlGetName() yields NULL. */
std::string cYTFeedParser::getXmlName(xmlNodePtr node)
{
	const char * name = xmlGetName(node);
	return name ? std::string(name) : std::string();
}
/* Return attribute attr of node as a string, or "" if xmlGetAttribute() yields NULL. */
std::string cYTFeedParser::getXmlAttr(xmlNodePtr node, const char * attr)
{
	const char * value = xmlGetAttribute(node, attr);
	return value ? std::string(value) : std::string();
}
/* Return the data of node as a string, or "" if xmlGetData() yields NULL. */
std::string cYTFeedParser::getXmlData(xmlNodePtr node)
{
	const char * value = xmlGetData(node);
	return value ? std::string(value) : std::string();
}
bool cYTFeedParser::parseFeedJSON(std::string &answer)
{
Json::Value root;
Json::Reader reader;
std::ostringstream ss;
std::ifstream fh(curfeedfile.c_str(),std::ifstream::in);
ss << fh.rdbuf();
std::string filedata = ss.str();
bool parsedSuccess = reader.parse(filedata,root,false);
if(!parsedSuccess)
{
parsedSuccess = reader.parse(answer,root,false);
}
if(!parsedSuccess)
{
printf("Failed to parse JSON\n");
printf("%s\n", reader.getFormattedErrorMessages().c_str());
return false;
}
next.clear();
prev.clear();
//TODO
total.clear();
start.clear();
next = root.get("nextPageToken", "").asString();
prev = root.get("prevPageToken", "").asString();
cYTVideoInfo vinfo;
Json::Value elements = root["items"];
for(unsigned int i=0; i<elements.size();++i)
{
#ifdef DEBUG_PARSER
printf("=========================================================\n");
printf("Element %d in elements\n", i);
printf("%s\n", elements[i]);
#endif
if(elements[i]["id"].type() == Json::objectValue) {
vinfo.id = elements[i]["id"].get("videoId", "").asString();
}
else if(elements[i]["id"].type() == Json::stringValue) {
vinfo.id = elements[i].get("id", "").asString();
}
vinfo.title = elements[i]["snippet"].get("title", "").asString();
vinfo.description = elements[i]["snippet"].get("description", "").asString();
vinfo.published = elements[i]["snippet"].get("publishedAt", "").asString().substr(0, 10);
std::string thumbnail = elements[i]["snippet"]["thumbnails"]["default"].get("url", "").asString();
// save thumbnail "default", if "high" not found
vinfo.thumbnail = elements[i]["snippet"]["thumbnails"]["high"].get("url", thumbnail).asString();
vinfo.author = elements[i]["snippet"].get("channelTitle", "unkown").asString();
vinfo.category = "";
parseFeedDetailsJSON(&vinfo);
#ifdef DEBUG_PARSER
printf("prevPageToken: %s\n", prevPageToken.c_str());
printf("nextPageToken: %s\n", nextPageToken.c_str());
printf("vinfo.id: %s\n", vinfo.id.c_str());
printf("vinfo.description: %s\n", vinfo.description.c_str());
printf("vinfo.published: %s\n", vinfo.published.c_str());
printf("vinfo.title: %s\n", vinfo.title.c_str());
printf("vinfo.thumbnail: %s\n", vinfo.thumbnail.c_str());
#endif
if (!vinfo.id.empty()) {
vinfo.ret = false;
videos.push_back(vinfo);
}
}
GetVideoUrls();
std::vector<cYTVideoInfo>::iterator pos = videos.begin();
while (pos != videos.end())
if ((*pos).ret)
++pos;
else
pos = videos.erase(pos);
parsed = !videos.empty();
return parsed;
}
/*
 * Convert an ISO 8601 duration ("PT4M13S", "PT1H30S", "P1DT2H") to seconds.
 *
 * Each number is applied according to the designator that follows it, so
 * components may be missing in any combination. Weeks and days are taken
 * from the date part, hours, minutes and seconds from the part behind 'T'.
 * Fractions are truncated. Text that does not start with 'P' gives 0; the
 * result is limited to 0x7fffffff.
 */
static int ytDurationToSeconds(const std::string &iso)
{
	if (iso.empty() || iso[0] != 'P')
		return 0;

	const long long limit = 0x7fffffff;
	long long total = 0;
	long long value = 0;
	bool have_value = false;
	bool in_time = false;
	bool in_fraction = false;

	for (std::string::size_type i = 1; i < iso.size(); ++i) {
		const char c = iso[i];
		if (c >= '0' && c <= '9') {
			if (!in_fraction) {
				// stop growing once the value is out of range anyway
				if (value <= limit)
					value = value * 10 + (c - '0');
				have_value = true;
			}
			continue;
		}
		if (c == '.' || c == ',') {
			in_fraction = true;
			continue;
		}
		if (c == 'T') {
			in_time = true;
		}
		else if (have_value) {
			long long unit = 0;
			if (in_time) {
				if (c == 'H')
					unit = 3600;
				else if (c == 'M')
					unit = 60;
				else if (c == 'S')
					unit = 1;
			} else {
				if (c == 'W')
					unit = 7 * 86400;
				else if (c == 'D')
					unit = 86400;
			}
			total += value * unit;
			if (total > limit)
				total = limit;
		}
		value = 0;
		have_value = false;
		in_fraction = false;
	}
	return (int)total;
}

/*
 * Request the contentDetails of vinfo->id and store the play time in
 * vinfo->duration (seconds).
 *
 * vinfo  video to complete; its id is read, duration is always reset to 0
 *        first
 *
 * Returns false if the request failed or the answer is not valid JSON. In
 * all other cases true is returned, with duration 0 if none was delivered.
 */
bool cYTFeedParser::parseFeedDetailsJSON(cYTVideoInfo* vinfo)
{
	vinfo->duration = 0;
	// See at https://developers.google.com/youtube/v3/docs/videos
	std::string url = "https://www.googleapis.com/youtube/v3/videos?id=" + vinfo->id + "&part=contentDetails&key=" + key;
	std::string answer;
	if (!getUrl(url, answer))
		return false;

	Json::Value root;
	Json::Reader reader;
	bool parsedSuccess = reader.parse(answer, root, false);
	if (!parsedSuccess) {
		printf("Failed to parse JSON\n");
		printf("%s\n", reader.getFormattedErrorMessages().c_str());
		return false;
	}

	Json::Value elements = root["items"];
	// only index into "items" if it really is a non-empty array
	if (elements.isArray() && elements.size() > 0) {
		std::string duration = elements[0]["contentDetails"].get("duration", "").asString();
		// The former fixed sscanf patterns read "PT1H30S" as 1h 30min and
		// ignored durations of a day or more ("P1DT...").
		vinfo->duration = ytDurationToSeconds(duration);
	}
	return true;
}
/* Tell whether fmt is one of the itags listed in the zero-terminated itags table. */
bool cYTFeedParser::supportedFormat(int fmt)
{
	const int *entry = itags;
	while (*entry != 0) {
		if (fmt == *entry)
			return true;
		++entry;
	}
	return false;
}
/*
 * Extract the stream URLs of all supported formats from a get_video_info
 * answer and add them to vinfo.formats.
 *
 * answer  raw response; it is URL-decoded in place before it is examined
 * vinfo   video whose formats map is filled
 *
 * Returns true if at least one entry with a supported itag, a URL and a
 * signature was stored. Returns false if answer contains no "token=" or no
 * usable entry.
 */
bool cYTFeedParser::decodeVideoInfo(std::string &answer, cYTVideoInfo &vinfo)
{
bool ret = false;
decodeUrl(answer);
#if 0
std::string infofile = thumbnail_dir;
infofile += "/";
infofile += vinfo.id;
infofile += ".txt";
saveToFile(infofile.c_str(), answer);
#endif
// an answer without "token=" is not processed any further
if(answer.find("token=") == std::string::npos)
return ret;
//FIXME check expire
std::vector<std::string> ulist;
std::string::size_type fmt = answer.find("url_encoded_fmt_stream_map=");
if (fmt != std::string::npos) {
// move on to the '=' that ends the key; the list starts right behind it
fmt = answer.find("=", fmt);
// one entry per ','; the split runs up to the end of answer, so the last
// entry also holds whatever follows the stream map
splitString(answer, ",", ulist, fmt+1);
for (unsigned i = 0; i < ulist.size(); i++) {
#if 0 // to decode all params
decodeUrl(ulist[i]);
printf("URL: %s\n", ulist[i].c_str());
#endif
std::map<std::string,std::string> smap;
std::vector<std::string> uparams;
splitString(ulist[i], "&", uparams);
// entries with fewer than three parameters are skipped
if (uparams.size() < 3)
continue;
for (unsigned j = 0; j < uparams.size(); j++) {
// every parameter is decoded once more, then stored as key -> value,
// split at its first '='
decodeUrl(uparams[j]);
#ifdef DEBUG_PARSER
printf(" param: %s\n", uparams[j].c_str());
#endif
splitString(uparams[j], "=", smap);
}
#ifdef DEBUG_PARSER
printf("=========================================================\n");
#endif
cYTVideoUrl yurl;
// yurl.url keeps the complete URL; smap["url"] is cut down below while the
// signature is searched
yurl.url = smap["url"];
std::string::size_type ptr = smap["url"].find("signature=");
if (ptr != std::string::npos)
{
ptr = smap["url"].find("=", ptr);
smap["url"].erase(0,ptr+1);
// sig is only set if another '&' follows the signature value
if((ptr = smap["url"].find("&")) != std::string::npos)
yurl.sig = smap["url"].substr(0,ptr);
}
int id = atoi(smap["itag"].c_str());
// keep the entry only for a supported itag with both URL and signature
if (supportedFormat(id) && !yurl.url.empty() && !yurl.sig.empty()) {
yurl.quality = smap["quality"];
yurl.type = smap["type"];
vinfo.formats.insert(yt_urlmap_pair_t(id, yurl));
ret = true;
}
}
}
return ret;
}
/*
 * Load the feed at url and parse it.
 *
 * The current video list is discarded and curfeedfile is set to
 * <thumbnail_dir>/<curfeed>.xml. With CACHE_FILES defined the feed is
 * downloaded into that file, otherwise it is fetched into memory.
 *
 * Returns false if the transfer failed, else the result of parseFeedJSON().
 */
bool cYTFeedParser::ParseFeed(std::string &url)
{
	videos.clear();

	std::string answer;
	curfeedfile = thumbnail_dir;
	curfeedfile += "/";
	curfeedfile += curfeed;
	curfeedfile += ".xml";
#ifdef CACHE_FILES
	// the destination is the feed file set up above; the former name
	// "cfile" is not declared anywhere in this function
	if(!DownloadUrl(url, curfeedfile))
		return false;
#else
	if (!getUrl(url, answer))
		return false;
#endif
	return parseFeedJSON(answer);
}
/*
 * Build the request URL for the given feed mode and parse the result.
 *
 * mode     feed to load: a chart feed (everything below FEED_LAST), NEXT /
 *          PREV page of the last request, RELATED or SEARCH
 * search   search text, used and required for SEARCH only
 * vid      video id, used and required for RELATED only
 * orderby  sort order for SEARCH
 *
 * For all modes but NEXT and PREV the result limit and the API key are
 * appended and the URL is remembered as base for later paging.
 *
 * Returns false if a required argument or page token is missing, else the
 * result of ParseFeed(url).
 */
bool cYTFeedParser::ParseFeed(yt_feed_mode_t mode, std::string search, std::string vid, yt_feed_orderby_t orderby)
{
	std::string url = "https://www.googleapis.com/youtube/v3/search?";
	bool append_res = true;

	if (mode < FEED_LAST) {
		switch(mode) {
			//FIXME APIv3: we dont have the parameter "time".
			case MOST_POPULAR:
			case MOST_POPULAR_ALL_TIME:
			default:
				// all chart feeds map to the same request for now
				curfeed = "&chart=mostPopular";
				break;
		}
		url = "https://www.googleapis.com/youtube/v3/videos?part=snippet";
		if (!region.empty()) {
			url += "&regionCode=";
			url += region;
		}
		url += curfeed;
	}
	else if (mode == NEXT) {
		if (next.empty())
			return false;
		url = nextprevurl;
		url += "&pageToken=";
		url += next;
		// the stored base URL already has limit and key
		append_res = false;
	}
	else if (mode == PREV) {
		if (prev.empty())
			return false;
		url = nextprevurl;
		url += "&pageToken=";
		url += prev;
		// the stored base URL already has limit and key
		append_res = false;
	}
	else if (mode == RELATED) {
		if (vid.empty())
			return false;
		// NOTE(review): confirm that this path is a valid APIv3 endpoint
		url = "https://www.googleapis.com/youtube/v3/videos/";
		url += vid;
		url += "/related?";
	}
	else if (mode == SEARCH) {
		if (search.empty())
			return false;
		encodeUrl(search);
		url = "https://www.googleapis.com/youtube/v3/search?q=";
		url += search;
		url += "&part=snippet";
		//FIXME locale for "title" and "videoCount"
		const char *orderby_values[] = { "date","relevance","viewCount","rating","title","videoCount"};
		// the mask keeps the index inside the first four entries
		url += "&order=" + std::string(orderby_values[orderby & 3]);
	}

	feedmode = mode;
	if (append_res) {
		url += "&maxResults=";
		// large enough for every int, and bounded in any case
		char res[16];
		snprintf(res, sizeof(res), "%d", max_results);
		url+= res;
		url += "&key=" + key;
		nextprevurl = url;
	}

	return ParseFeed(url);
}
/*
 * Look up the stream URLs of vinfo.
 *
 * get_video_info is requested with each "el" variant in turn until one
 * answer can be decoded. The outcome is stored in vinfo.ret and returned.
 *
 * _curl_handle  handle passed on to getUrl()
 */
bool cYTFeedParser::ParseVideoInfo(cYTVideoInfo &vinfo, CURL *_curl_handle)
{
	const char * const el_variants[] = { "&el=embedded", "&el=vevo", "&el=detailpage" };
	const unsigned variant_count = sizeof(el_variants) / sizeof(el_variants[0]);

	bool decoded = false;
	for (unsigned n = 0; n < variant_count && !decoded; n++) {
		std::string vurl("http://www.youtube.com/get_video_info?video_id=");
		vurl.append(vinfo.id);
		vurl.append(el_variants[n]);
		vurl.append("&ps=default&eurl=&gl=US&hl=en");
		printf("cYTFeedParser::ParseVideoInfo: get [%s]\n", vurl.c_str());

		std::string answer;
		// a failed request just moves on to the next variant
		if (getUrl(vurl, answer, _curl_handle))
			decoded = decodeVideoInfo(answer, vinfo);
	}

	vinfo.ret = decoded;
	return decoded;
}
/*
 * Worker thread for DownloadThumbnails().
 *
 * Takes the next unprocessed index from caller->worker_index (under
 * caller->mutex) and downloads that video's thumbnail until all videos are
 * done. Every worker uses a curl handle of its own.
 *
 * arg  the cYTFeedParser that started the thread
 *
 * Returns a pointer to a bool: true if all thumbnails handled by this
 * worker are available, false if at least one failed.
 */
void *cYTFeedParser::DownloadThumbnailsThread(void *arg)
{
	// The joining thread reads the result through the returned pointer
	// after this thread is gone, so it must not point into this stack
	// frame. These two values are never written to.
	static bool result_ok = true;
	static bool result_failed = false;

	set_threadname("YT::DownloadThumbnails");
	bool ret = true;
	cYTFeedParser *caller = (cYTFeedParser *)arg;
	CURL *c = curl_easy_init();

	for (;;) {
		unsigned int i;
		{
			// hold the lock only while the index is taken
			OpenThreads::ScopedLock<OpenThreads::Mutex> m_lock(caller->mutex);
			i = caller->worker_index++;
		}
		if (i >= caller->videos.size())
			break;
		// a failure is remembered, the remaining videos are still tried
		if (!caller->DownloadThumbnail(caller->videos[i], c))
			ret = false;
	}

	curl_easy_cleanup(c);
	return ret ? &result_ok : &result_failed;
}
/*
 * Make sure the thumbnail of vinfo exists as a local file.
 *
 * The file <thumbnail_dir>/<id>.jpg is used if present, next the cache is
 * asked for each supported itag, and only then the thumbnail is
 * downloaded. On success the file name is stored in vinfo.tfile.
 *
 * _curl_handle  handle for the download; NULL selects the parser's own one
 *
 * Returns true if a thumbnail file is available, false if the video has no
 * thumbnail URL or nothing could be found or downloaded.
 */
bool cYTFeedParser::DownloadThumbnail(cYTVideoInfo &vinfo, CURL *_curl_handle)
{
	if (!_curl_handle)
		_curl_handle = curl_handle;

	if (vinfo.thumbnail.empty())
		return false;

	std::string fname = thumbnail_dir + "/" + vinfo.id + ".jpg";
	bool have_file = !access(fname, F_OK);

	// the loop is left as soon as the cache delivers a name
	for (int *fmtp = itags; *fmtp && !have_file; fmtp++)
		have_file = cYTCache::getInstance()->getNameIfExists(fname, vinfo.id, *fmtp);

	if (!have_file)
		have_file = DownloadUrl(vinfo.thumbnail, fname, _curl_handle);

	if (have_file)
		vinfo.tfile = fname;
	return have_file;
}
/*
 * Download the thumbnails of all videos using up to concurrent_downloads
 * worker threads.
 *
 * Returns false if the thumbnail directory cannot be created, if no worker
 * could be started, or if a worker reported a failure. Returns true
 * otherwise, also when there is nothing to download.
 */
bool cYTFeedParser::DownloadThumbnails()
{
	bool ret = true;
	if (mkdir(thumbnail_dir.c_str(), 0755) && errno != EEXIST) {
		perror(thumbnail_dir.c_str());
		return false;
	}

	unsigned int max_threads = concurrent_downloads;
	if (videos.size() < max_threads)
		max_threads = videos.size();

	worker_index = 0;
	// nothing to do; also avoids an array of size zero
	if (max_threads == 0)
		return ret;

	// only threads that were really created are kept and joined
	std::vector<pthread_t> ta(max_threads);
	unsigned started = 0;
	for (unsigned i = 0; i < max_threads; i++) {
		int rc = pthread_create(&ta[started], NULL, cYTFeedParser::DownloadThumbnailsThread, this);
		if (rc == 0)
			started++;
		else
			printf("cYTFeedParser::DownloadThumbnails: pthread_create failed (%d)\n", rc);
	}
	// the workers share one index, so fewer threads still handle every
	// video; without any worker nothing gets downloaded
	if (started == 0)
		return false;

	for (unsigned i = 0; i < started; i++) {
		void *r = NULL;
		if (pthread_join(ta[i], &r) != 0 || r == NULL || r == PTHREAD_CANCELED) {
			ret = false;
			continue;
		}
		ret &= *((bool *)r);
	}
	return ret;
}
/*
 * Worker thread for GetVideoUrls().
 *
 * Takes the next unprocessed index from caller->worker_index (under
 * caller->mutex) and resolves that video's stream URLs until all videos
 * are done. Every worker uses a curl handle of its own.
 *
 * arg  the cYTFeedParser that started the thread
 *
 * Returns a pointer to an int: 1 if this worker resolved at least one
 * video, else 0.
 */
void *cYTFeedParser::GetVideoUrlsThread(void *arg)
{
	// The joining thread reads the result through the returned pointer
	// after this thread is gone, so it must not point into this stack
	// frame. These two values are never written to.
	static int result_none = 0;
	static int result_some = 1;

	set_threadname("YT::GetVideoUrls");
	int ret = 0;
	cYTFeedParser *caller = (cYTFeedParser *)arg;
	CURL *c = curl_easy_init();

	for (;;) {
		unsigned int i;
		{
			// hold the lock only while the index is taken
			OpenThreads::ScopedLock<OpenThreads::Mutex> m_lock(caller->mutex);
			i = caller->worker_index++;
		}
		if (i >= caller->videos.size())
			break;
		// one success is enough, the remaining videos are still tried
		if (caller->ParseVideoInfo(caller->videos[i], c))
			ret = 1;
	}

	curl_easy_cleanup(c);
	return ret ? &result_some : &result_none;
}
/*
 * Resolve the stream URLs of all videos using up to concurrent_downloads
 * worker threads.
 *
 * Returns true if at least one worker reported a resolved video, false
 * otherwise (also when there are no videos or no worker could be started).
 */
bool cYTFeedParser::GetVideoUrls()
{
	int ret = 0;
	unsigned int max_threads = concurrent_downloads;
	if (videos.size() < max_threads)
		max_threads = videos.size();

	worker_index = 0;
	// nothing to do; also avoids an array of size zero
	if (max_threads == 0)
		return false;

	// only threads that were really created are kept and joined
	std::vector<pthread_t> ta(max_threads);
	unsigned started = 0;
	for (unsigned i = 0; i < max_threads; i++) {
		int rc = pthread_create(&ta[started], NULL, cYTFeedParser::GetVideoUrlsThread, this);
		if (rc == 0)
			started++;
		else
			printf("cYTFeedParser::GetVideoUrls: pthread_create failed (%d)\n", rc);
	}

	for (unsigned i = 0; i < started; i++) {
		void *r = NULL;
		if (pthread_join(ta[i], &r) != 0 || r == NULL || r == PTHREAD_CANCELED)
			continue;
		ret |= *((int *)r);
	}
	return ret != 0;
}
void cYTFeedParser::Cleanup(bool delete_thumbnails)
{
printf("cYTFeedParser::Cleanup: %d videos\n", (int)videos.size());
if (delete_thumbnails) {
for (unsigned i = 0; i < videos.size(); i++) {
unlink(videos[i].tfile.c_str());
}
}
unlink(curfeedfile.c_str());
videos.clear();
parsed = false;
feedmode = -1;
}
/* Use _thumbnail_dir as the directory for thumbnails and the feed file from now on. */
void cYTFeedParser::SetThumbnailDir(std::string &_thumbnail_dir)
{
	thumbnail_dir.assign(_thumbnail_dir);
}
void cYTFeedParser::Dump()
{
printf("feed: %d videos\n", (int)videos.size());
for (unsigned i = 0; i < videos.size(); i++)
videos[i].Dump();
}